Ready-made solutions to test

Welcome to the section with ready-to-use notebooks. Use the provided sample code to quickly send inference requests to your GPU endpoint. We have prepared solutions for the Automatic1111 and vLLM environments.


Below is sample Python code that you can use in a Jupyter notebook. It will send 10 inference requests to your endpoint and save the 10 generated images. Remember to fill in the following fields: `endpoint` (your endpoint address) and `access_token` (your access token).


Use a ready-made notebook in Colab:

Google Colab

You can also copy the code below and paste it into your Jupyter Notebook.

import requests, io, time, base64
from PIL import Image

# Connection settings: replace both placeholders before running.
endpoint = 'enter your endpoint address'
access_token = 'enter your access token'

# Every request carries the bearer token and a JSON body.
headers = {
    'Authorization': f'Bearer {access_token}',
    'Content-Type': 'application/json',
}

# txt2img payload sent as the JSON body of each request.
data = {
    "prompt": "portrait of a young women, blue eyes, cinematic",
    "steps": 15,
    "width": 512,
    "height": 512,
}

# Send 10 txt2img requests, time each one, and save every generated image.
# range(1, 11) yields i = 1..10, so exactly 10 output files are written.
for i in range(1, 11):
    start_time = time.time()
    response = requests.post(f'{endpoint}/sdapi/v1/txt2img', headers=headers, json=data)
    try:
        r = response.json()
    except ValueError:
        # The body was not JSON (e.g. an auth or gateway error page).
        print(f"Status code: {response.status_code}, Data: {response.content}")
        continue
    end_time = time.time()

    inference_time = (end_time - start_time)
    print(f'Inference time #{i}:', inference_time, "seconds")

    if 'images' not in r:
        # A JSON error payload has no image to decode; report it and move on.
        print(f"Status code: {response.status_code}, Data: {response.content}")
        continue

    # The first entry of 'images' is decoded from base64 and saved as a PNG.
    image = Image.open(io.BytesIO(base64.b64decode(r['images'][0])))
    image.save(f'modelserve_output-{i}.png')


Use a ready-made notebook in Colab:

Google Colab

You can also copy the code below and paste it into your Jupyter Notebook.

import requests, io, time, base64
from PIL import Image

# Connection settings: replace both placeholders before running.
endpoint = 'enter your endpoint address'  # Enter your endpoint address
access_token = 'enter your access token'  # Enter your access token

# Every request carries the bearer token and a JSON body.
headers = {
    'Authorization': f'Bearer {access_token}',
    'Content-Type': 'application/json',
}

# Function to encode an image URL to base64
def encode_image_url_to_base64(image_url):
    """Download the image at ``image_url`` and return it base64-encoded.

    Args:
        image_url: URL of the image to fetch with an HTTP GET.

    Returns:
        The response body encoded as a base64 ``str`` (UTF-8 decoded).

    Raises:
        Exception: If the server answers with a status code other than 200.
    """
    response = requests.get(image_url)
    if response.status_code != 200:
        # Fail loudly instead of returning None, which would be sent on
        # as an invalid init image.
        raise Exception(f"Failed to fetch image. Status code: {response.status_code}")
    return base64.b64encode(response.content).decode('utf-8')

# URL of the source image to transform; fill this in before running.
image_url = ""
encoded_image = encode_image_url_to_base64(image_url)

# img2img payload sent as the JSON body of the request.
data = {
    "prompt": "girl with red eyes",
    "init_images": [encoded_image],
    "steps": 30,  # Optimal steps to maintain quality
    "width": 512,  # Match the original image dimensions
    "height": 512,  # Match the original image dimensions
    "denoising_strength": 0.2,  # Low denoising strength to preserve original details
    "cfg_scale": 7,  # Classifier-Free Guidance Scale
}

start_time = time.time()
response = requests.post(f'{endpoint}/sdapi/v1/img2img', headers=headers, json=data)

try:
    r = response.json()

    if 'images' in r:
        # The first entry of 'images' is decoded from base64 and saved as a PNG.
        base64_image = r['images'][0]
        image_data = base64.b64decode(base64_image)
        image = Image.open(io.BytesIO(image_data))
        image.save('modelserve-img2img_output.png')
        print("Image saved to modelserve-img2img_output.png")
    else:
        print("Error: 'images' key not found in the response.")
        print(f"Data: {response.content}")
except Exception as e:
    # Covers a non-JSON body as well as a failure to decode or save the image.
    print(f"Exception: {str(e)}")
    print(f"Status code: {response.status_code}, Data: {response.content}")

end_time = time.time()
inference_time = (end_time - start_time)
# Only one request is sent here, so there is no iteration counter to print.
print('Inference time:', inference_time, "seconds")

Check more: Automatic1111


Below is sample Python code that you can use in a Jupyter notebook. Remember to fill in the following fields: `endpoint` (your endpoint address) and `access_token` (your access token).

Chat Completions

Use a ready-made notebook in Colab:

Google Colab

You can also copy the code below and paste it into your Jupyter Notebook.

import requests

# Endpoint address and access token: replace both placeholders before running.
endpoint = 'enter your endpoint address'
access_token = 'enter your access token'

# Headers for the requests
headers = {
    'Authorization': f'Bearer {access_token}',
    'Content-Type': 'application/json',
}

# Fetching the model ID: the first model listed by /v1/models is used.
get_model = requests.get(url=f'{endpoint}/v1/models', headers=headers)
model = get_model.json()['data'][0]['id']

# Data payload for the POST request
data = {
    "model": model,
    "messages": [
        {
            "role": "system",
            "content": "You are a helpful assistant."
        },
        {
            "role": "user",
            "content": "Who won the World Cup in 2018?"
        },
    ],
}

# Sending a POST request to /v1/chat/completions
response = requests.post(f'{endpoint}/v1/chat/completions', headers=headers, json=data)

# Handling the response
if response.status_code == 200:
    # Print the text of the first returned choice.
    print("Response:", response.json()['choices'][0]['message']['content'])
else:
    print("Request failed with status code:", response.status_code)

Check more: vLLM

Audio (speech2text)

Below is sample Python code that you can use in a Jupyter notebook. It will transcribe the audio file into English text. Remember to fill in the following fields: `endpoint` (your endpoint address) and `access_token` (your access token).

Use a ready-made notebook in Colab:

Google Colab

You can also copy the code below and paste it into your Jupyter Notebook.

import requests

# Connection settings: replace both placeholders before running.
endpoint = 'enter your endpoint address'
access_token = 'enter your access token'

# Headers for the requests. No Content-Type is set here.
headers = {
    'accept': 'application/json',
    'Authorization': f'Bearer {access_token}',
}

# URL of the audio file to transcribe; it is downloaded with an HTTP GET.
url_to_file = ''
file = requests.get(url_to_file)

# Multipart form field: (filename, raw bytes, MIME type).
files = {'file': ('modelserve-example.mp3', file.content, 'audio/mpeg')}

response = requests.post(f'{endpoint}/transcriptions/?response_format=text', headers=headers, files=files)

# Handling the response
try:
    result = response.json()
except ValueError:
    # The body was not valid JSON; show the raw response instead.
    print(f"Status code: {response.status_code}, Data: {response.content}")
else:
    # Show the parsed response so the transcription is visible in the notebook.
    print(result)

Check more: Audio (text2speech)