Ready-made solutions to test
Welcome to the section with ready-to-use notebooks. Use the provided sample code to quickly run GPU inference against your endpoints. We have prepared solutions for the Automatic1111 and vLLM environments.
Automatic1111
Below is sample Python code that you can use in a Jupyter notebook. It will send 10 inference requests to your endpoint and save the 10 generated images. Remember to fill in the following fields:
endpoint = 'enter your endpoint address'
- where to find the endpoint address?
access_token = 'enter your access token'
- where to find the access token?
txt2img
Use the ready-made notebook in Colab, or copy the code below and paste it into your Jupyter Notebook.
import requests, io, time, base64
from PIL import Image

endpoint = 'enter your endpoint address'
access_token = 'enter your access token'

headers = {
    'Authorization': f'Bearer {access_token}',
    'Content-Type': 'application/json'
}

data = {
    "prompt": "portrait of a young woman, blue eyes, cinematic",
    "steps": 15,
    "width": 512,
    "height": 512
}

# Send 10 requests and save each generated image
for i in range(1, 11):
    start_time = time.time()
    response = requests.post(url=f'{endpoint}/sdapi/v1/txt2img', headers=headers, json=data)
    try:
        r = response.json()
    except Exception:
        print(f"Status code: {response.status_code}, Data: {response.content}")
        continue
    end_time = time.time()
    inference_time = end_time - start_time
    print(f'Inference time #{i}:', inference_time, "seconds")
    image = Image.open(io.BytesIO(base64.b64decode(r['images'][0])))
    image.save(f'modelserve_output-{i}.png')
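If you want to experiment with other generation settings, the Automatic1111 txt2img endpoint also accepts optional fields such as negative_prompt, sampler_name, cfg_scale, and seed. Below is a minimal sketch of an extended payload; the exact set of supported fields and sampler names depends on the Automatic1111 version running on your endpoint, so treat it as an illustration rather than a complete reference.

# Sketch of an extended txt2img payload (optional fields; availability depends
# on the Automatic1111 version running on your endpoint)
data = {
    "prompt": "portrait of a young woman, blue eyes, cinematic",
    "negative_prompt": "blurry, low quality",  # things to avoid in the image
    "steps": 15,
    "width": 512,
    "height": 512,
    "cfg_scale": 7,              # how strongly the prompt is followed
    "sampler_name": "Euler a",   # assumed sampler name, check your instance
    "seed": 42                   # fixed seed for reproducible results
}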
img2img
Use the ready-made notebook in Colab, or copy the code below and paste it into your Jupyter Notebook.
import requests, io, time, base64
from PIL import Image

endpoint = 'enter your endpoint address' # Enter your endpoint address (check more -> https://docs.modelserve.ai/endpoints/#display-list-of-endpoints)
access_token = 'enter your access token' # Enter your access token (check more -> https://docs.modelserve.ai/quickstart/#generate-an-access-token)

headers = {
    'Authorization': f'Bearer {access_token}',
    'Content-Type': 'application/json'
}

# Function to encode an image URL to base64
def encode_image_url_to_base64(image_url):
    response = requests.get(image_url)
    if response.status_code == 200:
        return base64.b64encode(response.content).decode('utf-8')
    else:
        raise Exception(f"Failed to fetch image. Status code: {response.status_code}")

image_url = "https://cdn.prod.website-files.com/65116ddd046d708ff3526348/65522f916945a65ca3b6b1dc_test-1.png"
encoded_image = encode_image_url_to_base64(image_url)

data = {
    "prompt": "girl with red eyes", # Check more -> https://docs.modelserve.ai/automatic1111/#img2img
    "init_images": [encoded_image],
    "steps": 30, # Optimal steps to maintain quality
    "width": 512, # Match the original image dimensions
    "height": 512, # Match the original image dimensions
    "denoising_strength": 0.2, # Low denoising strength to preserve original details
    "cfg_scale": 7 # Classifier-Free Guidance Scale
}

start_time = time.time()
response = requests.post(url=f'{endpoint}/sdapi/v1/img2img', headers=headers, json=data)

# Handling the response
try:
    r = response.json()
    if 'images' in r:
        base64_image = r['images'][0]
        image_data = base64.b64decode(base64_image)
        image = Image.open(io.BytesIO(image_data))
        image.save('modelserve-img2img_output.png')
        print("Image saved to modelserve-img2img_output.png")
    else:
        print("Error: 'images' key not found in the response.")
        print(f"Data: {response.content}")
except Exception as e:
    print(f"Exception: {str(e)}")
    print(f"Status code: {response.status_code}, Data: {response.content}")

end_time = time.time()
inference_time = end_time - start_time
print('Inference time:', inference_time, "seconds")
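If your input image is stored on disk rather than at a URL, you can base64-encode it directly from a local file. The snippet below is a small sketch; local_input.png is a placeholder path, and the payload is reused from the example above.

# Encode a local image file to base64 (local_input.png is a placeholder path)
def encode_image_file_to_base64(path):
    with open(path, 'rb') as f:
        return base64.b64encode(f.read()).decode('utf-8')

encoded_image = encode_image_file_to_base64('local_input.png')
data["init_images"] = [encoded_image]  # reuse the img2img payload from the example above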
vLLM
Below is sample Python code that you can use in a Jupyter notebook. Remember to fill in the following fields:
endpoint = 'enter your endpoint address'
- where to find the endpoint address?
access_token = 'enter your access token'
- where to find the access token?
Chat Completions
Use the ready-made notebook in Colab, or copy the code below and paste it into your Jupyter Notebook.
import requests

# Endpoint address and access token
endpoint = 'enter your endpoint address'
access_token = 'enter your access token'

# Headers for the requests
headers = {
    'Authorization': f'Bearer {access_token}',
    'Content-Type': 'application/json'
}

# Fetching the model ID
get_model = requests.get(url=f'{endpoint}/v1/models', headers=headers)
model = get_model.json()['data'][0]['id']

# Data payload for the POST request
data = {
    "model": model,
    "messages": [
        {
            "role": "system",
            "content": "You are a helpful assistant."
        },
        {
            "role": "user",
            "content": "Who won the World Cup in 2018?"
        }
    ]
}

# Sending a POST request to /v1/chat/completions
response = requests.post(url=f'{endpoint}/v1/chat/completions', headers=headers, json=data)

# Handling the response
if response.status_code == 200:
    print("Response:", response.json()['choices'][0]['message']['content'])
else:
    print("Request failed with status code:", response.status_code)
Audio (speech2text)
Below is sample Python code that you can use in a Jupyter notebook. It will transcribe an audio file into English text. Remember to fill in the following fields:
endpoint = 'enter your endpoint address'
- where to find the endpoint address?
access_token = 'enter your access token'
- where to find the access token?
Use the ready-made notebook in Colab, or copy the code below and paste it into your Jupyter Notebook.
import requests

endpoint = 'enter your endpoint address'
access_token = 'enter your access token'

# Headers for the requests
headers = {
    'accept': 'application/json',
    'Authorization': f'Bearer {access_token}'
}

url_to_file = 'https://docs.modelserve.ai/assets/modelserve-example.mp3' # Path to audio
file = requests.get(url_to_file)
files = {'file': ('modelserve-example.mp3', file.content, 'audio/mpeg')} # Filename

response = requests.post(f'{endpoint}/transcriptions/?response_format=text', headers=headers, files=files)

# Handling the response
try:
    result = response.json()
    print(result)
except ValueError:
    print(f"Status code: {response.status_code}, Data: {response.content}")