add description
app.py CHANGED
@@ -12,6 +12,12 @@ from mistral_common.protocol.instruct.request import ChatCompletionRequest
 from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
 import spaces
 
+title = "# **WIP / DEMO** 🙋🏻♂️Welcome to Tonic's Pixtral Image-to-Text Model Demo"
+description = """Upload an image to encode it. This is a **work in progress**, just showing off some demo features here until it's ready.
+### Join us :
+🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [](https://discord.gg/qdfnvSPcqP) On 🤗Huggingface: [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to 🌟 [Build Tonic](https://git.tonic-ai.com/contribute). 🤗 Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
+"""
+
 # Download model files
 model_path = snapshot_download(repo_id="mistral-community/pixtral-12b-240910")
 
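The hunk above only defines two module-level strings; they are plain Markdown that the Gradio UI renders later in the file (see the last hunk). A minimal sketch of that wiring, with shortened placeholder strings standing in for the real ones, could look like this:

```python
import gradio as gr

# Shortened stand-ins for the strings added in the hunk above.
title = "# **WIP / DEMO** Welcome to Tonic's Pixtral Image-to-Text Model Demo"
description = "Upload an image to encode it. This is a **work in progress** demo."

with gr.Blocks() as demo:
    gr.Markdown(title)        # rendered as a Markdown heading
    gr.Markdown(description)  # rendered as Markdown body text

if __name__ == "__main__":
    demo.launch()
```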
@@ -104,8 +110,6 @@ def load_model(params, model_path):
 
 # Initialize the model
 model = load_model(params, model_path)
-
-# Initialize the tokenizer
 tokenizer = MistralTokenizer.from_model("pixtral")
 
 @spaces.GPU
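For context, the unchanged lines around this hunk pair a checkpoint download with a matching tokenizer. A self-contained sketch of just those two calls, assuming huggingface_hub and mistral_common are installed, is:

```python
from huggingface_hub import snapshot_download
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer

# Download the repository files once and reuse the local path.
model_path = snapshot_download(repo_id="mistral-community/pixtral-12b-240910")

# Build the tokenizer that matches the Pixtral checkpoint.
tokenizer = MistralTokenizer.from_model("pixtral")

print(model_path)  # local directory containing the downloaded files
```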
@@ -134,18 +138,17 @@ def process_image_and_text(image, prompt):
 
     # Process the image and generate text
     with torch.no_grad():
-        model.cuda()
+        model.cuda()
         vision_output = model(image_tensor)
-        model.cpu()
-    # Add text generation logic here
+        model.cpu()
     generated_text = f"Generated text based on the image and prompt: {prompt}"
 
     return generated_text, len(tokens), len(images)
 
 # Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown(
-    gr.Markdown(
+    gr.Markdown(title)
+    gr.Markdown(description)
 
     with gr.Row():
         with gr.Column(scale=1):