Spaces:
Paused
Paused
add loadimg from @not-lain
Browse files
- app.py +19 -8
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -15,6 +15,9 @@ import math
|
|
| 15 |
from typing import List, Optional, Tuple
|
| 16 |
import gc
|
| 17 |
from contextlib import contextmanager
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
title = "# **WIP / DEMO** 🙋🏻♂️Welcome to Tonic's Pixtral Model Demo"
|
| 20 |
description = """
|
|
@@ -25,8 +28,8 @@ This demo showcases two capabilities of the Pixtral model:
|
|
| 25 |
### Join us :
|
| 26 |
🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [](https://discord.gg/qdfnvSPcqP) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Build Tonic](https://git.tonic-ai.com/contribute)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
|
| 27 |
"""
|
| 28 |
-
|
| 29 |
-
model_path = snapshot_download(repo_id="
|
| 30 |
|
| 31 |
with open(f'{model_path}/params.json', 'r') as f:
|
| 32 |
params = json.load(f)
|
|
@@ -186,9 +189,12 @@ tokenizer = MistralTokenizer.from_model("pixtral")
|
|
| 186 |
def preprocess_image(image):
|
| 187 |
if image is None:
|
| 188 |
raise ValueError("No image provided")
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
| 192 |
return image_tensor
|
| 193 |
|
| 194 |
@contextmanager
|
|
@@ -206,7 +212,9 @@ def generate_text(image, prompt, max_tokens):
|
|
| 206 |
try:
|
| 207 |
with gpu_memory_manager():
|
| 208 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 209 |
-
|
|
|
|
|
|
|
| 210 |
model.to(device)
|
| 211 |
|
| 212 |
tokenized = tokenizer.encode_chat_completion(
|
|
@@ -242,8 +250,11 @@ def calculate_similarity(image1, image2):
|
|
| 242 |
try:
|
| 243 |
with gpu_memory_manager():
|
| 244 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 245 |
-
|
| 246 |
-
|
|
|
|
|
|
|
|
|
|
| 247 |
model.to(device)
|
| 248 |
|
| 249 |
with torch.no_grad():
|
|
|
|
| 15 |
from typing import List, Optional, Tuple
|
| 16 |
import gc
|
| 17 |
from contextlib import contextmanager
|
| 18 |
+
import os
|
| 19 |
+
from loadimg import load_img
|
| 20 |
+
# Add this near the top of the file
|
| 21 |
|
| 22 |
title = "# **WIP / DEMO** 🙋🏻♂️Welcome to Tonic's Pixtral Model Demo"
|
| 23 |
description = """
|
|
|
|
| 28 |
### Join us :
|
| 29 |
🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [](https://discord.gg/qdfnvSPcqP) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Build Tonic](https://git.tonic-ai.com/contribute)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
|
| 30 |
"""
|
| 31 |
+
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
|
| 32 |
+
model_path = snapshot_download(repo_id="mistralai/Pixtral-12B-2409", token=HUGGINGFACE_TOKEN)
|
| 33 |
|
| 34 |
with open(f'{model_path}/params.json', 'r') as f:
|
| 35 |
params = json.load(f)
|
|
|
|
| 189 |
def preprocess_image(image):
|
| 190 |
if image is None:
|
| 191 |
raise ValueError("No image provided")
|
| 192 |
+
|
| 193 |
+
pil_image = load_img(image, output_type="pil", input_type="auto")
|
| 194 |
+
|
| 195 |
+
pil_image = pil_image.convert('RGB')
|
| 196 |
+
pil_image = pil_image.resize((params['vision_encoder']['image_size'], params['vision_encoder']['image_size']))
|
| 197 |
+
image_tensor = torch.tensor(np.array(pil_image)).permute(2, 0, 1).unsqueeze(0).float() / 255.0
|
| 198 |
return image_tensor
|
| 199 |
|
| 200 |
@contextmanager
|
|
|
|
| 212 |
try:
|
| 213 |
with gpu_memory_manager():
|
| 214 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 215 |
+
# Use load_img here
|
| 216 |
+
image_pil = load_img(image, output_type="pil", input_type="auto")
|
| 217 |
+
image_tensor = preprocess_image(image_pil).to(device)
|
| 218 |
model.to(device)
|
| 219 |
|
| 220 |
tokenized = tokenizer.encode_chat_completion(
|
|
|
|
| 250 |
try:
|
| 251 |
with gpu_memory_manager():
|
| 252 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 253 |
+
# Use load_img for both images
|
| 254 |
+
pil_image1 = load_img(image1, output_type="pil", input_type="auto")
|
| 255 |
+
pil_image2 = load_img(image2, output_type="pil", input_type="auto")
|
| 256 |
+
tensor1 = preprocess_image(pil_image1).to(device)
|
| 257 |
+
tensor2 = preprocess_image(pil_image2).to(device)
|
| 258 |
model.to(device)
|
| 259 |
|
| 260 |
with torch.no_grad():
|
requirements.txt
CHANGED
|
@@ -3,4 +3,5 @@ safetensors>=0.3.1
|
|
| 3 |
gradio>=3.32.0
|
| 4 |
Pillow>=9.0.0
|
| 5 |
numpy>=1.21.0
|
| 6 |
-
mistral_common
|
|
|
|
|
|
| 3 |
gradio>=3.32.0
|
| 4 |
Pillow>=9.0.0
|
| 5 |
numpy>=1.21.0
|
| 6 |
+
mistral_common
|
| 7 |
+
loadimg
|