Spaces:

Tonic
/

Pixtral

Paused

App Files Community

Tonic committed on Sep 12, 2024

Commit

af9af6d

verified ·

1 Parent(s): 56352f5

add loadimg from @not-lain

Browse files

Files changed (2) hide show

app.py +19 -8
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -15,6 +15,9 @@ import math
 from typing import List, Optional, Tuple
 import gc
 from contextlib import contextmanager
 title = "# **WIP / DEMO** 🙋🏻‍♂️Welcome to Tonic's Pixtral Model Demo"
 description = """
@@ -25,8 +28,8 @@ This demo showcases two capabilities of the Pixtral model:
 ### Join us :
 🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Build Tonic](https://git.tonic-ai.com/contribute)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
 """
-model_path = snapshot_download(repo_id="mistral-community/pixtral-12b-240910")
 with open(f'{model_path}/params.json', 'r') as f:
     params = json.load(f)
@@ -186,9 +189,12 @@ tokenizer = MistralTokenizer.from_model("pixtral")
 def preprocess_image(image):
     if image is None:
         raise ValueError("No image provided")
-    image = image.convert('RGB')
-    image = image.resize((params['vision_encoder']['image_size'], params['vision_encoder']['image_size']))
-    image_tensor = torch.tensor(np.array(image)).permute(2, 0, 1).unsqueeze(0).float() / 255.0
     return image_tensor
 @contextmanager
@@ -206,7 +212,9 @@ def generate_text(image, prompt, max_tokens):
     try:
         with gpu_memory_manager():
             device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-            image_tensor = preprocess_image(image).to(device)
             model.to(device)
             tokenized = tokenizer.encode_chat_completion(
@@ -242,8 +250,11 @@ def calculate_similarity(image1, image2):
     try:
         with gpu_memory_manager():
             device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-            tensor1 = preprocess_image(image1).to(device)
-            tensor2 = preprocess_image(image2).to(device)
             model.to(device)
             with torch.no_grad():

 from typing import List, Optional, Tuple
 import gc
 from contextlib import contextmanager
+import os
+from loadimg import load_img
+# Add this near the top of the file
 title = "# **WIP / DEMO** 🙋🏻‍♂️Welcome to Tonic's Pixtral Model Demo"
 description = """
 ### Join us :
 🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/qdfnvSPcqP) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Build Tonic](https://git.tonic-ai.com/contribute)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
 """
+HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
+model_path = snapshot_download(repo_id="mistralai/Pixtral-12B-2409", token=HUGGINGFACE_TOKEN)
 with open(f'{model_path}/params.json', 'r') as f:
     params = json.load(f)
 def preprocess_image(image):
     if image is None:
         raise ValueError("No image provided")
+    pil_image = load_img(image, output_type="pil", input_type="auto")
+    pil_image = pil_image.convert('RGB')
+    pil_image = pil_image.resize((params['vision_encoder']['image_size'], params['vision_encoder']['image_size']))
+    image_tensor = torch.tensor(np.array(pil_image)).permute(2, 0, 1).unsqueeze(0).float() / 255.0
     return image_tensor
 @contextmanager
     try:
         with gpu_memory_manager():
             device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+            # Use load_img here
+            image_pil = load_img(image, output_type="pil", input_type="auto")
+            image_tensor = preprocess_image(image_pil).to(device)
             model.to(device)
             tokenized = tokenizer.encode_chat_completion(
     try:
         with gpu_memory_manager():
             device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+            # Use load_img for both images
+            pil_image1 = load_img(image1, output_type="pil", input_type="auto")
+            pil_image2 = load_img(image2, output_type="pil", input_type="auto")
+            tensor1 = preprocess_image(pil_image1).to(device)
+            tensor2 = preprocess_image(pil_image2).to(device)
             model.to(device)
             with torch.no_grad():

requirements.txt CHANGED Viewed

@@ -3,4 +3,5 @@ safetensors>=0.3.1
 gradio>=3.32.0
 Pillow>=9.0.0
 numpy>=1.21.0
-mistral_common

 gradio>=3.32.0
 Pillow>=9.0.0
 numpy>=1.21.0
+mistral_common
+loadimg