burtenshaw (HF Staff) committed
Commit c35409b · verified · 1 Parent(s): cf6ec37

Upload folder using huggingface_hub

Files changed (2)
  1. app.py +47 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,47 @@
+ import spaces
+ import gradio as gr
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ # Cap on generation length; the original referenced an undefined `max_new_tokens`.
+ MAX_NEW_TOKENS = 256
+
+
+ def load_model():
+     model_id = "nanochat-students/chat-d20"
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+     tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=False)
+     model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=False, dtype=torch.bfloat16).to(device)
+     model.eval()
+
+     return tokenizer, model, device
+
+
+ tokenizer, model, device = load_model()
+
+
+ @spaces.GPU
+ def generate(prompt, history):
+     # Append the new user turn to the running chat history.
+     conversation = history + [{"role": "user", "content": prompt}]
+
+     inputs = tokenizer.apply_chat_template(
+         conversation,
+         add_generation_prompt=True,
+         tokenize=True,
+         return_tensors="pt",
+     ).to(device)
+
+     with torch.no_grad():
+         outputs = model.generate(
+             inputs,
+             max_new_tokens=MAX_NEW_TOKENS,
+         )
+
+     # Decode only the newly generated tokens, not the echoed prompt.
+     return tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
+
+
+ demo = gr.ChatInterface(fn=generate, type="messages", examples=["hello", "hola", "merhaba"], title="NanoChat")
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ torch
+ git+https://github.com/huggingface/transformers.git@nanochat-implementation
+ tiktoken