burtenshaw (HF Staff) committed
Commit c35409b · verified · 1 Parent(s): cf6ec37

Upload folder using huggingface_hub

Files changed (2)
  1. app.py +47 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,47 @@
+ import spaces
+ import gradio as gr
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ # Cap on generation length; the original referenced an undefined `max_new_tokens`.
+ MAX_NEW_TOKENS = 256
+
+
+ def load_model():
+     model_id = "nanochat-students/chat-d20"
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+     tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=False)
+     model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=False, dtype=torch.bfloat16).to(device)
+     model.eval()
+
+     return tokenizer, model, device
+
+
+ tokenizer, model, device = load_model()
+
+
+ @spaces.GPU
+ def generate(prompt, history):
+     # Append the new user turn to the running chat history.
+     conversation = history + [{"role": "user", "content": prompt}]
+
+     inputs = tokenizer.apply_chat_template(
+         conversation,
+         add_generation_prompt=True,
+         tokenize=True,
+         return_tensors="pt",
+     ).to(device)
+
+     with torch.no_grad():
+         outputs = model.generate(
+             inputs,
+             max_new_tokens=MAX_NEW_TOKENS,
+         )
+
+     # Decode only the newly generated tokens, not the echoed prompt.
+     return tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
+
+
+ demo = gr.ChatInterface(fn=generate, type="messages", examples=["hello", "hola", "merhaba"], title="NanoChat")
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ torch
+ git+https://github.com/huggingface/transformers.git@nanochat-implementation
+ tiktoken