Javedalam committed · Commit adfd866 · verified · Parent: 9a1e742

Create app.py

Files changed (1): app.py (+118, -16)
app.py CHANGED
@@ -1,22 +1,124 @@
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import gradio as gr
+
+MODEL_ID = "qvac/genesis-i-model"  # HF repo id
+
+print("Loading tokenizer...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+
+print("Detecting device & dtype...")
+if torch.cuda.is_available():
+    # Prefer BF16 on modern GPUs, else fall back to FP16
+    try:
+        bf16_ok = torch.cuda.is_bf16_supported()
+    except AttributeError:
+        bf16_ok = False

-model_id = "qvac/genesisI-model"
+    torch_dtype = torch.bfloat16 if bf16_ok else torch.float16
+    device_map = "auto"
+else:
+    # CPU Space or no GPU: use full precision
+    torch_dtype = torch.float32
+    device_map = "cpu"

-tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+print(f"Loading model on {device_map} with dtype={torch_dtype}...")
 model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    torch_dtype=torch.bfloat16,  # or torch.float16 on T4
-    device_map="auto"
+    MODEL_ID,
+    torch_dtype=torch_dtype,
+    device_map=device_map,
 )
+model.eval()

-prompt = "Explain precision vs. recall in one paragraph."
-inputs = tok(prompt, return_tensors="pt").to(model.device)
-out = model.generate(
-    **inputs,
-    max_new_tokens=256,
-    do_sample=True,
-    top_p=0.9,
-    temperature=0.7,
-)
-print(tok.decode(out[0], skip_special_tokens=True))
+
+def generate(
+    prompt: str,
+    temperature: float = 0.7,
+    top_p: float = 0.9,
+    max_new_tokens: int = 256,
+):
+    if not prompt.strip():
+        return "Please enter a prompt."
+
+    inputs = tokenizer(prompt, return_tensors="pt")
+    # Move inputs to the same device as the model
+    inputs = {k: v.to(model.device) for k, v in inputs.items()}
+
+    with torch.no_grad():
+        output_ids = model.generate(
+            **inputs,
+            max_new_tokens=max_new_tokens,
+            do_sample=True,
+            temperature=temperature,
+            top_p=top_p,
+            pad_token_id=tokenizer.eos_token_id,
+        )
+
+    text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    # Return ONLY the completion after the original prompt, for cleanliness
+    if text.startswith(prompt):
+        text = text[len(prompt):].lstrip()
+
+    return text
+
+
+with gr.Blocks() as demo:
+    gr.Markdown(
+        """
+# QVAC Genesis I – Educational LLM Demo
+
+Model: **qvac/genesis-i-model**
+Trained on the QVAC Genesis I synthetic educational dataset (STEM-heavy).
+        """
+    )
+
+    with gr.Row():
+        with gr.Column(scale=3):
+            prompt = gr.Textbox(
+                label="Prompt",
+                placeholder="Ask a STEM question, e.g. 'Explain Gibbs free energy to a high school student.'",
+                lines=6,
+            )
+            temperature = gr.Slider(
+                minimum=0.1,
+                maximum=1.2,
+                value=0.7,
+                step=0.05,
+                label="Temperature (creativity)",
+            )
+            top_p = gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.9,
+                step=0.05,
+                label="Top-p (nucleus sampling)",
+            )
+            max_new_tokens = gr.Slider(
+                minimum=16,
+                maximum=512,
+                value=256,
+                step=16,
+                label="Max new tokens",
+            )
+            submit = gr.Button("Generate")
+
+        with gr.Column(scale=4):
+            output = gr.Textbox(
+                label="Model output",
+                lines=18,
+            )
+
+    submit.click(
+        fn=generate,
+        inputs=[prompt, temperature, top_p, max_new_tokens],
+        outputs=output,
+    )
+
+    # Press Enter in the prompt box to generate
+    prompt.submit(
+        fn=generate,
+        inputs=[prompt, temperature, top_p, max_new_tokens],
+        outputs=output,
+    )
+
+demo.queue().launch()
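
Note: a Space running this app.py also needs its Python dependencies declared in a requirements.txt next to it. No requirements file is part of this commit, so the list below is only a sketch inferred from the imports (torch, transformers, gradio) and from the device_map argument to from_pretrained, which transformers delegates to accelerate; pin versions as appropriate for the target hardware.

    torch
    transformers
    accelerate  # assumed: needed because from_pretrained is called with device_map
    gradio

With these installed, running `python app.py` locally starts the Gradio UI on Gradio's default port (7860).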