# Qvac_genesis_i / app.py

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr

MODEL_ID = "qvac/genesis-i-model"  # HF repo id

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
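# trust_remote_code=True allows the repo to supply its own tokenizer/model classes;
# only enable it for repositories you trust.
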
print("Detecting device & dtype...")
if torch.cuda.is_available():
    # Prefer BF16 on modern GPUs, else fall back to FP16
    try:
        bf16_ok = torch.cuda.is_bf16_supported()
    except AttributeError:
        bf16_ok = False
    torch_dtype = torch.bfloat16 if bf16_ok else torch.float16
    device_map = "auto"
else:
    # CPU Space or no GPU: use full precision
    torch_dtype = torch.float32
    device_map = "cpu"
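# device_map="auto" relies on the accelerate package to place the weights
# (and can shard them across multiple GPUs when more than one is visible).
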
print(f"Loading model on {device_map} with dtype={torch_dtype}...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch_dtype,
    device_map=device_map,
    trust_remote_code=True,  # match the tokenizer in case the repo ships custom code
)
model.eval()
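
# model.eval() above disables dropout and other training-only behaviour.
#
# Optional sketch (not what this Space does): on a small GPU the model could
# instead be loaded 4-bit quantized via bitsandbytes, roughly:
#
#   from transformers import BitsAndBytesConfig
#   model = AutoModelForCausalLM.from_pretrained(
#       MODEL_ID,
#       quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#       device_map="auto",
#   )
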
def generate(
    prompt: str,
    temperature: float = 0.7,
    top_p: float = 0.9,
    max_new_tokens: int = 256,
):
    if not prompt.strip():
        return "Please enter a prompt."

    inputs = tokenizer(prompt, return_tensors="pt")
    # Move inputs to the same device as the model
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),  # Gradio sliders pass floats
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            pad_token_id=tokenizer.eos_token_id,
        )

    text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # Return ONLY the completion after the original prompt, for cleanliness
    if text.startswith(prompt):
        text = text[len(prompt):].lstrip()
    return text
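
# Quick local sanity check (left commented out so the Space only serves the UI):
#   print(generate("Explain Newton's second law in one sentence.", max_new_tokens=64))
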
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # QVAC Genesis I – Educational LLM Demo

        Model: **qvac/genesis-i-model**

        Trained on the QVAC Genesis I synthetic educational dataset (STEM-heavy).
        """
    )
    with gr.Row():
        with gr.Column(scale=3):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Ask a STEM question, e.g. 'Explain Gibbs free energy to a high school student.'",
                lines=6,
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.2,
                value=0.7,
                step=0.05,
                label="Temperature (creativity)",
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.05,
                label="Top-p (nucleus sampling)",
            )
            max_new_tokens = gr.Slider(
                minimum=16,
                maximum=512,
                value=256,
                step=16,
                label="Max new tokens",
            )
            submit = gr.Button("Generate")
        with gr.Column(scale=4):
            output = gr.Textbox(
                label="Model output",
                lines=18,
            )

    submit.click(
        fn=generate,
        inputs=[prompt, temperature, top_p, max_new_tokens],
        outputs=output,
    )
    # Press Enter in the prompt box to generate
    prompt.submit(
        fn=generate,
        inputs=[prompt, temperature, top_p, max_new_tokens],
        outputs=output,
    )
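
# launch() accepts extra options when needed, e.g. launch(share=True) for a public
# link or launch(server_name="0.0.0.0") to bind all interfaces; the defaults work
# on Hugging Face Spaces.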
demo.queue().launch()