import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr

MODEL_ID = "qvac/genesis-i-model"  # HF repo id

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

print("Detecting device & dtype...")
if torch.cuda.is_available():
    # Prefer BF16 on modern GPUs, else fall back to FP16
    try:
        bf16_ok = torch.cuda.is_bf16_supported()
    except AttributeError:
        bf16_ok = False
    torch_dtype = torch.bfloat16 if bf16_ok else torch.float16
    device_map = "auto"
else:
    # CPU Space or no GPU: use full precision
    torch_dtype = torch.float32
    device_map = "cpu"

print(f"Loading model on {device_map} with dtype={torch_dtype}...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch_dtype,
    device_map=device_map,
    trust_remote_code=True,  # match the tokenizer, in case the repo ships custom model code
)
model.eval()

def generate(
    prompt: str,
    temperature: float = 0.7,
    top_p: float = 0.9,
    max_new_tokens: int = 256,
):
    if not prompt.strip():
        return "Please enter a prompt."
    inputs = tokenizer(prompt, return_tensors="pt")
    # Move inputs to the same device as the model
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            pad_token_id=tokenizer.eos_token_id,
        )
    text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # Return ONLY the completion after the original prompt, for cleanliness
    if text.startswith(prompt):
        text = text[len(prompt):].lstrip()
    return text
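
# Quick local smoke test, if you want to exercise the function outside the UI.
# This is only an illustrative sketch (the prompt text and parameter values are
# arbitrary examples, not taken from the original Space):
#
#   print(generate("Explain Gibbs free energy to a high school student.",
#                  temperature=0.7, top_p=0.9, max_new_tokens=128))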

with gr.Blocks() as demo:
    gr.Markdown(
        """
# QVAC Genesis I – Educational LLM Demo

Model: **qvac/genesis-i-model**

Trained on the QVAC Genesis I synthetic educational dataset (STEM-heavy).
"""
    )
    with gr.Row():
        with gr.Column(scale=3):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Ask a STEM question, e.g. 'Explain Gibbs free energy to a high school student.'",
                lines=6,
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.2,
                value=0.7,
                step=0.05,
                label="Temperature (creativity)",
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.05,
                label="Top-p (nucleus sampling)",
            )
            max_new_tokens = gr.Slider(
                minimum=16,
                maximum=512,
                value=256,
                step=16,
                label="Max new tokens",
            )
            submit = gr.Button("Generate")
        with gr.Column(scale=4):
            output = gr.Textbox(
                label="Model output",
                lines=18,
            )
    submit.click(
        fn=generate,
        inputs=[prompt, temperature, top_p, max_new_tokens],
        outputs=output,
    )
    # Press Enter in the prompt box to generate
    prompt.submit(
        fn=generate,
        inputs=[prompt, temperature, top_p, max_new_tokens],
        outputs=output,
    )

demo.queue().launch()
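
# A minimal requirements.txt sketch for this Space. The package names are the
# usual ones for this stack; they are not taken from the original repo, and
# version pinning is left to the reader:
#
#   torch
#   transformers
#   accelerate   # typically required for device_map="auto"
#   gradio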