import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr

MODEL_ID = "qvac/genesis-i-model"

# ----------------------
# Load tokenizer & model
# ----------------------
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Make sure we have a pad token
if tokenizer.pad_token is None and tokenizer.eos_token is not None:
    tokenizer.pad_token = tokenizer.eos_token
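    # (Decoder-only checkpoints often ship without a dedicated pad token;
    # reusing the EOS token for padding is the usual fallback.)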
| print("Detecting device & dtype...") | |
| if torch.cuda.is_available(): | |
| # On ZeroGPU / real GPU: use bf16 if supported, else fp16 | |
| try: | |
| bf16_ok = torch.cuda.is_bf16_supported() | |
| except AttributeError: | |
| bf16_ok = False | |
| torch_dtype = torch.bfloat16 if bf16_ok else torch.float16 | |
| device_map = "auto" | |
| else: | |
| # CPU fallback | |
| torch_dtype = torch.float32 | |
| device_map = "cpu" | |
| print(f"Loading model on {device_map} with dtype={torch_dtype}...") | |
| model = AutoModelForCausalLM.from_pretrained( | |
| MODEL_ID, | |
| torch_dtype=torch_dtype, | |
| device_map=device_map, | |
| ) | |
| model.eval() | |
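# Optional quick sanity check before wiring up the UI; left commented out so
# Space startup stays fast (assumes the checkpoint downloaded and loaded cleanly):
#   ids = tokenizer("2 + 2 =", return_tensors="pt").input_ids.to(model.device)
#   print(tokenizer.decode(model.generate(ids, max_new_tokens=8)[0]))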
# ----------------------
# Helper: build chat input
# ----------------------
def build_inputs(prompt: str):
    """
    Build input_ids using the model's chat_template.
    We give it a simple system + user conversation and ask the
    tokenizer to append the assistant generation prompt.
    """
    messages = [
        {
            "role": "system",
            "content": (
                "You are an educational AI tutor. "
                "Explain clearly and precisely, focusing on math, science, "
                "engineering, programming, and medical education. "
                "Show intermediate steps when useful, but avoid rambling."
            ),
        },
        {
            "role": "user",
            "content": prompt,
        },
    ]
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    )
    return input_ids.to(model.device)
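# Example (exact token ids depend on the checkpoint's chat template):
#   ids = build_inputs("State Ohm's law.")  # -> LongTensor of shape (1, prompt_len)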
# ----------------------
# Generation function
# ----------------------
def generate(
    prompt: str,
    temperature: float = 0.7,
    top_p: float = 0.9,
    max_new_tokens: int = 256,
):
    if not prompt.strip():
        return "Please enter a prompt."

    input_ids = build_inputs(prompt)

    with torch.no_grad():
        output_ids = model.generate(
            input_ids=input_ids,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=1.1,  # light anti-repetition
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Keep only the newly generated tokens (assistant part)
    new_tokens = output_ids[0, input_ids.shape[-1]:]
    text = tokenizer.decode(new_tokens, skip_special_tokens=True)

    text = text.strip()
    if not text:
        text = "[Empty response]"
    return text
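# Example:
#   print(generate("Explain Newton's second law in one paragraph.", max_new_tokens=128))
# Note: model.generate() may warn about a missing attention_mask when the pad
# token is reused as EOS; calling apply_chat_template(..., return_dict=True)
# and forwarding the returned mask would silence that warning.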
# ----------------------
# Gradio UI
# ----------------------
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # QVAC Genesis I – Educational LLM Demo

        Model: **qvac/genesis-i-model**

        Trained on the QVAC Genesis I synthetic educational dataset (STEM-heavy).
        Ask it math, science, engineering, or medical education questions.
        """
    )

    with gr.Row():
        with gr.Column(scale=3):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Example: Explain why 2 + 2 = 4 in a way a 10-year-old can understand.",
                lines=6,
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.2,
                value=0.7,
                step=0.05,
                label="Temperature (creativity)",
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.05,
                label="Top-p (nucleus sampling)",
            )
            max_new_tokens = gr.Slider(
                minimum=16,
                maximum=512,
                value=256,
                step=16,
                label="Max new tokens",
            )
            submit = gr.Button("Generate")

        with gr.Column(scale=4):
            output = gr.Textbox(
                label="Model output",
                lines=18,
            )

    submit.click(
        fn=generate,
        inputs=[prompt, temperature, top_p, max_new_tokens],
        outputs=output,
    )

    # Press Enter in the prompt box to generate
    prompt.submit(
        fn=generate,
        inputs=[prompt, temperature, top_p, max_new_tokens],
        outputs=output,
    )

demo.queue().launch()
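# Run outside Spaces with `python app.py` (assuming this file is saved as
# app.py; requires torch, transformers, and gradio to be installed).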