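"""Gradio demo Space for qvac/genesis-i-model, an educational (STEM-focused) LLM."""
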
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
MODEL_ID = "qvac/genesis-i-model"
# ----------------------
# Load tokenizer & model
# ----------------------
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
# Make sure we have a pad token
if tokenizer.pad_token is None and tokenizer.eos_token is not None:
    tokenizer.pad_token = tokenizer.eos_token
print("Detecting device & dtype...")
if torch.cuda.is_available():
    # On ZeroGPU / real GPU: use bf16 if supported, else fp16
    try:
        bf16_ok = torch.cuda.is_bf16_supported()
    except AttributeError:
        bf16_ok = False
    torch_dtype = torch.bfloat16 if bf16_ok else torch.float16
    device_map = "auto"
else:
    # CPU fallback
    torch_dtype = torch.float32
    device_map = "cpu"
print(f"Loading model on {device_map} with dtype={torch_dtype}...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch_dtype,
    device_map=device_map,
)
model.eval()
# ----------------------
# Helper: build chat input
# ----------------------
def build_inputs(prompt: str):
    """
    Build input_ids using the model's chat_template.
    We give it a simple system + user conversation and
    ask the tokenizer to add the assistant generation prompt.
    """
    messages = [
        {
            "role": "system",
            "content": (
                "You are an educational AI tutor. "
                "Explain clearly and precisely, focusing on math, science, "
                "engineering, programming, and medical education. "
                "Show intermediate steps when useful, but avoid rambling."
            ),
        },
        {
            "role": "user",
            "content": prompt,
        },
    ]
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    )
    return input_ids.to(model.device)
# ----------------------
# Generation function
# ----------------------
def generate(
    prompt: str,
    temperature: float = 0.7,
    top_p: float = 0.9,
    max_new_tokens: int = 256,
):
    if not prompt.strip():
        return "Please enter a prompt."
    input_ids = build_inputs(prompt)
    with torch.no_grad():
        output_ids = model.generate(
            input_ids=input_ids,
            max_new_tokens=int(max_new_tokens),  # Gradio sliders pass floats
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=1.1,  # light anti-repetition
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    # Keep only the newly generated tokens (assistant part)
    new_tokens = output_ids[0, input_ids.shape[-1]:]
    text = tokenizer.decode(new_tokens, skip_special_tokens=True)
    text = text.strip()
    if not text:
        text = "[Empty response]"
    return text
# ----------------------
# Gradio UI
# ----------------------
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # QVAC Genesis I – Educational LLM Demo

        Model: **qvac/genesis-i-model**

        Trained on the QVAC Genesis I synthetic educational dataset (STEM-heavy).
        Ask it math, science, engineering, or medical education questions.
        """
    )
    with gr.Row():
        with gr.Column(scale=3):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Example: Explain why 2 + 2 = 4 in a way a 10-year-old can understand.",
                lines=6,
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.2,
                value=0.7,
                step=0.05,
                label="Temperature (creativity)",
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.05,
                label="Top-p (nucleus sampling)",
            )
            max_new_tokens = gr.Slider(
                minimum=16,
                maximum=512,
                value=256,
                step=16,
                label="Max new tokens",
            )
            submit = gr.Button("Generate")
        with gr.Column(scale=4):
            output = gr.Textbox(
                label="Model output",
                lines=18,
            )

    submit.click(
        fn=generate,
        inputs=[prompt, temperature, top_p, max_new_tokens],
        outputs=output,
    )
    # Press Enter in the prompt box to generate
    prompt.submit(
        fn=generate,
        inputs=[prompt, temperature, top_p, max_new_tokens],
        outputs=output,
    )
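# Enable the request queue so longer generations are handled without request timeouts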
demo.queue().launch()