# Qvac_genesis_i / app.py

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr

MODEL_ID = "qvac/genesis-i-model"  # HF repo id

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
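# trust_remote_code=True allows the repo to supply its own tokenizer/model classes;
# only enable it for repositories you trust.
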
print("Detecting device & dtype...")
if torch.cuda.is_available():
    # Prefer BF16 on modern GPUs, else fall back to FP16
    try:
        bf16_ok = torch.cuda.is_bf16_supported()
    except AttributeError:
        bf16_ok = False
    torch_dtype = torch.bfloat16 if bf16_ok else torch.float16
    device_map = "auto"
else:
    # CPU Space or no GPU: use full precision
    torch_dtype = torch.float32
    device_map = "cpu"
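# device_map="auto" relies on the accelerate package to place the weights
# (and can shard them across multiple GPUs when more than one is visible).
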
print(f"Loading model on {device_map} with dtype={torch_dtype}...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch_dtype,
    device_map=device_map,
    trust_remote_code=True,  # match the tokenizer in case the repo ships custom code
)
model.eval()
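
# model.eval() above disables dropout and other training-only behaviour.
#
# Optional sketch (not what this Space does): on a small GPU the model could
# instead be loaded 4-bit quantized via bitsandbytes, roughly:
#
#   from transformers import BitsAndBytesConfig
#   model = AutoModelForCausalLM.from_pretrained(
#       MODEL_ID,
#       quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#       device_map="auto",
#   )
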
def generate(
    prompt: str,
    temperature: float = 0.7,
    top_p: float = 0.9,
    max_new_tokens: int = 256,
):
    if not prompt.strip():
        return "Please enter a prompt."

    inputs = tokenizer(prompt, return_tensors="pt")
    # Move inputs to the same device as the model
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),  # Gradio sliders pass floats
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            pad_token_id=tokenizer.eos_token_id,
        )

    text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # Return ONLY the completion after the original prompt, for cleanliness
    if text.startswith(prompt):
        text = text[len(prompt):].lstrip()
    return text
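
# Quick local sanity check (left commented out so the Space only serves the UI):
#   print(generate("Explain Newton's second law in one sentence.", max_new_tokens=64))
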
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # QVAC Genesis I – Educational LLM Demo

        Model: **qvac/genesis-i-model**

        Trained on the QVAC Genesis I synthetic educational dataset (STEM-heavy).
        """
    )
    with gr.Row():
        with gr.Column(scale=3):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Ask a STEM question, e.g. 'Explain Gibbs free energy to a high school student.'",
                lines=6,
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.2,
                value=0.7,
                step=0.05,
                label="Temperature (creativity)",
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.05,
                label="Top-p (nucleus sampling)",
            )
            max_new_tokens = gr.Slider(
                minimum=16,
                maximum=512,
                value=256,
                step=16,
                label="Max new tokens",
            )
            submit = gr.Button("Generate")
        with gr.Column(scale=4):
            output = gr.Textbox(
                label="Model output",
                lines=18,
            )

    submit.click(
        fn=generate,
        inputs=[prompt, temperature, top_p, max_new_tokens],
        outputs=output,
    )
    # Press Enter in the prompt box to generate
    prompt.submit(
        fn=generate,
        inputs=[prompt, temperature, top_p, max_new_tokens],
        outputs=output,
    )
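
# launch() accepts extra options when needed, e.g. launch(share=True) for a public
# link or launch(server_name="0.0.0.0") to bind all interfaces; the defaults work
# on Hugging Face Spaces.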
demo.queue().launch()