# grammar-correction-api/download_models.py
"""Pre-download models during the Docker build so the runtime image starts with a warm cache."""
import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
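
# Optional (not in the original script): pin the Hugging Face cache to a fixed path so a
# multi-stage Docker build can copy it into the runtime image. HF_HOME is the cache root
# honoured by huggingface_hub/transformers; the path below is only an example.
# os.environ.setdefault("HF_HOME", "/opt/hf-cache")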
# Hugging Face Hub model identifiers.
GENDER_MODEL_PATH = "google/gemma-3-270m-qat-q4_0-unquantized"
BASE_MODEL_PATH = "unsloth/gemma-2b-it"
# Fine-tuned grammar-correction LoRA adapter hosted on the Hugging Face Hub.
LORA_ADAPTER_PATH = "enoch10jason/gemma-grammar-lora"

hf_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
if not hf_token:
    raise ValueError("HUGGING_FACE_HUB_TOKEN environment variable is required to download models.")
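
# Typical build-time invocation (a sketch, assuming BuildKit secrets are used to pass the
# token; the secret id "hf_token" and file names are illustrative, not part of this repo):
#
#   # Dockerfile
#   RUN --mount=type=secret,id=hf_token \
#       HUGGING_FACE_HUB_TOKEN=$(cat /run/secrets/hf_token) python download_models.py
#
#   # build command
#   docker build --secret id=hf_token,src=hf_token.txt -t grammar-correction-api .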
print("--- Starting Model Pre-downloading ---")
# 1. Download Gender Model
print(f"Downloading: {GENDER_MODEL_PATH}")
AutoTokenizer.from_pretrained(GENDER_MODEL_PATH, token=hf_token)
AutoModelForCausalLM.from_pretrained(GENDER_MODEL_PATH, token=hf_token)
print("βœ… Gender model downloaded.")

# 2. Download the grammar base model.
print(f"Downloading base model: {BASE_MODEL_PATH}")
# The base model must be instantiated so the LoRA adapter can be attached to it below.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_PATH,
    token=hf_token,
    dtype=torch.float32,  # recent transformers accept dtype=; older releases use torch_dtype=
)
AutoTokenizer.from_pretrained(BASE_MODEL_PATH, token=hf_token)
print("✅ Base model downloaded.")

# 3. Download the fine-tuned LoRA adapter.
print(f"Downloading LoRA adapter: {LORA_ADAPTER_PATH}")
# Downloads the (private) adapter, attaches it to the base model, and caches it.
PeftModel.from_pretrained(base_model, LORA_ADAPTER_PATH, token=hf_token)
print("✅ LoRA adapter downloaded.")

print("--- Model Pre-downloading Complete ---")