Spaces:

enoch10jason
/

grammar-correction-api

Build error

App Files Files Community

Enoch Jason J committed on Oct 7

Commit

6401a84

1 Parent(s): 053c667

Deploy final app and ignore local artifacts

Browse files

Files changed (5) hide show

.gitignore +2 -0
Dockerfile +8 -4
app.py +11 -14
download_models.py +9 -4
upload_lora.py +39 -0

.gitignore CHANGED Viewed

@@ -67,3 +67,5 @@ dist/
 *~
 *.doc

 *~
 *.doc
+gemma-grammar-lora/

Dockerfile CHANGED Viewed

@@ -10,19 +10,23 @@ COPY download_models.py .
 RUN pip install --no-cache-dir -r requirements_local.txt
 # --- Pre-download and Cache Models ---
-# FIX: The RUN command is now updated to securely access the HF_TOKEN secret.
-# 1. --mount=type=secret... makes the secret available at a temporary path.
-# 2. The environment variable is set by reading from that secret path.
 RUN --mount=type=cache,target=/root/.cache/huggingface \
     --mount=type=secret,id=HUGGING_FACE_HUB_TOKEN \
     HUGGING_FACE_HUB_TOKEN=$(cat /run/secrets/HUGGING_FACE_HUB_TOKEN) python download_models.py
-# Copy the main application code
 COPY app.py .
 # Expose the port the app runs on
 EXPOSE 8000
 # Command to run the application
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]

 RUN pip install --no-cache-dir -r requirements_local.txt
 # --- Pre-download and Cache Models ---
+# The RUN command securely accesses the HF_TOKEN secret.
 RUN --mount=type=cache,target=/root/.cache/huggingface \
     --mount=type=secret,id=HUGGING_FACE_HUB_TOKEN \
     HUGGING_FACE_HUB_TOKEN=$(cat /run/secrets/HUGGING_FACE_HUB_TOKEN) python download_models.py
+# --- Copy Application Files ---
+# FIX: Standardized to use 'app.py'
 COPY app.py .
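+# FIX: Copy the local LoRA adapter from the build context.
+# This assumes you've moved 'gemma-grammar-lora' into your project folder.
+# NOTE(review): this commit also adds 'gemma-grammar-lora/' to .gitignore, so the
+# folder will be absent from a git-pushed build context and this COPY would fail.
+# app.py now loads the adapter by its Hub repo id, so confirm this COPY is still needed.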
+COPY ./gemma-grammar-lora /app/gemma-grammar-lora
 # Expose the port the app runs on
 EXPOSE 8000
 # Command to run the application
+# FIX: Standardized to use 'app:app'
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]

app.py CHANGED Viewed

@@ -8,10 +8,11 @@ import os
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
-# --- Model Paths (will be loaded from local cache) ---
 GENDER_MODEL_PATH = "google/gemma-3-270m-qat-q4_0-unquantized"
 BASE_MODEL_PATH = "unsloth/gemma-2b-it"
-LORA_ADAPTER_PATH = "unsloth/gemma-2b-it-lora-test"
 # --- Global variables for models ---
 grammar_model = None
@@ -22,26 +23,23 @@ device = "cpu"
 print("--- Starting Model Loading ---")
-# The token is only used during the build, not at runtime.
-hf_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
 try:
-    # Load models from the local cache inside the container. Startup is now fast.
     print(f"Loading gender model from cache: {GENDER_MODEL_PATH}")
-    gender_tokenizer = AutoTokenizer.from_pretrained(GENDER_MODEL_PATH, token=hf_token)
-    gender_model = AutoModelForCausalLM.from_pretrained(GENDER_MODEL_PATH, token=hf_token).to(device)
     print("✅ Gender verifier model loaded successfully!")
     print(f"Loading base model for grammar correction from cache: {BASE_MODEL_PATH}")
     base_model = AutoModelForCausalLM.from_pretrained(
         BASE_MODEL_PATH,
-        token=hf_token,
         dtype=torch.float32,
     ).to(device)
-    grammar_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH, token=hf_token)
     print(f"Applying LoRA adapter from cache: {LORA_ADAPTER_PATH}")
-    grammar_model = PeftModel.from_pretrained(base_model, LORA_ADAPTER_PATH, token=hf_token).to(device)
     print("✅ Grammar correction model loaded successfully!")
     if grammar_tokenizer.pad_token is None:
@@ -67,7 +65,7 @@ class CorrectionResponse(BaseModel):
     original_text: str
     corrected_text: str
-# --- Helper Functions for Text Cleaning ---
 def clean_grammar_response(text: str) -> str:
     if "Response:" in text:
         parts = text.split("Response:")
@@ -90,8 +88,7 @@ def correct_gender_rules(text: str) -> str:
         text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
     return text
-# --- API Endpoints ---
 @app.post("/correct_grammar", response_model=CorrectionResponse)
 async def handle_grammar_correction(request: CorrectionRequest):
     if not grammar_model or not grammar_tokenizer:

 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
+# --- Model Paths ---
 GENDER_MODEL_PATH = "google/gemma-3-270m-qat-q4_0-unquantized"
 BASE_MODEL_PATH = "unsloth/gemma-2b-it"
+# FIX: This now correctly points to your model on the Hugging Face Hub.
+LORA_ADAPTER_PATH = "enoch10jason/gemma-grammar-lora"
 # --- Global variables for models ---
 grammar_model = None
 print("--- Starting Model Loading ---")
 try:
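+    # Models are expected to be loaded from the cache pre-downloaded during the image build,
+    # so no token is passed at runtime.
+    # NOTE(review): the Dockerfile downloads into a BuildKit cache mount
+    # (--mount=type=cache,target=/root/.cache/huggingface); cache-mount contents are not
+    # stored in the image layers — verify the models are actually present at runtime.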
     print(f"Loading gender model from cache: {GENDER_MODEL_PATH}")
+    gender_tokenizer = AutoTokenizer.from_pretrained(GENDER_MODEL_PATH)
+    gender_model = AutoModelForCausalLM.from_pretrained(GENDER_MODEL_PATH).to(device)
     print("✅ Gender verifier model loaded successfully!")
     print(f"Loading base model for grammar correction from cache: {BASE_MODEL_PATH}")
     base_model = AutoModelForCausalLM.from_pretrained(
         BASE_MODEL_PATH,
         dtype=torch.float32,
     ).to(device)
+    grammar_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH)
     print(f"Applying LoRA adapter from cache: {LORA_ADAPTER_PATH}")
+    grammar_model = PeftModel.from_pretrained(base_model, LORA_ADAPTER_PATH).to(device)
     print("✅ Grammar correction model loaded successfully!")
     if grammar_tokenizer.pad_token is None:
     original_text: str
     corrected_text: str
+# --- Helper Functions (No changes needed) ---
 def clean_grammar_response(text: str) -> str:
     if "Response:" in text:
         parts = text.split("Response:")
         text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
     return text
+# --- API Endpoints (No changes needed) ---
 @app.post("/correct_grammar", response_model=CorrectionResponse)
 async def handle_grammar_correction(request: CorrectionRequest):
     if not grammar_model or not grammar_tokenizer:

download_models.py CHANGED Viewed

@@ -7,7 +7,8 @@ from peft import PeftModel
 GENDER_MODEL_PATH = "google/gemma-3-270m-qat-q4_0-unquantized"
 BASE_MODEL_PATH = "unsloth/gemma-2b-it"
-LORA_ADAPTER_PATH = "unsloth/gemma-2b-it-lora-test"
 hf_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
 if not hf_token:
@@ -21,8 +22,9 @@ AutoTokenizer.from_pretrained(GENDER_MODEL_PATH, token=hf_token)
 AutoModelForCausalLM.from_pretrained(GENDER_MODEL_PATH, token=hf_token)
 print("✅ Gender model downloaded.")
-# 2. Download Grammar Model (Base + Adapter)
-print(f"Downloading: {BASE_MODEL_PATH}")
 base_model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL_PATH,
     token=hf_token,
@@ -31,9 +33,12 @@ base_model = AutoModelForCausalLM.from_pretrained(
 AutoTokenizer.from_pretrained(BASE_MODEL_PATH, token=hf_token)
 print("✅ Base model downloaded.")
-print(f"Downloading: {LORA_ADAPTER_PATH}")
 PeftModel.from_pretrained(base_model, LORA_ADAPTER_PATH, token=hf_token)
 print("✅ LoRA adapter downloaded.")
 print("--- Model Pre-downloading Complete ---")

 GENDER_MODEL_PATH = "google/gemma-3-270m-qat-q4_0-unquantized"
 BASE_MODEL_PATH = "unsloth/gemma-2b-it"
+# This now points to your fine-tuned model on the Hugging Face Hub.
+LORA_ADAPTER_PATH = "enoch10jason/gemma-grammar-lora"
 hf_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
 if not hf_token:
 AutoModelForCausalLM.from_pretrained(GENDER_MODEL_PATH, token=hf_token)
 print("✅ Gender model downloaded.")
+# 2. Download Grammar Base Model
+print(f"Downloading base model: {BASE_MODEL_PATH}")
+# We need to load the base model into memory to attach the adapter to it for caching.
 base_model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL_PATH,
     token=hf_token,
 AutoTokenizer.from_pretrained(BASE_MODEL_PATH, token=hf_token)
 print("✅ Base model downloaded.")
+# 3. Download Your Fine-Tuned LoRA Adapter
+print(f"Downloading LoRA adapter: {LORA_ADAPTER_PATH}")
+# This step downloads the adapter and links it to the base model, caching it.
 PeftModel.from_pretrained(base_model, LORA_ADAPTER_PATH, token=hf_token)
 print("✅ LoRA adapter downloaded.")
 print("--- Model Pre-downloading Complete ---")

upload_lora.py ADDED Viewed

	@@ -0,0 +1,39 @@

+from huggingface_hub import HfApi, create_repo
+import os
+    # --- Configuration ---
+    # 1. Set the path to the local folder containing your clean LoRA adapter.
+    #    (Ensure you have removed checkpoints and optimizer files).
+LOCAL_LORA_PATH = "./gemma-grammar-lora"
+    # 2. Define the name for your new model repository on the Hub.
+    #    It's standard to use "YourUsername/YourModelName".
+REPO_ID = "enoch10jason/gemma-grammar-lora"
+    # --- Upload Script ---
+def main():
+        # Ensure the local path exists
+    if not os.path.isdir(LOCAL_LORA_PATH):
+        print(f"❌ Error: Local LoRA path not found at '{LOCAL_LORA_PATH}'")
+        print("Please ensure your clean 'gemma-grammar-lora' folder is inside your project directory.")
+        return
+    api = HfApi()
+        # Create the repository on the Hugging Face Hub (can be private)
+    create_repo(repo_id=REPO_ID, repo_type="model", exist_ok=True, private=True)
+    print(f"Uploading files from '{LOCAL_LORA_PATH}' to '{REPO_ID}'...")
+        # Upload the entire folder. This will automatically use Git LFS for large files.
+    api.upload_folder(
+        folder_path=LOCAL_LORA_PATH,
+        repo_id=REPO_ID,
+        repo_type="model",
+    )
+    print(f"✅ LoRA adapter uploaded successfully to: https://huggingface.co/{REPO_ID}")
+if __name__ == "__main__":
+    main()