Enoch Jason J committed on
Commit 6401a84 · 1 Parent(s): 053c667

Deploy final app and ignore local artifacts

Files changed (5)
  1. .gitignore +2 -0
  2. Dockerfile +8 -4
  3. app.py +11 -14
  4. download_models.py +9 -4
  5. upload_lora.py +39 -0
.gitignore CHANGED
@@ -67,3 +67,5 @@ dist/
 *~
 
 *.doc
+
+gemma-grammar-lora/
Dockerfile CHANGED
@@ -10,19 +10,23 @@ COPY download_models.py .
 RUN pip install --no-cache-dir -r requirements_local.txt
 
 # --- Pre-download and Cache Models ---
-# FIX: The RUN command is now updated to securely access the HF_TOKEN secret.
-# 1. --mount=type=secret... makes the secret available at a temporary path.
-# 2. The environment variable is set by reading from that secret path.
+# The RUN command securely accesses the HF_TOKEN secret.
 RUN --mount=type=cache,target=/root/.cache/huggingface \
     --mount=type=secret,id=HUGGING_FACE_HUB_TOKEN \
     HUGGING_FACE_HUB_TOKEN=$(cat /run/secrets/HUGGING_FACE_HUB_TOKEN) python download_models.py
 
-# Copy the main application code
+# --- Copy Application Files ---
+# FIX: Standardized to use 'app.py'
 COPY app.py .
 
+# FIX: Copy the local LoRA adapter from the build context.
+# This assumes you've moved 'gemma-grammar-lora' into your project folder.
+COPY ./gemma-grammar-lora /app/gemma-grammar-lora
+
 # Expose the port the app runs on
 EXPOSE 8000
 
 # Command to run the application
+# FIX: Standardized to use 'app:app'
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
 
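Note on the build secret: the RUN line above expects BuildKit to expose a secret named HUGGING_FACE_HUB_TOKEN at /run/secrets/HUGGING_FACE_HUB_TOKEN. On Hugging Face Spaces that should be the secret configured under that name in the Space settings; for a local build you would supply it yourself, e.g. DOCKER_BUILDKIT=1 docker build --secret id=HUGGING_FACE_HUB_TOKEN,src=./hf_token.txt . (where hf_token.txt is a hypothetical local file holding the token).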
app.py CHANGED
@@ -8,10 +8,11 @@ import os
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
 
-# --- Model Paths (will be loaded from local cache) ---
+# --- Model Paths ---
 GENDER_MODEL_PATH = "google/gemma-3-270m-qat-q4_0-unquantized"
 BASE_MODEL_PATH = "unsloth/gemma-2b-it"
-LORA_ADAPTER_PATH = "unsloth/gemma-2b-it-lora-test"
+# FIX: This now correctly points to your model on the Hugging Face Hub.
+LORA_ADAPTER_PATH = "enoch10jason/gemma-grammar-lora"
 
 # --- Global variables for models ---
 grammar_model = None
@@ -22,26 +23,23 @@ device = "cpu"
 
 print("--- Starting Model Loading ---")
 
-# The token is only used during the build, not at runtime.
-hf_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
-
 try:
-    # Load models from the local cache inside the container. Startup is now fast.
+    # Models are loaded from the pre-downloaded cache inside the image.
+    # No token is needed at runtime.
     print(f"Loading gender model from cache: {GENDER_MODEL_PATH}")
-    gender_tokenizer = AutoTokenizer.from_pretrained(GENDER_MODEL_PATH, token=hf_token)
-    gender_model = AutoModelForCausalLM.from_pretrained(GENDER_MODEL_PATH, token=hf_token).to(device)
+    gender_tokenizer = AutoTokenizer.from_pretrained(GENDER_MODEL_PATH)
+    gender_model = AutoModelForCausalLM.from_pretrained(GENDER_MODEL_PATH).to(device)
    print("✅ Gender verifier model loaded successfully!")
 
     print(f"Loading base model for grammar correction from cache: {BASE_MODEL_PATH}")
     base_model = AutoModelForCausalLM.from_pretrained(
         BASE_MODEL_PATH,
-        token=hf_token,
         dtype=torch.float32,
     ).to(device)
-    grammar_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH, token=hf_token)
+    grammar_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH)
 
     print(f"Applying LoRA adapter from cache: {LORA_ADAPTER_PATH}")
-    grammar_model = PeftModel.from_pretrained(base_model, LORA_ADAPTER_PATH, token=hf_token).to(device)
+    grammar_model = PeftModel.from_pretrained(base_model, LORA_ADAPTER_PATH).to(device)
     print("✅ Grammar correction model loaded successfully!")
 
     if grammar_tokenizer.pad_token is None:
@@ -67,7 +65,7 @@ class CorrectionResponse(BaseModel):
     original_text: str
     corrected_text: str
 
-# --- Helper Functions for Text Cleaning ---
+# --- Helper Functions (No changes needed) ---
 def clean_grammar_response(text: str) -> str:
     if "Response:" in text:
         parts = text.split("Response:")
@@ -90,8 +88,7 @@ def correct_gender_rules(text: str) -> str:
         text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
     return text
 
-# --- API Endpoints ---
-
+# --- API Endpoints (No changes needed) ---
 @app.post("/correct_grammar", response_model=CorrectionResponse)
 async def handle_grammar_correction(request: CorrectionRequest):
     if not grammar_model or not grammar_tokenizer:
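A quick smoke test for the /correct_grammar endpoint is sketched below. It assumes the container is reachable on localhost:8000 and that CorrectionRequest carries a single text field; the request model is not part of this diff, so that field name is an assumption.

import requests  # assumes the requests package is installed in the client environment

resp = requests.post(
    "http://localhost:8000/correct_grammar",
    json={"text": "she go to school yesterday"},  # assumed CorrectionRequest schema
    timeout=120,
)
resp.raise_for_status()
print(resp.json())  # CorrectionResponse keys: original_text, corrected_text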
download_models.py CHANGED
@@ -7,7 +7,8 @@ from peft import PeftModel
 
 GENDER_MODEL_PATH = "google/gemma-3-270m-qat-q4_0-unquantized"
 BASE_MODEL_PATH = "unsloth/gemma-2b-it"
-LORA_ADAPTER_PATH = "unsloth/gemma-2b-it-lora-test"
+# This now points to your fine-tuned model on the Hugging Face Hub.
+LORA_ADAPTER_PATH = "enoch10jason/gemma-grammar-lora"
 
 hf_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
 if not hf_token:
@@ -21,8 +22,9 @@ AutoTokenizer.from_pretrained(GENDER_MODEL_PATH, token=hf_token)
 AutoModelForCausalLM.from_pretrained(GENDER_MODEL_PATH, token=hf_token)
 print("✅ Gender model downloaded.")
 
-# 2. Download Grammar Model (Base + Adapter)
-print(f"Downloading: {BASE_MODEL_PATH}")
+# 2. Download Grammar Base Model
+print(f"Downloading base model: {BASE_MODEL_PATH}")
+# We need to load the base model into memory to attach the adapter to it for caching.
 base_model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL_PATH,
     token=hf_token,
@@ -31,9 +33,12 @@ base_model = AutoModelForCausalLM.from_pretrained(
 AutoTokenizer.from_pretrained(BASE_MODEL_PATH, token=hf_token)
 print("✅ Base model downloaded.")
 
-print(f"Downloading: {LORA_ADAPTER_PATH}")
+# 3. Download Your Fine-Tuned LoRA Adapter
+print(f"Downloading LoRA adapter: {LORA_ADAPTER_PATH}")
+# This step downloads the adapter and links it to the base model, caching it.
 PeftModel.from_pretrained(base_model, LORA_ADAPTER_PATH, token=hf_token)
 print("✅ LoRA adapter downloaded.")
 
+
 print("--- Model Pre-downloading Complete ---")
 
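To sanity-check that this pre-download step caches everything app.py needs at runtime (no token, no network), a minimal offline-load sketch that could be run inside the built image is below; it assumes transformers/peft versions that honor the offline environment variables, and it only re-loads the same model IDs used above.

import os
# Force offline mode so any missing cache entry fails loudly instead of re-downloading.
os.environ["HF_HUB_OFFLINE"] = "1"
os.environ["TRANSFORMERS_OFFLINE"] = "1"

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

AutoTokenizer.from_pretrained("google/gemma-3-270m-qat-q4_0-unquantized")
AutoModelForCausalLM.from_pretrained("google/gemma-3-270m-qat-q4_0-unquantized")
base = AutoModelForCausalLM.from_pretrained("unsloth/gemma-2b-it")
AutoTokenizer.from_pretrained("unsloth/gemma-2b-it")
PeftModel.from_pretrained(base, "enoch10jason/gemma-grammar-lora")
print("All models loaded from the local cache.")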
upload_lora.py ADDED
@@ -0,0 +1,39 @@
+from huggingface_hub import HfApi, create_repo
+import os
+
+# --- Configuration ---
+# 1. Set the path to the local folder containing your clean LoRA adapter.
+#    (Ensure you have removed checkpoints and optimizer files).
+LOCAL_LORA_PATH = "./gemma-grammar-lora"
+
+# 2. Define the name for your new model repository on the Hub.
+#    It's standard to use "YourUsername/YourModelName".
+REPO_ID = "enoch10jason/gemma-grammar-lora"
+
+# --- Upload Script ---
+def main():
+    # Ensure the local path exists
+    if not os.path.isdir(LOCAL_LORA_PATH):
+        print(f"❌ Error: Local LoRA path not found at '{LOCAL_LORA_PATH}'")
+        print("Please ensure your clean 'gemma-grammar-lora' folder is inside your project directory.")
+        return
+
+    api = HfApi()
+
+    # Create the repository on the Hugging Face Hub (can be private)
+    create_repo(repo_id=REPO_ID, repo_type="model", exist_ok=True, private=True)
+
+    print(f"Uploading files from '{LOCAL_LORA_PATH}' to '{REPO_ID}'...")
+
+    # Upload the entire folder. This will automatically use Git LFS for large files.
+    api.upload_folder(
+        folder_path=LOCAL_LORA_PATH,
+        repo_id=REPO_ID,
+        repo_type="model",
+    )
+
+    print(f"✅ LoRA adapter uploaded successfully to: https://huggingface.co/{REPO_ID}")
+
+if __name__ == "__main__":
+    main()
+
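After running upload_lora.py while authenticated (for example via huggingface-cli login or an HF token in the environment), one way to confirm the upload is to list the files in the new private repo; a minimal sketch:

from huggingface_hub import HfApi

api = HfApi()  # uses the cached login or HF_TOKEN from the environment
for name in api.list_repo_files("enoch10jason/gemma-grammar-lora", repo_type="model"):
    print(name)
# A clean PEFT adapter typically contains adapter_config.json and
# adapter_model.safetensors (or adapter_model.bin).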