Sneha7 committed on
Commit 19afcd9 · verified · 1 Parent(s): 5981a94

Update policy.py

Files changed (1)
  1. policy.py +25 -26
policy.py CHANGED
@@ -13,48 +13,47 @@ def load_policy_model():
         torch_dtype=torch.float16
     )

-    # -----------------------------------------
-    # 1. UNTIE LM HEAD FROM EMBEDDINGS
-    # -----------------------------------------
-    # Phi-2 ties lm_head.weight = embed_tokens.weight
-    # We replace lm_head with a *separate* nn.Linear so gradients do NOT flow to embeddings.
-    old_lm_head = model.lm_head
-    vocab_size, hidden_size = old_lm_head.weight.shape
+    # -----------------------------------------------------------
+    # 1. Identify the REAL lm_head and embedding weights
+    # -----------------------------------------------------------
+    embed = model.model.embed_tokens
+    old_lm_head = model.lm_head  # This is actually tied to embed

     print(">>> UNTIEING LM HEAD...")
+
+    # -----------------------------------------------------------
+    # 2. Create a new untied lm_head
+    # -----------------------------------------------------------
+    vocab_size, hidden_size = old_lm_head.weight.shape
     new_lm_head = torch.nn.Linear(hidden_size, vocab_size, bias=True)
+
     new_lm_head.weight.data = old_lm_head.weight.data.clone()
     if old_lm_head.bias is not None:
         new_lm_head.bias.data = old_lm_head.bias.data.clone()

+    # Replace tied head with untied one
     model.lm_head = new_lm_head.to(model.device)

-    # -----------------------------------------
-    # 2. FREEZE EVERYTHING
-    # -----------------------------------------
+    # -----------------------------------------------------------
+    # 3. Freeze EVERYTHING
+    # -----------------------------------------------------------
     for name, param in model.named_parameters():
         param.requires_grad = False

-    # -----------------------------------------
-    # 3. UNFREEZE ONLY THE UNTIED LM HEAD
-    # -----------------------------------------
+    # -----------------------------------------------------------
+    # 4. Unfreeze ONLY the new lm_head
+    # -----------------------------------------------------------
     for name, param in model.named_parameters():
-        if "lm_head" in name:
+        if name.startswith("lm_head"):
             param.requires_grad = True
             print("TRAINABLE:", name)

-    # -----------------------------------------
-    # 4. VERIFY FINAL PARAM COUNT
-    # -----------------------------------------
-    trainable_params = [p for p in model.parameters() if p.requires_grad]
-    total = sum(p.numel() for p in trainable_params)
+    # -----------------------------------------------------------
+    # 5. Count trainable params
+    # -----------------------------------------------------------
+    trainable = [p for p in model.parameters() if p.requires_grad]
+    total = sum(p.numel() for p in trainable)
     print(">>> FINAL TRAINABLE PARAM COUNT:", total)

-    # -----------------------------------------
-    # 5. OPTIMIZER
-    # -----------------------------------------
-    optimizer = torch.optim.Adam(trainable_params, lr=1e-4)
-    model.optimizer = optimizer
-
-    print(">>> POLICY MODEL READY.")
+    model.optimizer = torch.optim.Adam(trainable, lr=1e-4)
     return model, tokenizer
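
As a quick sanity check on what this change is meant to guarantee, the sketch below shows one way the returned model could be exercised. It is not part of the commit: the "from policy import load_policy_model" import, the Phi-2-style layout (model.model.embed_tokens), the dummy prompt, and the labels=input_ids loss are all assumptions made for illustration.

# Hypothetical smoke test (not in this commit); assumes policy.py is importable
# and load_policy_model() returns (model, tokenizer) as in the diff above.
import torch
from policy import load_policy_model

model, tokenizer = load_policy_model()

# The untied head should no longer share storage with the input embeddings.
assert model.lm_head.weight.data_ptr() != model.model.embed_tokens.weight.data_ptr()

# After the freeze/unfreeze passes, only lm_head parameters should be trainable.
trainable_names = [n for n, p in model.named_parameters() if p.requires_grad]
assert trainable_names and all(n.startswith("lm_head") for n in trainable_names)

# One illustrative update with the optimizer attached by load_policy_model();
# the prompt and the labels=input_ids loss are placeholder choices.
batch = tokenizer("hello world", return_tensors="pt").to(model.device)
out = model(**batch, labels=batch["input_ids"])
out.loss.backward()
model.optimizer.step()
model.optimizer.zero_grad()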