Spaces:

Sneha7
/

phi2-helpfulness-grpo-demo

Runtime error

Sneha7 committed 6 days ago

Commit

bb39a36

verified ·

1 Parent(s): 8dfd928

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,29 +4,34 @@ from reward_fn import reward_fn
 from grpo_train import grpo_step
 import matplotlib.pyplot as plt
 model, tokenizer = load_policy_model()
 reward_history = []
 def plot_rewards(history):
     fig = plt.figure()
-    plt.plot(history, marker='o')
     plt.title("Reward History")
     plt.xlabel("Step")
     plt.ylabel("Reward")
     return fig
 def run_step(prompt):
     result = grpo_step(model, tokenizer, prompt, reward_fn)
     reward_history.append(float(result["reward"]))
     reward_plot = plot_rewards(reward_history)
     return result["text"], result["reward"], result["kl"], result["loss"], reward_plot
 with gr.Blocks() as demo:
     gr.Markdown("# 🤝 GRPO with Phi-2 — Helpfulness Reward Demo")
-    prompt = gr.Textbox(label="Prompt", placeholder="Ask something the model should answer helpfully...")
     run_btn = gr.Button("Run GRPO Step")
     output = gr.Textbox(label="Model Output")
@@ -36,20 +41,10 @@ with gr.Blocks() as demo:
     plot = gr.Plot(label="Reward Over Time")
-    def update_plot(xs, ys):
-        import matplotlib.pyplot as plt
-        fig, ax = plt.subplots()
-        ax.plot(xs, ys)
-        ax.set_title("Reward Trend")
-        ax.set_xlabel("Step")
-        ax.set_ylabel("Reward")
-        return fig
     run_btn.click(
         fn=run_step,
         inputs=[prompt],
         outputs=[output, reward_box, kl_box, loss_box, plot],
-        postprocess=update_plot
     )
 demo.launch()

 from grpo_train import grpo_step
 import matplotlib.pyplot as plt
 model, tokenizer = load_policy_model()
 reward_history = []
 def plot_rewards(history):
     fig = plt.figure()
+    plt.plot(history, marker="o")
     plt.title("Reward History")
     plt.xlabel("Step")
     plt.ylabel("Reward")
     return fig
 def run_step(prompt):
     result = grpo_step(model, tokenizer, prompt, reward_fn)
     reward_history.append(float(result["reward"]))
     reward_plot = plot_rewards(reward_history)
     return result["text"], result["reward"], result["kl"], result["loss"], reward_plot
 with gr.Blocks() as demo:
     gr.Markdown("# 🤝 GRPO with Phi-2 — Helpfulness Reward Demo")
+    prompt = gr.Textbox(
+        label="Prompt",
+        placeholder="Ask something the model should answer helpfully...",
+    )
     run_btn = gr.Button("Run GRPO Step")
     output = gr.Textbox(label="Model Output")
     plot = gr.Plot(label="Reward Over Time")
     run_btn.click(
         fn=run_step,
         inputs=[prompt],
         outputs=[output, reward_box, kl_box, loss_box, plot],
     )
 demo.launch()