Sneha7 committed on
Commit
bdbce23
·
verified ·
1 Parent(s): 9402437

Create reward_fn.py

Files changed (1)
  1. reward_fn.py +36 -0
reward_fn.py ADDED
@@ -0,0 +1,36 @@
+ import torch
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+ # Load the reward model and tokenizer once at import time
+ rm_name = "OpenAssistant/reward-model-deberta-v3-large-v2"
+
+ rm_tokenizer = AutoTokenizer.from_pretrained(rm_name)
+ rm_model = AutoModelForSequenceClassification.from_pretrained(
+     rm_name,
+     torch_dtype=torch.float32
+ ).eval()
+
+ # For Spaces: keep the reward model on CPU to save VRAM
+ device = torch.device("cpu")
+ rm_model.to(device)
+
+
+ def reward_fn(text: str) -> float:
+     """
+     Return a scalar helpfulness reward from the trained reward model.
+     Higher = a more helpful, clearer, better-aligned response.
+     """
+     inputs = rm_tokenizer(
+         text,
+         return_tensors="pt",
+         truncation=True,
+         max_length=512
+     ).to(device)
+
+     with torch.no_grad():
+         scores = rm_model(**inputs).logits
+
+     # The reward model has a single output logit; use it directly as the reward
+     reward = float(scores[0].item())
+
+     return reward
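
For reference, a minimal sketch of how reward_fn might be called, e.g. when scoring generations in an RLHF or evaluation loop. The prompt and response strings below are hypothetical, used only to illustrate the call; since the OpenAssistant DeBERTa reward model was trained on question/answer pairs, one reasonable convention is to concatenate the prompt and response into a single string before scoring.

    from reward_fn import reward_fn

    # Hypothetical prompt/response pair, purely illustrative.
    prompt = "How do I reverse a list in Python?"
    response = "Use slicing: my_list[::-1] returns a reversed copy of the list."

    # One simple convention: score the prompt and response as a single string.
    score = reward_fn(f"Question: {prompt}\nAnswer: {response}")
    print(f"reward = {score:.3f}")  # higher = rated as more helpful by the reward model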