Spaces:
Runtime error
Runtime error
| import torch | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
# The reward model and its tokenizer are loaded a single time, at import.
rm_name = "OpenAssistant/reward-model-deberta-v3-large-v2"
rm_tokenizer = AutoTokenizer.from_pretrained(rm_name)
rm_model = AutoModelForSequenceClassification.from_pretrained(
    rm_name, torch_dtype=torch.float32
)
rm_model.eval()  # evaluation mode: the model is only used for scoring

# On Spaces the reward model is kept on the CPU to save VRAM.
device = torch.device("cpu")
rm_model.to(device)
def reward_fn(text: str) -> float:
    """Score ``text`` with the reward model and return it as a float.

    The text is tokenized (truncated to at most 512 tokens), moved to
    ``device`` and run through ``rm_model`` with gradient tracking
    disabled. The first row of the resulting logits is converted to a
    Python float. A higher value is meant to indicate a more helpful,
    clearer, better-aligned response.
    """
    encoded = rm_tokenizer(
        text, return_tensors="pt", truncation=True, max_length=512
    )
    encoded = encoded.to(device)

    with torch.no_grad():
        output = rm_model(**encoded)

    # The reward is the logit row of the first (only) input sequence;
    # ``.item()` requires that row to contain exactly one value.
    first_row = output.logits[0]
    return float(first_row.item())