Spaces:
Runtime error
Runtime error
christodoulos.constantinides@ibm.com
committed on
Commit
·
e83e5e0
1
Parent(s):
2d95777
update
Browse files
- src/leaderboard/read_evals.py +1 -1
- src/populate.py +2 -2
src/leaderboard/read_evals.py
CHANGED
|
@@ -109,7 +109,7 @@ class EvalResult:
|
|
| 109 |
|
| 110 |
def to_dict(self, task_class):
|
| 111 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
| 112 |
-
average = sum([v for v in self.results.values() if v is not None]) / len(
|
| 113 |
data_dict = {
|
| 114 |
"eval_name": self.eval_name, # not a column, just a save name,
|
| 115 |
AutoEvalColumn.precision.name: self.precision.value.name,
|
|
|
|
| 109 |
|
| 110 |
def to_dict(self, task_class):
|
| 111 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
| 112 |
+
average = sum([v for v in self.results.values() if v is not None]) / len(task_class)
|
| 113 |
data_dict = {
|
| 114 |
"eval_name": self.eval_name, # not a column, just a save name,
|
| 115 |
AutoEvalColumn.precision.name: self.precision.value.name,
|
src/populate.py
CHANGED
|
@@ -4,7 +4,7 @@ import os
|
|
| 4 |
import pandas as pd
|
| 5 |
|
| 6 |
from src.display.formatting import has_no_nan_values, make_clickable_model
|
| 7 |
-
from src.display.utils import
|
| 8 |
from src.leaderboard.read_evals import get_raw_eval_results
|
| 9 |
|
| 10 |
|
|
@@ -17,7 +17,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
| 17 |
|
| 18 |
df = pd.DataFrame.from_records(all_data_json)
|
| 19 |
print(df)
|
| 20 |
-
df = df.sort_values(by=[
|
| 21 |
df = df[cols].round(decimals=2)
|
| 22 |
|
| 23 |
# filter out if any of the benchmarks have not been produced
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
|
| 6 |
from src.display.formatting import has_no_nan_values, make_clickable_model
|
| 7 |
+
from src.display.utils import AutoEvalColumn, EvalQueueColumn
|
| 8 |
from src.leaderboard.read_evals import get_raw_eval_results
|
| 9 |
|
| 10 |
|
|
|
|
| 17 |
|
| 18 |
df = pd.DataFrame.from_records(all_data_json)
|
| 19 |
print(df)
|
| 20 |
+
df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
|
| 21 |
df = df[cols].round(decimals=2)
|
| 22 |
|
| 23 |
# filter out if any of the benchmarks have not been produced
|