Spaces:
Running
Running
zhimin-z
committed on
Commit
·
5dda6e0
1
Parent(s):
eb0d8e0
refine
Browse files
app.py
CHANGED
|
@@ -398,7 +398,7 @@ def get_leaderboard_data(vote_entry=None, use_cache=True):
|
|
| 398 |
{
|
| 399 |
"Elo Score": 2,
|
| 400 |
"Conversation Efficiency Index": 2,
|
| 401 |
-
"
|
| 402 |
"Average Win Rate": 2,
|
| 403 |
"Bradley-Terry Coefficient": 2,
|
| 404 |
"Eigenvector Centrality Value": 2,
|
|
@@ -426,7 +426,7 @@ def get_leaderboard_data(vote_entry=None, use_cache=True):
|
|
| 426 |
"Website",
|
| 427 |
"Elo Score",
|
| 428 |
"Conversation Efficiency Index",
|
| 429 |
-
"
|
| 430 |
"Average Win Rate",
|
| 431 |
"Average Failure Rate",
|
| 432 |
"Bradley-Terry Coefficient",
|
|
@@ -569,7 +569,7 @@ def get_leaderboard_data(vote_entry=None, use_cache=True):
|
|
| 569 |
"Website": website_values,
|
| 570 |
"Elo Score": elo_scores.values,
|
| 571 |
"Conversation Efficiency Index": cei_result.values,
|
| 572 |
-
"
|
| 573 |
"Average Win Rate": avr_scores.values,
|
| 574 |
"Bradley-Terry Coefficient": bt_scores.values,
|
| 575 |
"Eigenvector Centrality Value": eigen_scores.values,
|
|
@@ -709,7 +709,7 @@ with gr.Blocks(title="SWE-Model-Arena", theme=gr.themes.Soft()) as app:
|
|
| 709 |
"Website",
|
| 710 |
"Elo Score",
|
| 711 |
"Conversation Efficiency Index",
|
| 712 |
-
"
|
| 713 |
],
|
| 714 |
search_columns=["Model", "Website"],
|
| 715 |
filter_columns=[
|
|
@@ -730,12 +730,12 @@ with gr.Blocks(title="SWE-Model-Arena", theme=gr.themes.Soft()) as app:
|
|
| 730 |
label="Conversation Efficiency Index"
|
| 731 |
),
|
| 732 |
ColumnFilter(
|
| 733 |
-
"
|
| 734 |
min=0.0,
|
| 735 |
max=1.0,
|
| 736 |
default=[0.0, 1.0],
|
| 737 |
type="slider",
|
| 738 |
-
label="
|
| 739 |
),
|
| 740 |
ColumnFilter(
|
| 741 |
"Average Win Rate",
|
|
|
|
| 398 |
{
|
| 399 |
"Elo Score": 2,
|
| 400 |
"Conversation Efficiency Index": 2,
|
| 401 |
+
"Consistency Score": 2,
|
| 402 |
"Average Win Rate": 2,
|
| 403 |
"Bradley-Terry Coefficient": 2,
|
| 404 |
"Eigenvector Centrality Value": 2,
|
|
|
|
| 426 |
"Website",
|
| 427 |
"Elo Score",
|
| 428 |
"Conversation Efficiency Index",
|
| 429 |
+
"Consistency Score",
|
| 430 |
"Average Win Rate",
|
| 431 |
"Average Failure Rate",
|
| 432 |
"Bradley-Terry Coefficient",
|
|
|
|
| 569 |
"Website": website_values,
|
| 570 |
"Elo Score": elo_scores.values,
|
| 571 |
"Conversation Efficiency Index": cei_result.values,
|
| 572 |
+
"Consistency Score": mcs_result.values,
|
| 573 |
"Average Win Rate": avr_scores.values,
|
| 574 |
"Bradley-Terry Coefficient": bt_scores.values,
|
| 575 |
"Eigenvector Centrality Value": eigen_scores.values,
|
|
|
|
| 709 |
"Website",
|
| 710 |
"Elo Score",
|
| 711 |
"Conversation Efficiency Index",
|
| 712 |
+
"Consistency Score",
|
| 713 |
],
|
| 714 |
search_columns=["Model", "Website"],
|
| 715 |
filter_columns=[
|
|
|
|
| 730 |
label="Conversation Efficiency Index"
|
| 731 |
),
|
| 732 |
ColumnFilter(
|
| 733 |
+
"Consistency Score",
|
| 734 |
min=0.0,
|
| 735 |
max=1.0,
|
| 736 |
default=[0.0, 1.0],
|
| 737 |
type="slider",
|
| 738 |
+
label="Consistency Score"
|
| 739 |
),
|
| 740 |
ColumnFilter(
|
| 741 |
"Average Win Rate",
|