Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
import json
|
| 4 |
-
from constants import
|
| 5 |
from init import is_model_on_hub, upload_file, load_all_info_from_dataset_hub
|
| 6 |
from utils_display import AutoEvalColumn, fields, make_clickable_model, styled_error, styled_message
|
| 7 |
from datetime import datetime, timezone
|
|
@@ -48,6 +48,26 @@ column_names = {
|
|
| 48 |
"Spanish_male": "Spanish male",
|
| 49 |
"Vietnamese_female": "Vietnamese female",
|
| 50 |
"Vietnamese_male": "Vietnamese male",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
}
|
| 52 |
|
| 53 |
african_cols = ["Ghanain English female", "Kenyan English female", "Kenyan English male", "Nigerian English female", "Nigerian English male"]
|
|
@@ -57,13 +77,15 @@ latin_american_cols = ["Latin American female", "Latin American male"]
|
|
| 57 |
british_cols = ["Irish English female", "Irish English male", "Scottish English male", "Southern British English male"]
|
| 58 |
european_cols = ["Eastern European male", "European male", "French female", "Italian female", "Spanish female", "Spanish male", "Catalan female", "Bulgarian female", "Bulgarian male", "Lithuanian male", "Romanian female"]
|
| 59 |
asian_cols = ["Chinese female", "Chinese male", "Indonesian female", "Vietnamese female", "Vietnamese male", "Indian English female", "Indian English male"]
|
| 60 |
-
|
| 61 |
|
| 62 |
-
if not
|
| 63 |
-
raise Exception(f"CSV file {
|
| 64 |
|
| 65 |
# Get csv with data and parse columns
|
| 66 |
-
original_df = pd.read_csv(
|
|
|
|
|
|
|
| 67 |
|
| 68 |
# Formats the columns
|
| 69 |
def formatter(x):
|
|
@@ -79,8 +101,17 @@ for col in original_df.columns:
|
|
| 79 |
else:
|
| 80 |
original_df[col] = original_df[col].apply(formatter) # For numerical values
|
| 81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
original_df.rename(columns=column_names, inplace=True)
|
| 83 |
original_df.sort_values(by='Average WER ⬇️', inplace=True)
|
|
|
|
|
|
|
|
|
|
| 84 |
female_cols = [col for col in original_df.columns if 'female' == col.split(' ')[-1]]
|
| 85 |
male_cols = [col for col in original_df.columns if 'male' == col.split(' ')[-1]]
|
| 86 |
|
|
@@ -150,13 +181,14 @@ TYPES = [c.type for c in fields(AutoEvalColumn)]
|
|
| 150 |
with gr.Blocks(css=LEADERBOARD_CSS) as demo:
|
| 151 |
# gr.HTML(BANNER, elem_id="banner")
|
| 152 |
# Write a header with the title
|
| 153 |
-
gr.Markdown("<h1
|
| 154 |
|
| 155 |
|
| 156 |
gr.Markdown(EXPLANATION, elem_classes="markdown-text")
|
| 157 |
|
| 158 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 159 |
-
with gr.TabItem("🏅
|
|
|
|
| 160 |
# Add column filter dropdown
|
| 161 |
column_filter = gr.Dropdown(
|
| 162 |
choices=["All", "Female", "Male", "African", "North American", "Caribbean", "Latin American", "British", "European", "Asian"] + [v for k,v in column_names.items() if k != "model"],
|
|
@@ -217,4 +249,37 @@ with gr.Blocks(css=LEADERBOARD_CSS) as demo:
|
|
| 217 |
outputs=[leaderboard_table]
|
| 218 |
)
|
| 219 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
demo.launch(ssr_mode=False)
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
import json
|
| 4 |
+
from constants import LEADERBOARD_CSS, EXPLANATION, EXPLANATION_EDACC, EXPLANATION_AFRI
|
| 5 |
from init import is_model_on_hub, upload_file, load_all_info_from_dataset_hub
|
| 6 |
from utils_display import AutoEvalColumn, fields, make_clickable_model, styled_error, styled_message
|
| 7 |
from datetime import datetime, timezone
|
|
|
|
| 48 |
"Spanish_male": "Spanish male",
|
| 49 |
"Vietnamese_female": "Vietnamese female",
|
| 50 |
"Vietnamese_male": "Vietnamese male",
|
| 51 |
+
"agatu_test": "Agatu",
|
| 52 |
+
"angas_test": "Angas",
|
| 53 |
+
"bajju_test": "Bajju",
|
| 54 |
+
"bini_test": "Bini",
|
| 55 |
+
"brass_test": "Brass",
|
| 56 |
+
"delta_test": "Delta",
|
| 57 |
+
"eggon_test": "Eggon",
|
| 58 |
+
"ekene_test": "Ekene",
|
| 59 |
+
"ekpeye_test": "Ekpeye",
|
| 60 |
+
"gbagyi_test": "Gbagyi",
|
| 61 |
+
"igarra_test": "Igarra",
|
| 62 |
+
"ijaw-nembe_test": "Ijaw-Nembe",
|
| 63 |
+
"ikulu_test": "Ikulu",
|
| 64 |
+
"jaba_test": "Jaba",
|
| 65 |
+
"jukun_test": "Jukun",
|
| 66 |
+
"khana_test": "Khana",
|
| 67 |
+
"mada_test": "Mada",
|
| 68 |
+
"mwaghavul_test": "Mwaghavul",
|
| 69 |
+
"ukwuani_test": "Ukwuani",
|
| 70 |
+
"yoruba-hausa_test": "Yoruba-Hausa",
|
| 71 |
}
|
| 72 |
|
| 73 |
african_cols = ["Ghanain English female", "Kenyan English female", "Kenyan English male", "Nigerian English female", "Nigerian English male"]
|
|
|
|
| 77 |
british_cols = ["Irish English female", "Irish English male", "Scottish English male", "Southern British English male"]
|
| 78 |
european_cols = ["Eastern European male", "European male", "French female", "Italian female", "Spanish female", "Spanish male", "Catalan female", "Bulgarian female", "Bulgarian male", "Lithuanian male", "Romanian female"]
|
| 79 |
asian_cols = ["Chinese female", "Chinese male", "Indonesian female", "Vietnamese female", "Vietnamese male", "Indian English female", "Indian English male"]
|
| 80 |
+
eval_queue_repo_edacc, requested_models, csv_results_edacc, csv_results_afrispeech = load_all_info_from_dataset_hub()
|
| 81 |
|
| 82 |
+
if not csv_results_edacc.exists():
|
| 83 |
+
raise Exception(f"CSV file {csv_results_edacc} does not exist locally")
|
| 84 |
|
| 85 |
# Get csv with data and parse columns
|
| 86 |
+
original_df = pd.read_csv(csv_results_edacc)
|
| 87 |
+
|
| 88 |
+
afrispeech_df = pd.read_csv(csv_results_afrispeech)
|
| 89 |
|
| 90 |
# Formats the columns
|
| 91 |
def formatter(x):
|
|
|
|
| 101 |
else:
|
| 102 |
original_df[col] = original_df[col].apply(formatter) # For numerical values
|
| 103 |
|
| 104 |
+
# Prepare every AfriSpeech column: the "model" column is passed through
# make_clickable_model, every other column through formatter.
for col in afrispeech_df.columns:
    if col == "model":
        # The previous `x.replace(x, make_clickable_model(x))` swapped the whole
        # string for the helper's result, so the helper is applied directly.
        # NOTE(review): non-string entries (e.g. NaN) now reach
        # make_clickable_model instead of failing on `.replace` — confirm the
        # CSV has no missing model names.
        afrispeech_df[col] = afrispeech_df[col].apply(make_clickable_model)
    else:
        afrispeech_df[col] = afrispeech_df[col].apply(formatter)  # For numerical values
|
| 109 |
+
|
| 110 |
original_df.rename(columns=column_names, inplace=True)
|
| 111 |
original_df.sort_values(by='Average WER ⬇️', inplace=True)
|
| 112 |
+
afrispeech_df.rename(columns=column_names, inplace=True)
|
| 113 |
+
afrispeech_df.sort_values(by='Average WER ⬇️', inplace=True)
|
| 114 |
+
|
| 115 |
female_cols = [col for col in original_df.columns if 'female' == col.split(' ')[-1]]
|
| 116 |
male_cols = [col for col in original_df.columns if 'male' == col.split(' ')[-1]]
|
| 117 |
|
|
|
|
| 181 |
with gr.Blocks(css=LEADERBOARD_CSS) as demo:
|
| 182 |
# gr.HTML(BANNER, elem_id="banner")
|
| 183 |
# Write a header with the title
|
| 184 |
+
gr.Markdown("<h1>🤫 How Biased is Whisper?</h1>", elem_classes="markdown-text")
|
| 185 |
|
| 186 |
|
| 187 |
gr.Markdown(EXPLANATION, elem_classes="markdown-text")
|
| 188 |
|
| 189 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 190 |
+
with gr.TabItem("🏅 Edacc Results", elem_id="od-benchmark-tab-table", id=0):
|
| 191 |
+
gr.Markdown(EXPLANATION_EDACC, elem_classes="markdown-text")
|
| 192 |
# Add column filter dropdown
|
| 193 |
column_filter = gr.Dropdown(
|
| 194 |
choices=["All", "Female", "Male", "African", "North American", "Caribbean", "Latin American", "British", "European", "Asian"] + [v for k,v in column_names.items() if k != "model"],
|
|
|
|
| 249 |
outputs=[leaderboard_table]
|
| 250 |
)
|
| 251 |
|
| 252 |
+
with gr.TabItem("🏅 Afrispeech Results", elem_id="od-benchmark-tab-table", id=1):
|
| 253 |
+
gr.Markdown(EXPLANATION_AFRI, elem_classes="markdown-text")
|
| 254 |
+
# Add column filter dropdown
|
| 255 |
+
afrispeech_column_filter = gr.Dropdown(
|
| 256 |
+
choices=["All"] + [v for k,v in column_names.items() if k != "model" and v in afrispeech_df.columns],
|
| 257 |
+
label="Filter by column",
|
| 258 |
+
multiselect=True,
|
| 259 |
+
value=["All"],
|
| 260 |
+
elem_id="afrispeech-column-filter"
|
| 261 |
+
)
|
| 262 |
+
|
| 263 |
+
leaderboard_table = gr.components.Dataframe(
|
| 264 |
+
value=afrispeech_df,
|
| 265 |
+
datatype=TYPES,
|
| 266 |
+
elem_id="leaderboard-table",
|
| 267 |
+
interactive=False,
|
| 268 |
+
visible=True,
|
| 269 |
+
)
|
| 270 |
+
|
| 271 |
+
# Update table columns when filter changes
|
| 272 |
+
def update_afrispeech_table(cols):
    """Rebuild the AfriSpeech table for the current column-filter selection.

    Args:
        cols: Values currently selected in the column-filter dropdown.

    Returns:
        A ``gr.Dataframe`` holding all of ``afrispeech_df`` when "All" is
        among ``cols``; otherwise one holding the "Model" column followed by
        the selected columns.
    """
    if "All" not in cols:
        # Always include the Model column
        visible_cols = ["Model"] + cols
        return gr.Dataframe(value=afrispeech_df[visible_cols])

    return gr.Dataframe(value=afrispeech_df)
|
| 278 |
+
|
| 279 |
+
afrispeech_column_filter.change(
|
| 280 |
+
fn=update_afrispeech_table,
|
| 281 |
+
inputs=[afrispeech_column_filter],
|
| 282 |
+
outputs=[leaderboard_table]
|
| 283 |
+
)
|
| 284 |
+
|
| 285 |
demo.launch(ssr_mode=False)
|