Spaces:
Runtime error
Runtime error
import gradio as gr | |
import pandas as pd | |
import json | |
from constants import BANNER, INTRODUCTION_TEXT, CITATION_TEXT, METRICS_TAB_TEXT, DIR_OUTPUT_REQUESTS, LEADERBOARD_CSS | |
from init import is_model_on_hub, upload_file, load_all_info_from_dataset_hub | |
from utils_display import AutoEvalColumn, fields, make_clickable_model, styled_error, styled_message | |
from datetime import datetime, timezone | |
# Date shown in the "Last updated on" footer of the page.
LAST_UPDATED = "Nov 22nd 2024"

# NOTE(review): the suffixes on the "Avg. WER" and "RTFx" labels below look
# like mis-encoded emoji (presumably down/up arrows) - confirm the file
# encoding. They are kept exactly as found because the sort calls later in
# this file refer to the same label text.

# Maps raw CSV column names to the headers displayed in the main leaderboard.
column_names = {
    "MODEL": "Model",
    "Avg. WER": "Average WER β¬οΈ",
    "RTFx": "RTFx β¬οΈοΈ",
    "AMI WER": "AMI",
    "Earnings22 WER": "Earnings22",
    "Gigaspeech WER": "Gigaspeech",
    "LS Clean WER": "LS Clean",
    "LS Other WER": "LS Other",
    "SPGISpeech WER": "SPGISpeech",
    "Tedlium WER": "Tedlium",
    "Voxpopuli WER": "Voxpopuli",
}

# Same mapping for the Whisper leaderboard, which additionally renames the
# "Backend" and "Hardware" columns ("Hardware" is displayed as "Device").
whisper_column_names = {
    "MODEL": "Model",
    "Avg. WER": "Average WER β¬οΈ",
    "RTFx": "RTFx β¬οΈοΈ",
    "Backend": "Backend",
    "Hardware": "Device",
    "AMI WER": "AMI",
    "Earnings22 WER": "Earnings22",
    "Gigaspeech WER": "Gigaspeech",
    "LS Clean WER": "LS Clean",
    "LS Other WER": "LS Other",
    "SPGISpeech WER": "SPGISpeech",
    "Tedlium WER": "Tedlium",
    "Voxpopuli WER": "Voxpopuli",
}
# Fetch the request queue handles, the list of already-requested models and
# the paths of the two result CSVs in one call.
(
    eval_queue_repo,
    requested_models,
    csv_results,
    whisper_eval_queue_repo,
    whisper_csv_results,
) = load_all_info_from_dataset_hub()

# Fail fast at import time when either result file is missing locally.
for _results_path in (csv_results, whisper_csv_results):
    if not _results_path.exists():
        raise Exception(f"CSV file {_results_path} does not exist locally")

# Get csv with data and parse columns
original_df = pd.read_csv(csv_results)
whisper_df = pd.read_csv(whisper_csv_results)
# Formats the columns
def formatter(x):
    """Return a table cell formatted for display.

    Strings (including ``str`` subclasses) are returned unchanged. Values
    that support ``round()`` are rounded to two decimal places. Anything
    else (for example ``None``) is returned as-is instead of raising, so a
    single odd cell cannot abort formatting of a whole column.
    """
    if isinstance(x, str):
        return x
    try:
        return round(x, 2)
    except TypeError:
        # Not a roundable value; leave the cell untouched.
        return x
def _format_columns(df, link_model=False):
    """Apply display formatting to every column of ``df`` in place.

    Each frame is walked over its OWN columns, so a column that exists in
    only one of the leaderboards can neither be skipped nor raise KeyError.

    When ``link_model`` is true, the "model" column is passed through
    ``make_clickable_model``; all other columns go through ``formatter``.
    """
    for col in df.columns:
        if link_model and col == "model":
            # NOTE(review): the rename map keys on "MODEL" while this check
            # looks for "model" - confirm the casing of the CSV header.
            df[col] = df[col].apply(make_clickable_model)
        else:
            df[col] = df[col].apply(formatter)  # For numerical values


_format_columns(original_df, link_model=True)
_format_columns(whisper_df)

# Rename to display headers, then sort ascending on the average-WER column.
# The sort key is read from the rename map so the two cannot drift apart.
original_df.rename(columns=column_names, inplace=True)
original_df.sort_values(by=column_names["Avg. WER"], inplace=True)

whisper_df.rename(columns=whisper_column_names, inplace=True)
whisper_df.sort_values(by=whisper_column_names["Avg. WER"], inplace=True)

# Column names and datatypes declared by AutoEvalColumn.
_eval_columns = fields(AutoEvalColumn)
COLS = [c.name for c in _eval_columns]
TYPES = [c.type for c in _eval_columns]
def request_model(model_text, chbcoco2017):
    """Handle a click on the "Request" button.

    Args:
        model_text: Model identifier typed by the user
            (``user_name/model_name``).
        chbcoco2017: State of the dataset checkbox; when truthy the
            "ESB Datasets tests only" selection is requested.

    Returns:
        The markup produced by ``styled_error`` when validation or the
        submission fails, otherwise the markup produced by
        ``styled_message``.
    """
    import logging  # local import keeps this block self-contained

    logger = logging.getLogger(__name__)

    # Determine the selected checkboxes
    dataset_selection = []
    if chbcoco2017:
        dataset_selection.append("ESB Datasets tests only")
    if not dataset_selection:
        return styled_error("You need to select at least one dataset")

    base_model_on_hub, error_msg = is_model_on_hub(model_text)
    if not base_model_on_hub:
        return styled_error(f"Base model '{model_text}' {error_msg}")

    # Construct the output dictionary
    eval_entry = {
        "date": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "model": model_text,
        "datasets_selected": ", ".join(dataset_selection),
    }

    # Prepare file path
    DIR_OUTPUT_REQUESTS.mkdir(parents=True, exist_ok=True)
    filename = model_text.replace("/", "@") + "@@" + "@ ".join(dataset_selection)
    if filename in requested_models:
        return styled_error(f"A request for this model '{model_text}' and dataset(s) was already made.")

    out_filepath = DIR_OUTPUT_REQUESTS / (filename + ".txt")
    try:
        # Write the request to a text file and upload it
        with open(out_filepath, "w", encoding="utf-8") as f:
            f.write(json.dumps(eval_entry))
        upload_file(filename, out_filepath)
    except Exception:
        # Keep the user-facing message generic, but record the cause.
        logger.exception("Failed to submit evaluation request for %r", model_text)
        return styled_error("Error submitting request!")
    finally:
        # Remove the local file whether or not the upload succeeded, so a
        # failed submission does not leave a stale request file behind.
        try:
            out_filepath.unlink(missing_ok=True)
        except OSError:
            logger.warning("Could not remove temporary request file %s", out_filepath)

    # Include file in the list of uploaded files
    requested_models.append(filename)
    # NOTE(review): the leading characters of this message look like a
    # mis-encoded emoji - confirm the file encoding.
    return styled_message("π€ Your request has been submitted and will be evaluated soon!</p>")
def _filter_choices(column):
    """Return the dropdown choices for ``column`` of the Whisper table.

    "All" always comes first. Missing values are dropped before sorting so
    that NaN entries cannot break the ordering, and a column that is absent
    from the table yields just ["All"].
    """
    if column not in whisper_df.columns:
        return ["All"]
    return ["All"] + sorted(whisper_df[column].dropna().unique().tolist())


# NOTE(review): several labels below start with what look like mis-encoded
# emoji; they are reproduced as found - confirm the file encoding.
with gr.Blocks(css=LEADERBOARD_CSS) as demo:
    gr.HTML(BANNER, elem_id="banner")
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Tab 0: main leaderboard (read-only table).
        with gr.TabItem("π Leaderboard", elem_id="od-benchmark-tab-table", id=0):
            leaderboard_table = gr.components.Dataframe(
                value=original_df,
                datatype=TYPES,
                elem_id="leaderboard-table",
                interactive=False,
                visible=True,
            )

        # Tab 1: Whisper leaderboard with backend/device filters.
        with gr.TabItem("π Whisper Model Leaderboard", elem_id="whisper-backends-tab", id=1):
            gr.Markdown("## Whisper Model Performance Across Different Backends", elem_classes="markdown-text")
            gr.Markdown("This table shows how different Whisper model implementations compare in terms of performance and speed.", elem_classes="markdown-text")

            with gr.Row():
                backend_filter = gr.Dropdown(
                    choices=_filter_choices("Backend"),
                    value=["All"],  # multiselect dropdowns take a list value
                    label="Filter by Backend",
                    elem_id="backend-filter",
                    multiselect=True,  # Enable multiple selection
                )
                device_choices = _filter_choices("Device")
                device_filter = gr.Dropdown(
                    choices=device_choices,
                    value=["All"],  # multiselect dropdowns take a list value
                    label="Filter by Device",
                    elem_id="device-filter",
                    multiselect=True,  # Enable multiple selection
                )

            # NOTE(review): TYPES is derived from AutoEvalColumn; confirm it
            # also matches this table, which carries Backend/Device columns.
            whisper_table = gr.components.Dataframe(
                value=whisper_df,
                datatype=TYPES,
                elem_id="whisper-table",
                interactive=False,
                visible=True,
            )

            def filter_whisper_table(backends, devices):
                """Return the Whisper table restricted to the selected values.

                An empty selection, or one containing "All", applies no
                filter for that column. A column missing from the table is
                ignored rather than raising.
                """
                filtered_df = whisper_df.copy()
                # Handle backend filtering
                if backends and "All" not in backends and "Backend" in filtered_df.columns:
                    filtered_df = filtered_df[filtered_df["Backend"].isin(backends)]
                # Handle device filtering
                if devices and "All" not in devices and "Device" in filtered_df.columns:
                    filtered_df = filtered_df[filtered_df["Device"].isin(devices)]
                return filtered_df

            # Both dropdowns re-run the same filter with the current state
            # of the pair.
            backend_filter.change(
                filter_whisper_table,
                inputs=[backend_filter, device_filter],
                outputs=whisper_table,
            )
            device_filter.change(
                filter_whisper_table,
                inputs=[backend_filter, device_filter],
                outputs=whisper_table,
            )

        # Tab 2: static description of the metrics.
        with gr.TabItem("π Metrics", elem_id="od-benchmark-tab-table", id=2):
            gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")

        # Tab 3: model request form wired to request_model().
        with gr.TabItem("βοΈβ¨ Request a model here!", elem_id="od-benchmark-tab-table", id=3):
            with gr.Column():
                gr.Markdown("# βοΈβ¨ Request results for a new model here!", elem_classes="markdown-text")
            with gr.Column():
                gr.Markdown("Select a dataset:", elem_classes="markdown-text")
            with gr.Column():
                model_name_textbox = gr.Textbox(label="Model name (user_name/model_name)")
                # Hidden, always-checked box: its value is what request_model
                # receives as its second argument.
                chb_coco2017 = gr.Checkbox(label="COCO validation 2017 dataset", visible=False, value=True, interactive=False)
            with gr.Column():
                mdw_submission_result = gr.Markdown()
                btn_submitt = gr.Button(value="π Request")
                btn_submitt.click(
                    request_model,
                    [model_name_textbox, chb_coco2017],
                    mdw_submission_result,
                )

    gr.Markdown(f"Last updated on **{LAST_UPDATED}**", elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("π Citation", open=False):
            gr.Textbox(
                value=CITATION_TEXT, lines=7,
                label="Copy the BibTeX snippet to cite this source",
                elem_id="citation-button",
                show_copy_button=True,
            )
# Start serving the app built above; ssr_mode=False is passed explicitly
# (presumably to turn off Gradio's server-side rendering - confirm against
# the Gradio version in use).
demo.launch(ssr_mode=False)