import gradio as gr
import pandas as pd

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    INTRODUCTION_TEXT,
    ABOUT_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css


def load_results(csv_path: str = "results.csv") -> pd.DataFrame:
    """Load the results CSV and build the leaderboard table.

    The CSV is read with the columns ``model_id``, ``dataset``, ``wer``,
    ``total_audio_length`` and ``total_time``. One row per model is
    produced, containing the mean WER per dataset, the overall mean WER
    and the RTFx (total audio length divided by total time), sorted by
    ascending average WER.

    Args:
        csv_path: Path of the CSV file to read. Defaults to
            ``"results.csv"``.

    Returns:
        The leaderboard DataFrame with display-ready column names. If the
        CSV file does not exist, an empty DataFrame with the columns
        ``Model``, ``Average WER ⬇️`` and ``RTFx ⬆️`` is returned.
    """
    # Only the read can raise FileNotFoundError, so keep the try minimal.
    try:
        df = pd.read_csv(csv_path)
    except FileNotFoundError:
        # Return empty dataframe if CSV doesn't exist
        return pd.DataFrame(columns=['Model', 'Average WER ⬇️', 'RTFx ⬆️'])

    # Get WER by dataset for each model
    wer_by_dataset = df.pivot_table(
        index='model_id',
        columns='dataset',
        values='wer',
        aggfunc='mean',
    ).round(2)

    # Calculate overall average WER (mean over all rows of the model)
    wer_by_dataset['Average WER'] = df.groupby('model_id')['wer'].mean().round(2)

    # Calculate RTFx properly: sum(total_audio_length) / sum(total_time)
    audio_time_sums = df.groupby('model_id').agg({
        'total_audio_length': 'sum',
        'total_time': 'sum',
    })
    rtfx_calculated = (
        audio_time_sums['total_audio_length'] / audio_time_sums['total_time']
    ).round(2)

    # Combine all metrics
    model_stats = wer_by_dataset.copy()
    model_stats['RTFx'] = rtfx_calculated

    # Set RTFx to N/A for ElevenLabs (API-based, not local model)
    elevenlabs_mask = model_stats.index.str.contains(
        'elevenlabs', case=False, na=False
    )
    if elevenlabs_mask.any():
        # Writing a string into a float column is an incompatible-dtype
        # setitem, which pandas has deprecated (PDEP-6). Cast to object
        # explicitly first; the column ends up with the same mixed
        # float / 'N/A' contents as before. When no row matches, the
        # column is left as floats, exactly as the plain assignment did.
        model_stats['RTFx'] = model_stats['RTFx'].astype(object)
        model_stats.loc[elevenlabs_mask, 'RTFx'] = 'N/A'

    # Sort by average WER (lower is better)
    model_stats = model_stats.sort_values('Average WER')

    # Reset index to make model_id a column
    model_stats = model_stats.reset_index()

    # Reorder columns: Model, Average WER first, then Datarisas, then
    # other datasets, then RTFx
    dataset_columns = [
        col for col in model_stats.columns
        if col not in ['model_id', 'Average WER', 'RTFx']
    ]
    # Put datarisas first, then other datasets
    datarisas_col = [col for col in dataset_columns if 'datarisas' in col.lower()]
    other_dataset_cols = [
        col for col in dataset_columns if 'datarisas' not in col.lower()
    ]
    ordered_dataset_cols = datarisas_col + other_dataset_cols
    new_column_order = ['model_id', 'Average WER'] + ordered_dataset_cols + ['RTFx']
    model_stats = model_stats[new_column_order]

    # Convert model names to appropriate links
    def create_model_link(model_name):
        # NOTE(review): both branches return the bare model name, so no
        # link markup is produced even though the column is rendered as
        # markdown. Confirm the intended link targets before changing.
        if 'elevenlabs' in model_name.lower():
            return f'{model_name}'
        else:
            return f'{model_name}'

    model_stats['model_id'] = model_stats['model_id'].apply(create_model_link)

    # Rename columns for better display
    column_mapping = {
        'model_id': 'Model',
        'Average WER': 'Average WER ⬇️',
        'RTFx': 'RTFx ⬆️',
    }
    # Add arrows to dataset WER columns
    for col in dataset_columns:
        column_mapping[col] = f'{col.replace("_", " ").title()} WER ⬇️'

    model_stats = model_stats.rename(columns=column_mapping)

    return model_stats


# Load results
leaderboard_df = load_results()

demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Chilean Spanish ASR Leaderboard", elem_id="leaderboard-tab", id=0):
            gr.Dataframe(
                value=leaderboard_df,
                interactive=False,
                wrap=True,
                # First column (Model) is rendered as markdown; every
                # remaining column is declared as a number.
                datatype=["markdown"] + ["number"] * (len(leaderboard_df.columns) - 1)
            )

        with gr.TabItem("📝 About", elem_id="about-tab", id=1):
            gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )

demo.launch()