Evgueni Poloukarov committed
Commit 2a32f6f · Parent: 6331963

refactor: improve Marimo notebook readability with proper number formatting


Fixed excessive decimal places throughout the notebook:
- Round all MAE/RMSE values to 1 decimal place (MW precision)
- Round percentage increases to 1 decimal place
- Format chart tooltips with .1f precision
- Clean up table displays for better readability

Changes:
- Load: Round all mae_d1-d14, mae_overall, rmse_overall at load time
- Daily MAE: Round mean/median values to 1 decimal
- Degradation table: Round pct_increase to 1 decimal
- Best/worst performers: Round all MAE/RMSE columns
- Outliers table: Round all MAE/RMSE columns
- Heatmap: Ensure MAE values rounded to 1 decimal
- All charts: Format tooltips with .1f for consistency
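
The load-time rounding described above amounts to building one list of column expressions and applying it in a single with_columns call. A minimal, self-contained sketch of the pattern (synthetic two-row frame instead of the notebook's CSV; column names follow the notebook, values are made up):

    import polars as pl

    # Stand-in for pl.read_csv(results_path); values are made up.
    eval_df_raw = pl.DataFrame({
        "border": ["AT-DE", "FR-ES"],
        "mae_d1": [15.923705433358656, 42.109372],
        "mae_overall": [18.441234, 47.002113],
        "rmse_overall": [25.771239, 63.900018],
    })

    # Round every MAE/RMSE column to 1 decimal place (MW precision) in one pass.
    metric_cols = ["mae_d1", "mae_overall", "rmse_overall"]
    eval_df = eval_df_raw.with_columns([pl.col(c).round(1) for c in metric_cols])

    print(eval_df)  # mae_d1 now prints as 15.9 instead of 15.923705433358656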

Result: All numbers now display with sensible precision (e.g., 15.9 MW instead of 15.923705433358656 MW)
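
The tooltip change follows the same idea on the chart side: each tooltip field gets an explicit alt.Tooltip with format='.1f'. A minimal sketch using a small hypothetical frame (the notebook's charts apply the same pattern to eval_df and daily_mae_df):

    import altair as alt
    import pandas as pd

    # Hypothetical two-border frame; values already rounded to 1 decimal.
    df = pd.DataFrame({"border": ["AT-DE", "FR-ES"], "mae_d1": [15.9, 42.1]})

    chart = alt.Chart(df).mark_bar().encode(
        x=alt.X("border:N", title="Border"),
        y=alt.Y("mae_d1:Q", title="D+1 MAE (MW)"),
        tooltip=[
            alt.Tooltip("border:N", title="Border"),
            alt.Tooltip("mae_d1:Q", title="D+1 MAE (MW)", format=".1f"),
        ],
    )
    chart.save("mae_tooltip_demo.html")  # hover shows 15.9, not full float precision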

Files changed (1):
- notebooks/october_2024_evaluation.py  +57 -23
notebooks/october_2024_evaluation.py CHANGED
@@ -46,11 +46,19 @@ def _(mo):
 def _(Path, pl):
     # Load evaluation results
     results_path = Path(__file__).parent.parent / 'results' / 'october_2024_multivariate.csv'
-    eval_df = pl.read_csv(results_path)
+    eval_df_raw = pl.read_csv(results_path)
+
+    # Round all MAE and RMSE columns for readability
+    mae_cols = [f'mae_d{i}' for i in range(1, 15)] + ['mae_overall']
+    rmse_cols = ['rmse_overall']
+
+    eval_df = eval_df_raw.with_columns([
+        pl.col(col).round(1) for col in mae_cols + rmse_cols
+    ])
 
     print(f"Loaded {len(eval_df)} border evaluations")
     print(f"Columns: {eval_df.columns}")
-    eval_df.head()
+    eval_df.head(38)
     return (eval_df,)
 
 
@@ -99,7 +107,9 @@ def _(alt, eval_df):
     hist_chart = alt.Chart(eval_df.to_pandas()).mark_bar().encode(
         x=alt.X('mae_d1:Q', bin=alt.Bin(maxbins=20), title='D+1 MAE (MW)'),
         y=alt.Y('count()', title='Number of Borders'),
-        tooltip=['count()']
+        tooltip=[
+            alt.Tooltip('count()', title='Number of Borders')
+        ]
     ).properties(
         width=600,
         height=300,
@@ -123,9 +133,13 @@ def _(mo):
 
 
 @app.cell
-def _(eval_df):
-    # Top 10 best performers
-    best_performers = eval_df.sort('mae_d1').head(10)
+def _(eval_df, pl):
+    # Top 10 best performers (rounded for readability)
+    best_performers = eval_df.sort('mae_d1').head(10).with_columns([
+        pl.col('mae_d1').round(1),
+        pl.col('mae_overall').round(1),
+        pl.col('rmse_overall').round(1)
+    ])
     best_performers.select(['border', 'mae_d1', 'mae_overall', 'rmse_overall'])
     return
 
@@ -143,9 +157,13 @@ def _(mo):
 
 
 @app.cell
-def _(eval_df):
-    # Top 10 worst performers
-    worst_performers = eval_df.sort('mae_d1', descending=True).head(10)
+def _(eval_df, pl):
+    # Top 10 worst performers (rounded for readability)
+    worst_performers = eval_df.sort('mae_d1', descending=True).head(10).with_columns([
+        pl.col('mae_d1').round(1),
+        pl.col('mae_overall').round(1),
+        pl.col('rmse_overall').round(1)
+    ])
     worst_performers.select(['border', 'mae_d1', 'mae_overall', 'rmse_overall'])
     return
 
@@ -166,12 +184,12 @@ def _(mo):
 
 @app.cell
 def _(eval_df, pl):
-    # Calculate mean MAE for each day
+    # Calculate mean MAE for each day (rounded for readability)
     daily_mae_data = []
     for day in range(1, 15):
         col_name = f'mae_d{day}'
-        mean_mae = eval_df[col_name].mean()
-        median_mae = eval_df[col_name].median()
+        mean_mae = round(eval_df[col_name].mean(), 1)
+        median_mae = round(eval_df[col_name].median(), 1)
         daily_mae_data.append({
             'day': day,
             'mean_mae': mean_mae,
@@ -189,7 +207,11 @@ def _(alt, daily_mae_df):
     degradation_chart = alt.Chart(daily_mae_df.to_pandas()).mark_line(point=True).encode(
         x=alt.X('day:Q', title='Forecast Day', scale=alt.Scale(domain=[1, 14])),
         y=alt.Y('mean_mae:Q', title='Mean MAE (MW)', scale=alt.Scale(zero=True)),
-        tooltip=['day', 'mean_mae', 'median_mae']
+        tooltip=[
+            alt.Tooltip('day:Q', title='Day'),
+            alt.Tooltip('mean_mae:Q', title='Mean MAE (MW)', format='.1f'),
+            alt.Tooltip('median_mae:Q', title='Median MAE (MW)', format='.1f')
+        ]
     ).properties(
         width=700,
         height=400,
@@ -202,12 +224,12 @@ def _(alt, daily_mae_df):
 
 @app.cell
 def _(daily_mae_df, mo, pl):
-    # MAE degradation table with explicit baseline
+    # MAE degradation table with explicit baseline (rounded for readability)
     mae_list = daily_mae_df['mean_mae'].to_list()
     baseline_mae = mae_list[0]
 
     degradation_table = daily_mae_df.with_columns([
-        ((pl.col('mean_mae') - baseline_mae) / baseline_mae * 100).alias('pct_increase')
+        (((pl.col('mean_mae') - baseline_mae) / baseline_mae * 100).round(1)).alias('pct_increase')
     ])
 
     # Extract specific days for readability
@@ -222,10 +244,10 @@ def _(daily_mae_df, mo, pl):
     {mo.as_html(degradation_table.to_pandas())}
 
     **Key Observations**:
-    - D+1 baseline: {degradation_d1_mae:.2f} MW
+    - D+1 baseline: {degradation_d1_mae:.1f} MW
     - D+2 degradation: {((degradation_d2_mae - degradation_d1_mae) / degradation_d1_mae * 100):.1f}%
-    - D+14 final: {degradation_d14_mae:.2f} MW (+{((degradation_d14_mae - degradation_d1_mae) / degradation_d1_mae * 100):.1f}%)
-    - Largest jump: D+8 at {degradation_d8_mae:.2f} MW (investigate cause)
+    - D+14 final: {degradation_d14_mae:.1f} MW (+{((degradation_d14_mae - degradation_d1_mae) / degradation_d1_mae * 100):.1f}%)
+    - Largest jump: D+8 at {degradation_d8_mae:.1f} MW (investigate cause)
     """)
     return
 
@@ -249,14 +271,15 @@ def _(eval_df, pl):
     # Reshape data for heatmap (unpivot daily MAE columns)
     heatmap_data = eval_df.select(['border'] + [f'mae_d{i}' for i in range(1, 15)])
 
-    # Unpivot to long format
+    # Unpivot to long format (already rounded in eval_df)
     heatmap_long = heatmap_data.unpivot(
         index='border',
         on=[f'mae_d{i}' for i in range(1, 15)],
         variable_name='day',
         value_name='mae'
     ).with_columns([
-        pl.col('day').str.replace('mae_d', '').cast(pl.Int32)
+        pl.col('day').str.replace('mae_d', '').cast(pl.Int32),
+        pl.col('mae').round(1)  # Ensure rounding for display
     ])
 
     heatmap_long.head()
@@ -299,8 +322,15 @@ def _(mo):
 
 @app.cell
 def _(eval_df, pl):
-    # Identify outliers
-    outliers = eval_df.filter(pl.col('mae_d1') > 150).sort('mae_d1', descending=True)
+    # Identify outliers (rounded for readability)
+    outliers = eval_df.filter(pl.col('mae_d1') > 150).sort('mae_d1', descending=True).with_columns([
+        pl.col('mae_d1').round(1),
+        pl.col('mae_d2').round(1),
+        pl.col('mae_d7').round(1),
+        pl.col('mae_d14').round(1),
+        pl.col('mae_overall').round(1),
+        pl.col('rmse_overall').round(1)
+    ])
 
     outliers.select(['border', 'mae_d1', 'mae_d2', 'mae_d7', 'mae_d14', 'mae_overall', 'rmse_overall'])
     return (outliers,)
@@ -409,7 +439,11 @@ def _(alt, eval_df):
             alt.value('#e74c3c'),
            alt.value('#3498db')
        ),
-        tooltip=['border', 'mae_d1', 'mae_overall']
+        tooltip=[
+            alt.Tooltip('border:N', title='Border'),
+            alt.Tooltip('mae_d1:Q', title='D+1 MAE (MW)', format='.1f'),
+            alt.Tooltip('mae_overall:Q', title='Overall MAE (MW)', format='.1f')
+        ]
    ).properties(
        width=600,
        height=400,