Evgueni Poloukarov committed
Commit 2a32f6f · Parent: 6331963

refactor: improve Marimo notebook readability with proper number formatting


Fixed excessive decimal places throughout the notebook:
- Round all MAE/RMSE values to 1 decimal place (MW precision)
- Round percentage increases to 1 decimal place
- Format chart tooltips with .1f precision
- Clean up table displays for better readability

Changes:
- Load: Round all mae_d1-d14, mae_overall, rmse_overall at load time
- Daily MAE: Round mean/median values to 1 decimal
- Degradation table: Round pct_increase to 1 decimal
- Best/worst performers: Round all MAE/RMSE columns
- Outliers table: Round all MAE/RMSE columns
- Heatmap: Ensure MAE values rounded to 1 decimal
- All charts: Format tooltips with .1f for consistency
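
The load-time rounding described above amounts to building one list of column expressions and applying it in a single with_columns call. A minimal, self-contained sketch of the pattern (synthetic two-row frame instead of the notebook's CSV; column names follow the notebook, values are made up):

    import polars as pl

    # Stand-in for pl.read_csv(results_path); values are made up.
    eval_df_raw = pl.DataFrame({
        "border": ["AT-DE", "FR-ES"],
        "mae_d1": [15.923705433358656, 42.109372],
        "mae_overall": [18.441234, 47.002113],
        "rmse_overall": [25.771239, 63.900018],
    })

    # Round every MAE/RMSE column to 1 decimal place (MW precision) in one pass.
    metric_cols = ["mae_d1", "mae_overall", "rmse_overall"]
    eval_df = eval_df_raw.with_columns([pl.col(c).round(1) for c in metric_cols])

    print(eval_df)  # mae_d1 now prints as 15.9 instead of 15.923705433358656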

Result: All numbers now display with sensible precision (e.g., 15.9 MW instead of 15.923705433358656 MW)
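
The tooltip change follows the same idea on the chart side: each tooltip field gets an explicit alt.Tooltip with format='.1f'. A minimal sketch using a small hypothetical frame (the notebook's charts apply the same pattern to eval_df and daily_mae_df):

    import altair as alt
    import pandas as pd

    # Hypothetical two-border frame; values already rounded to 1 decimal.
    df = pd.DataFrame({"border": ["AT-DE", "FR-ES"], "mae_d1": [15.9, 42.1]})

    chart = alt.Chart(df).mark_bar().encode(
        x=alt.X("border:N", title="Border"),
        y=alt.Y("mae_d1:Q", title="D+1 MAE (MW)"),
        tooltip=[
            alt.Tooltip("border:N", title="Border"),
            alt.Tooltip("mae_d1:Q", title="D+1 MAE (MW)", format=".1f"),
        ],
    )
    chart.save("mae_tooltip_demo.html")  # hover shows 15.9, not full float precision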

Files changed (1):
- notebooks/october_2024_evaluation.py  +57 -23
notebooks/october_2024_evaluation.py CHANGED
@@ -46,11 +46,19 @@ def _(mo):
 def _(Path, pl):
     # Load evaluation results
     results_path = Path(__file__).parent.parent / 'results' / 'october_2024_multivariate.csv'
-    eval_df = pl.read_csv(results_path)
+    eval_df_raw = pl.read_csv(results_path)
+
+    # Round all MAE and RMSE columns for readability
+    mae_cols = [f'mae_d{i}' for i in range(1, 15)] + ['mae_overall']
+    rmse_cols = ['rmse_overall']
+
+    eval_df = eval_df_raw.with_columns([
+        pl.col(col).round(1) for col in mae_cols + rmse_cols
+    ])
 
     print(f"Loaded {len(eval_df)} border evaluations")
     print(f"Columns: {eval_df.columns}")
-    eval_df.head()
+    eval_df.head(38)
     return (eval_df,)
 
 
@@ -99,7 +107,9 @@ def _(alt, eval_df):
     hist_chart = alt.Chart(eval_df.to_pandas()).mark_bar().encode(
         x=alt.X('mae_d1:Q', bin=alt.Bin(maxbins=20), title='D+1 MAE (MW)'),
         y=alt.Y('count()', title='Number of Borders'),
-        tooltip=['count()']
+        tooltip=[
+            alt.Tooltip('count()', title='Number of Borders')
+        ]
     ).properties(
         width=600,
         height=300,
@@ -123,9 +133,13 @@ def _(mo):
 
 
 @app.cell
-def _(eval_df):
-    # Top 10 best performers
-    best_performers = eval_df.sort('mae_d1').head(10)
+def _(eval_df, pl):
+    # Top 10 best performers (rounded for readability)
+    best_performers = eval_df.sort('mae_d1').head(10).with_columns([
+        pl.col('mae_d1').round(1),
+        pl.col('mae_overall').round(1),
+        pl.col('rmse_overall').round(1)
+    ])
     best_performers.select(['border', 'mae_d1', 'mae_overall', 'rmse_overall'])
     return
 
@@ -143,9 +157,13 @@ def _(mo):
 
 
 @app.cell
-def _(eval_df):
-    # Top 10 worst performers
-    worst_performers = eval_df.sort('mae_d1', descending=True).head(10)
+def _(eval_df, pl):
+    # Top 10 worst performers (rounded for readability)
+    worst_performers = eval_df.sort('mae_d1', descending=True).head(10).with_columns([
+        pl.col('mae_d1').round(1),
+        pl.col('mae_overall').round(1),
+        pl.col('rmse_overall').round(1)
+    ])
     worst_performers.select(['border', 'mae_d1', 'mae_overall', 'rmse_overall'])
     return
 
@@ -166,12 +184,12 @@ def _(mo):
 
 @app.cell
 def _(eval_df, pl):
-    # Calculate mean MAE for each day
+    # Calculate mean MAE for each day (rounded for readability)
     daily_mae_data = []
     for day in range(1, 15):
         col_name = f'mae_d{day}'
-        mean_mae = eval_df[col_name].mean()
-        median_mae = eval_df[col_name].median()
+        mean_mae = round(eval_df[col_name].mean(), 1)
+        median_mae = round(eval_df[col_name].median(), 1)
         daily_mae_data.append({
             'day': day,
             'mean_mae': mean_mae,
@@ -189,7 +207,11 @@ def _(alt, daily_mae_df):
     degradation_chart = alt.Chart(daily_mae_df.to_pandas()).mark_line(point=True).encode(
         x=alt.X('day:Q', title='Forecast Day', scale=alt.Scale(domain=[1, 14])),
         y=alt.Y('mean_mae:Q', title='Mean MAE (MW)', scale=alt.Scale(zero=True)),
-        tooltip=['day', 'mean_mae', 'median_mae']
+        tooltip=[
+            alt.Tooltip('day:Q', title='Day'),
+            alt.Tooltip('mean_mae:Q', title='Mean MAE (MW)', format='.1f'),
+            alt.Tooltip('median_mae:Q', title='Median MAE (MW)', format='.1f')
+        ]
     ).properties(
         width=700,
         height=400,
@@ -202,12 +224,12 @@ def _(alt, daily_mae_df):
 
 @app.cell
 def _(daily_mae_df, mo, pl):
-    # MAE degradation table with explicit baseline
+    # MAE degradation table with explicit baseline (rounded for readability)
     mae_list = daily_mae_df['mean_mae'].to_list()
     baseline_mae = mae_list[0]
 
     degradation_table = daily_mae_df.with_columns([
-        ((pl.col('mean_mae') - baseline_mae) / baseline_mae * 100).alias('pct_increase')
+        (((pl.col('mean_mae') - baseline_mae) / baseline_mae * 100).round(1)).alias('pct_increase')
     ])
 
     # Extract specific days for readability
@@ -222,10 +244,10 @@ def _(daily_mae_df, mo, pl):
     {mo.as_html(degradation_table.to_pandas())}
 
     **Key Observations**:
-    - D+1 baseline: {degradation_d1_mae:.2f} MW
+    - D+1 baseline: {degradation_d1_mae:.1f} MW
     - D+2 degradation: {((degradation_d2_mae - degradation_d1_mae) / degradation_d1_mae * 100):.1f}%
-    - D+14 final: {degradation_d14_mae:.2f} MW (+{((degradation_d14_mae - degradation_d1_mae) / degradation_d1_mae * 100):.1f}%)
-    - Largest jump: D+8 at {degradation_d8_mae:.2f} MW (investigate cause)
+    - D+14 final: {degradation_d14_mae:.1f} MW (+{((degradation_d14_mae - degradation_d1_mae) / degradation_d1_mae * 100):.1f}%)
+    - Largest jump: D+8 at {degradation_d8_mae:.1f} MW (investigate cause)
     """)
     return
 
@@ -249,14 +271,15 @@ def _(eval_df, pl):
     # Reshape data for heatmap (unpivot daily MAE columns)
     heatmap_data = eval_df.select(['border'] + [f'mae_d{i}' for i in range(1, 15)])
 
-    # Unpivot to long format
+    # Unpivot to long format (already rounded in eval_df)
     heatmap_long = heatmap_data.unpivot(
         index='border',
         on=[f'mae_d{i}' for i in range(1, 15)],
         variable_name='day',
         value_name='mae'
     ).with_columns([
-        pl.col('day').str.replace('mae_d', '').cast(pl.Int32)
+        pl.col('day').str.replace('mae_d', '').cast(pl.Int32),
+        pl.col('mae').round(1)  # Ensure rounding for display
     ])
 
     heatmap_long.head()
@@ -299,8 +322,15 @@ def _(mo):
 
 @app.cell
 def _(eval_df, pl):
-    # Identify outliers
-    outliers = eval_df.filter(pl.col('mae_d1') > 150).sort('mae_d1', descending=True)
+    # Identify outliers (rounded for readability)
+    outliers = eval_df.filter(pl.col('mae_d1') > 150).sort('mae_d1', descending=True).with_columns([
+        pl.col('mae_d1').round(1),
+        pl.col('mae_d2').round(1),
+        pl.col('mae_d7').round(1),
+        pl.col('mae_d14').round(1),
+        pl.col('mae_overall').round(1),
+        pl.col('rmse_overall').round(1)
+    ])
 
     outliers.select(['border', 'mae_d1', 'mae_d2', 'mae_d7', 'mae_d14', 'mae_overall', 'rmse_overall'])
     return (outliers,)
@@ -409,7 +439,11 @@ def _(alt, eval_df):
             alt.value('#e74c3c'),
            alt.value('#3498db')
        ),
-        tooltip=['border', 'mae_d1', 'mae_overall']
+        tooltip=[
+            alt.Tooltip('border:N', title='Border'),
+            alt.Tooltip('mae_d1:Q', title='D+1 MAE (MW)', format='.1f'),
+            alt.Tooltip('mae_overall:Q', title='Overall MAE (MW)', format='.1f')
+        ]
    ).properties(
        width=600,
        height=400,