Spaces:
Sleeping
Sleeping
atodorov284
committed on
Commit
·
5c3cd79
1
Parent(s):
18117cd
Put model development pipeline in a .py file. Rewrote README. Made a config folder to store hyperparameter space searches. Co-authored with Chris and Csenge. Several TODOs in the README.
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- README.md +20 -22
- air-quality-forecast/model_development.py +266 -0
- configs/hyperparameter_search_spaces.yaml +8 -0
- data/external/.gitkeep +0 -0
- data/interim/.gitkeep +0 -0
- data/interim/correlation_matrix.csv +0 -30
- {notebooks/mlartifacts/803928810609079892/e684490d78f54fcf8b51f3a2b8018698 → mlartifacts/149819317988706962/5479a322736e4663944d983720a6648e}/artifacts/estimator.html +1 -1
- {notebooks/mlartifacts/475209732522917118/d6de58a8b1b9445a8da3f306598e1754 → mlartifacts/149819317988706962/5479a322736e4663944d983720a6648e}/artifacts/model/MLmodel +4 -4
- {notebooks/mlartifacts/475209732522917118/6bfd7856e3624d38aadfd33e3fc79343/artifacts/best_model → mlartifacts/149819317988706962/5479a322736e4663944d983720a6648e/artifacts/model}/conda.yaml +0 -0
- {notebooks/mlartifacts/475209732522917118/6bfd7856e3624d38aadfd33e3fc79343/artifacts/best_model → mlartifacts/149819317988706962/5479a322736e4663944d983720a6648e/artifacts/model}/model.pkl +2 -2
- {notebooks/mlartifacts/475209732522917118/6bfd7856e3624d38aadfd33e3fc79343/artifacts/best_model → mlartifacts/149819317988706962/5479a322736e4663944d983720a6648e/artifacts/model}/python_env.yaml +0 -0
- {notebooks/mlartifacts/475209732522917118/6bfd7856e3624d38aadfd33e3fc79343/artifacts/best_model → mlartifacts/149819317988706962/5479a322736e4663944d983720a6648e/artifacts/model}/requirements.txt +0 -0
- {notebooks/mlartifacts/803928810609079892/bb1075c63ad14a51ba070d2a3e945f22 → mlartifacts/149819317988706962/d256fba7a7fe43c39749afef37154210}/artifacts/estimator.html +1 -1
- {notebooks/mlartifacts/475209732522917118/e8a145a55c094cdc9e55c7b9d5a89bf5 → mlartifacts/149819317988706962/d256fba7a7fe43c39749afef37154210}/artifacts/model/MLmodel +4 -4
- {notebooks/mlartifacts/475209732522917118/d6de58a8b1b9445a8da3f306598e1754 → mlartifacts/149819317988706962/d256fba7a7fe43c39749afef37154210}/artifacts/model/conda.yaml +0 -0
- {notebooks/mlartifacts/475209732522917118/d6de58a8b1b9445a8da3f306598e1754 → mlartifacts/149819317988706962/d256fba7a7fe43c39749afef37154210}/artifacts/model/model.pkl +2 -2
- {notebooks/mlartifacts/475209732522917118/d6de58a8b1b9445a8da3f306598e1754 → mlartifacts/149819317988706962/d256fba7a7fe43c39749afef37154210}/artifacts/model/python_env.yaml +0 -0
- {notebooks/mlartifacts/475209732522917118/d6de58a8b1b9445a8da3f306598e1754 → mlartifacts/149819317988706962/d256fba7a7fe43c39749afef37154210}/artifacts/model/requirements.txt +0 -0
- mlartifacts/674375719018272828/2ad059c5d4704ed088a288d572818bcf/artifacts/feature_importance_weight.json +1 -0
- mlartifacts/674375719018272828/2ad059c5d4704ed088a288d572818bcf/artifacts/feature_importance_weight.png +0 -0
- {notebooks/mlartifacts/588532547813609546/29a7ce3e5aff4004b017460bf6d2274b → mlartifacts/674375719018272828/2ad059c5d4704ed088a288d572818bcf}/artifacts/model/MLmodel +4 -4
- {notebooks/mlartifacts/588532547813609546/29a7ce3e5aff4004b017460bf6d2274b → mlartifacts/674375719018272828/2ad059c5d4704ed088a288d572818bcf}/artifacts/model/conda.yaml +0 -0
- {notebooks/mlartifacts/588532547813609546/4e8ce91d81c549cf80846c249e959c20 → mlartifacts/674375719018272828/2ad059c5d4704ed088a288d572818bcf}/artifacts/model/model.xgb +2 -2
- {notebooks/mlartifacts/475209732522917118/e8a145a55c094cdc9e55c7b9d5a89bf5 → mlartifacts/674375719018272828/2ad059c5d4704ed088a288d572818bcf}/artifacts/model/python_env.yaml +0 -0
- {notebooks/mlartifacts/588532547813609546/29a7ce3e5aff4004b017460bf6d2274b → mlartifacts/674375719018272828/2ad059c5d4704ed088a288d572818bcf}/artifacts/model/requirements.txt +0 -0
- mlartifacts/674375719018272828/613e592c317949abb0b86f18100a354c/artifacts/feature_importance_weight.json +1 -0
- mlartifacts/674375719018272828/613e592c317949abb0b86f18100a354c/artifacts/feature_importance_weight.png +0 -0
- {notebooks/mlartifacts/588532547813609546/4e8ce91d81c549cf80846c249e959c20 → mlartifacts/674375719018272828/613e592c317949abb0b86f18100a354c}/artifacts/model/MLmodel +4 -4
- {notebooks/mlartifacts/588532547813609546/4e8ce91d81c549cf80846c249e959c20 → mlartifacts/674375719018272828/613e592c317949abb0b86f18100a354c}/artifacts/model/conda.yaml +0 -0
- {notebooks/mlartifacts/588532547813609546/29a7ce3e5aff4004b017460bf6d2274b → mlartifacts/674375719018272828/613e592c317949abb0b86f18100a354c}/artifacts/model/model.xgb +2 -2
- {notebooks/mlartifacts/588532547813609546/29a7ce3e5aff4004b017460bf6d2274b → mlartifacts/674375719018272828/613e592c317949abb0b86f18100a354c}/artifacts/model/python_env.yaml +0 -0
- {notebooks/mlartifacts/588532547813609546/4e8ce91d81c549cf80846c249e959c20 → mlartifacts/674375719018272828/613e592c317949abb0b86f18100a354c}/artifacts/model/requirements.txt +0 -0
- mlruns/0/meta.yaml +6 -0
- {notebooks/mlruns/475209732522917118/d6de58a8b1b9445a8da3f306598e1754/inputs/c129156961c7c72977ff2820a1443bc0 → mlruns/149819317988706962/5479a322736e4663944d983720a6648e/inputs/9f74c11a603ae42026ea171546387a69}/meta.yaml +0 -0
- {notebooks/mlruns/475209732522917118/d6de58a8b1b9445a8da3f306598e1754/inputs/f9b16ea0f807fe917284d4acb7165ee9 → mlruns/149819317988706962/5479a322736e4663944d983720a6648e/inputs/ee9b180d5518cce671940233dfff09ce}/meta.yaml +0 -0
- {notebooks/mlruns/588532547813609546/29a7ce3e5aff4004b017460bf6d2274b → mlruns/149819317988706962/5479a322736e4663944d983720a6648e}/meta.yaml +7 -7
- mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/cpu_utilization_percentage +1 -0
- mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/disk_available_megabytes +1 -0
- mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/disk_usage_megabytes +1 -0
- mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/disk_usage_percentage +1 -0
- mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/gpu_0_memory_usage_megabytes +1 -0
- mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/gpu_0_memory_usage_percentage +1 -0
- mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/gpu_0_utilization_percentage +1 -0
- mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/network_receive_megabytes +1 -0
- mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/network_transmit_megabytes +1 -0
- mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/system_memory_usage_megabytes +1 -0
- mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/system_memory_usage_percentage +1 -0
- mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/training_mean_absolute_error +1 -0
- mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/training_mean_squared_error +1 -0
- mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/training_r2_score +1 -0
README.md
CHANGED
|
@@ -6,7 +6,7 @@
|
|
| 6 |
|
| 7 |
Air pollution is a significant environmental concern, especially in urban areas, where the high levels of nitrogen dioxide and ozone can have a negative impact on human health, the ecosystem and on the overall quality of life. Given these risks, monitoring and forecasting the level of air pollution is an important task in order to allow for timely actions to reduce the harmful effects.
|
| 8 |
|
| 9 |
-
In the Netherlands, cities like Utrecht experience challenges concerning air quality due to urbanization, transportation, and industrial activities. Developing a system that can provide accurate and robust real-time air quality monitoring and reliable forecasts for future pollution levels would allow authorities and residents to take preventive measures and adjust their future activities based on expected air quality. This project focuses on the time-series forecasting of air pollution levels, specifically NO
|
| 10 |
|
| 11 |
## How To Run This Code
|
| 12 |
|
|
@@ -21,49 +21,47 @@ The notebooks in this project were used as scratch for analysis and data merge a
|
|
| 21 |
βββ Makefile <- Makefile with convenience commands like `make data` or `make train`
|
| 22 |
βββ README.md <- The top-level README for developers using this project.
|
| 23 |
βββ data
|
| 24 |
-
β βββ
|
| 25 |
-
β βββ interim <- Intermediate data that has been transformed.
|
| 26 |
-
β βββ processed <- The final, canonical data sets for modeling.
|
| 27 |
β βββ raw <- The original, immutable data dump.
|
| 28 |
β
|
| 29 |
-
|
| 30 |
β
|
| 31 |
-
βββ
|
| 32 |
β
|
| 33 |
-
|
| 34 |
-
β
|
| 35 |
-
|
|
|
|
|
|
|
| 36 |
β
|
| 37 |
βββ pyproject.toml <- Project configuration file with package metadata for
|
| 38 |
β air-quality-forecast and configuration for tools like black
|
| 39 |
β
|
| 40 |
-
βββ references <- Data dictionaries, manuals, and all other explanatory materials.
|
| 41 |
β
|
| 42 |
-
βββ reports <- Generated analysis as HTML, PDF, LaTeX, etc.
|
| 43 |
-
β βββ figures <- Generated graphics and figures to be used in reporting
|
| 44 |
β
|
| 45 |
βββ requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
|
| 46 |
β generated with `pip freeze > requirements.txt`
|
| 47 |
β
|
| 48 |
βββ setup.cfg <- Configuration file for flake8
|
| 49 |
β
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
βββ air-quality-forecast <- Source code for use in this project.
|
| 51 |
β
|
| 52 |
βββ __init__.py <- Makes air-quality-forecast a Python module
|
| 53 |
β
|
| 54 |
-
βββ
|
| 55 |
-
β
|
| 56 |
-
βββ dataset.py <- Scripts to download or generate data
|
| 57 |
β
|
| 58 |
-
βββ
|
| 59 |
β
|
| 60 |
-
βββ
|
| 61 |
-
β βββ __init__.py
|
| 62 |
-
β βββ predict.py <- Code to run model inference with trained models
|
| 63 |
-
β βββ train.py <- Code to train models
|
| 64 |
β
|
| 65 |
-
βββ
|
| 66 |
-
```
|
| 67 |
|
| 68 |
--------
|
| 69 |
|
|
|
|
| 6 |
|
| 7 |
Air pollution is a significant environmental concern, especially in urban areas, where the high levels of nitrogen dioxide and ozone can have a negative impact on human health, the ecosystem and on the overall quality of life. Given these risks, monitoring and forecasting the level of air pollution is an important task in order to allow for timely actions to reduce the harmful effects.
|
| 8 |
|
| 9 |
+
In the Netherlands, cities like Utrecht experience challenges concerning air quality due to urbanization, transportation, and industrial activities. Developing a system that can provide accurate and robust real-time air quality monitoring and reliable forecasts for future pollution levels would allow authorities and residents to take preventive measures and adjust their future activities based on expected air quality. This project focuses on the time-series forecasting of air pollution levels, specifically NO<sub>2</sub> and O<sub>3</sub> concentrations, for the next three days. This task can be framed as a regression problem, where the goal is to predict continuous values based on historical environmental data. Moreover, it provides infrastructure for real-time prediction, based on recent measurements.
|
| 10 |
|
| 11 |
## How To Run This Code
|
| 12 |
|
|
|
|
| 21 |
βββ Makefile <- Makefile with convenience commands like `make data` or `make train`
|
| 22 |
βββ README.md <- The top-level README for developers using this project.
|
| 23 |
βββ data
|
| 24 |
+
β βββ processed <- The final, canonical data sets for modeling. Contains the train-test split.
|
|
|
|
|
|
|
| 25 |
β βββ raw <- The original, immutable data dump.
|
| 26 |
β
|
| 27 |
+
βββ.github <- Contains automated workflows for reproducibility and flake8 checks.
|
| 28 |
β
|
| 29 |
+
βββ docs <- TODO: A default mkdocs project; see www.mkdocs.org for details
|
| 30 |
β
|
| 31 |
+
├───mlruns <- Contains all the experiments run using mlflow.
|
| 32 |
+
β
|
| 33 |
+
ββββmlartifacts <- Contains the artifacts generated by mlflow experiments.
|
| 34 |
+
β
|
| 35 |
+
βββ notebooks <- Jupyter notebooks (not to be evaluated, source code is in air-quality-forecast)
|
| 36 |
β
|
| 37 |
βββ pyproject.toml <- Project configuration file with package metadata for
|
| 38 |
β air-quality-forecast and configuration for tools like black
|
| 39 |
β
|
| 40 |
+
βββ references <- TODO: Data dictionaries, manuals, and all other explanatory materials.
|
| 41 |
β
|
| 42 |
+
βββ reports <- TODO: Generated analysis as HTML, PDF, LaTeX, etc.
|
| 43 |
+
β βββ figures <- TODO: Generated graphics and figures to be used in reporting
|
| 44 |
β
|
| 45 |
βββ requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
|
| 46 |
β generated with `pip freeze > requirements.txt`
|
| 47 |
β
|
| 48 |
βββ setup.cfg <- Configuration file for flake8
|
| 49 |
β
|
| 50 |
+
βββ configs <- Configuration folder for the hyperparameter search space (for now)
|
| 51 |
+
β
|
| 52 |
+
βββ extra_scripts <- Some extra scripts in R and .tex to generate figures
|
| 53 |
+
β
|
| 54 |
βββ air-quality-forecast <- Source code for use in this project.
|
| 55 |
β
|
| 56 |
βββ __init__.py <- Makes air-quality-forecast a Python module
|
| 57 |
β
|
| 58 |
+
├── data_pipeline.py <- Loads, extracts, and preprocesses the data. Final result is the train-test split under data/processed
|
|
|
|
|
|
|
| 59 |
β
|
| 60 |
+
βββ model_development.py <- Trains the three models using k-fold CV and Bayesian hyperparameter tuning
|
| 61 |
β
|
| 62 |
+
βββ utils.py <- Utility functions, e.g. validation
|
|
|
|
|
|
|
|
|
|
| 63 |
β
|
| 64 |
+
βββ main.py <- To execute and start the project
|
|
|
|
| 65 |
|
| 66 |
--------
|
| 67 |
|
air-quality-forecast/model_development.py
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import mlflow
|
| 2 |
+
import mlflow.sklearn
|
| 3 |
+
import subprocess
|
| 4 |
+
import numpy as np
|
| 5 |
+
from sklearn.base import BaseEstimator
|
| 6 |
+
from sklearn.metrics import mean_squared_error, root_mean_squared_error
|
| 7 |
+
from sklearn.model_selection import TimeSeriesSplit
|
| 8 |
+
from skopt import BayesSearchCV
|
| 9 |
+
from typing import Dict, Any
|
| 10 |
+
import socket
|
| 11 |
+
import pandas as pd
|
| 12 |
+
import warnings
|
| 13 |
+
from sklearn.tree import DecisionTreeRegressor
|
| 14 |
+
from xgboost import XGBRegressor
|
| 15 |
+
from sklearn.ensemble import RandomForestRegressor
|
| 16 |
+
import yaml
|
| 17 |
+
import os
|
| 18 |
+
import sys
|
| 19 |
+
|
| 20 |
+
# Suppress every warning category for the whole process (no category or module
# filter is given), so deprecation and convergence warnings are hidden too.
warnings.filterwarnings("ignore")
|
| 21 |
+
|
| 22 |
+
# Seed used for BayesSearchCV's random_state and for NumPy's global RNG.
RANDOM_SEED = 4242
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class RegressorTrainer:
    """
    Tune a regressor with Bayesian hyperparameter search and report test error.

    The search uses ``BayesSearchCV`` with a ``TimeSeriesSplit`` cross-validation
    scheme and is tracked through a local MLflow server. The error of the best
    estimator on the held-out test set is printed to stdout.
    """

    # Port of the local MLflow server. Shared by the launcher and the tracking
    # URI so the two cannot drift apart.
    _MLFLOW_PORT = 5000
    # Upper bound, in seconds, to wait for a freshly launched server to start
    # accepting connections.
    _MLFLOW_STARTUP_TIMEOUT = 30.0

    def __init__(
        self,
        experiment_name: str,
        regressor: BaseEstimator,
        param_space: Dict[str, Any],
        cv_splits: int = 5,
        n_iter: int = 50,
    ):
        """
        Initialize the RegressorTrainer.

        Parameters:
            experiment_name (str): The name of the MLflow experiment.
            regressor (sklearn model): The regressor to optimize.
            param_space (dict): The parameter space for Bayesian optimization.
            cv_splits (int): Number of splits for cross-validation (default is 5).
            n_iter (int): Number of iterations of the Bayesian search (default is 50).
        """
        self._experiment_name = experiment_name
        self._regressor = regressor
        self._param_space = param_space
        self._cv_splits = cv_splits
        self._n_iter = n_iter
        # Populated by `_perform_search`.
        self._bayes_search: BayesSearchCV | None = None
        # Populated by `_set_data`.
        self._x_train: np.ndarray | None = None
        self._y_train: np.ndarray | None = None
        self._x_test: np.ndarray | None = None
        self._y_test: np.ndarray | None = None

    def _port_in_use(self, port: int) -> bool:
        """
        Check whether something accepts TCP connections on localhost:port.

        Parameters:
            port (int): The TCP port to probe.

        Returns:
            bool: True if a connection attempt succeeds, False otherwise.
        """
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            # connect_ex returns an error code (0 on success) instead of raising,
            # so compare against 0 to obtain a bool.
            return s.connect_ex(("localhost", port)) == 0

    def _launch_mlflow_server(self) -> None:
        """
        Start the MLflow UI server on the configured port unless one is already
        listening there.

        Launching is best-effort: failures are printed, not raised.
        """
        import time  # local import: only needed while waiting for the server

        port = self._MLFLOW_PORT
        if self._port_in_use(port):
            print(f"MLflow server is running at http://127.0.0.1:{port}")
            return

        try:
            subprocess.Popen(
                # Use the running interpreter rather than the Windows-only `py`
                # launcher, so the server starts on every platform and in the
                # same environment as this process.
                [sys.executable, "-m", "mlflow", "ui", "--port", str(port)],
                # Discard the output: a PIPE that is never read can fill up and
                # block the child process.
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except Exception as e:
            print("Error launching MLflow server:", e)
            return

        # Popen returns immediately; wait (bounded) until the server listens so
        # the first tracking call does not race the server start-up.
        deadline = time.monotonic() + self._MLFLOW_STARTUP_TIMEOUT
        while time.monotonic() < deadline:
            if self._port_in_use(port):
                print(f"MLflow server launched at http://127.0.0.1:{port}")
                return
            time.sleep(0.5)
        print(
            f"MLflow server did not start listening on port {port} "
            f"within {self._MLFLOW_STARTUP_TIMEOUT:.0f} seconds"
        )

    def _set_data(
        self,
        x_train: np.ndarray,
        y_train: np.ndarray,
        x_test: np.ndarray,
        y_test: np.ndarray,
    ) -> None:
        """
        Set the training and test data as class attributes.

        Each input is converted with ``np.array`` before being stored.

        Parameters:
            x_train (np.ndarray): Training data features.
            y_train (np.ndarray): Training data labels.
            x_test (np.ndarray): Test data features.
            y_test (np.ndarray): Test data labels.
        """
        self._x_train = np.array(x_train)
        self._y_train = np.array(y_train)
        self._x_test = np.array(x_test)
        self._y_test = np.array(y_test)

    def _setup_mlflow(self) -> None:
        """Set up MLflow configuration."""
        self._launch_mlflow_server()
        # The tracking URI must be set before the experiment: set_experiment
        # resolves (or creates) the experiment in the store active at call time.
        mlflow.set_tracking_uri(f"http://localhost:{self._MLFLOW_PORT}/")
        mlflow.set_experiment(self._experiment_name)
        mlflow.enable_system_metrics_logging()
        mlflow.autolog()

    def _perform_search(self) -> None:
        """
        Perform Bayesian optimization for hyperparameters.

        Raises:
            ValueError: If the training data has not been set.
        """
        if self._x_train is None or self._y_train is None:
            raise ValueError("Training data has not been set. Call `_set_data` first.")

        # Set up TimeSeriesSplit for cross-validation
        time_series_split = TimeSeriesSplit(n_splits=self._cv_splits)

        # Initialize and perform BayesSearchCV
        self._bayes_search = BayesSearchCV(
            estimator=self._regressor,
            search_spaces=self._param_space,
            n_iter=self._n_iter,  # Number of iterations for the search
            cv=time_series_split,  # Cross-validation scheme
            scoring="neg_mean_squared_error",  # Metric for scoring
            n_jobs=-1,  # Use all available CPU cores
            verbose=1,  # To display progress
            random_state=RANDOM_SEED,  # Ensures reproducibility
        )

        self._bayes_search.fit(self._x_train, self._y_train)

    def _evaluate_model(self) -> None:
        """
        Evaluate the best model on the test data and print the results.

        Prints the best hyperparameters, the test MSE and the test RMSE.

        Raises:
            ValueError: If the test data has not been set or the search has not
                been performed.
        """
        if self._x_test is None or self._y_test is None:
            raise ValueError("Test data has not been set. Call `_set_data` first.")
        if self._bayes_search is None:
            raise ValueError(
                "Bayesian search has not been performed. Call `_perform_search` first."
            )

        best_regressor = self._bayes_search.best_estimator_

        # Predict once and reuse the predictions for both metrics.
        predictions = best_regressor.predict(self._x_test)
        test_mse = mean_squared_error(self._y_test, predictions)
        test_rmse = root_mean_squared_error(self._y_test, predictions)

        print(
            "Best hyperparameters found by Bayesian optimization:",
            self._bayes_search.best_params_,
        )
        print("Test MSE: ", test_mse)
        print("Test RMSE: ", test_rmse)

    def run(
        self,
        x_train: np.ndarray,
        y_train: np.ndarray,
        x_test: np.ndarray,
        y_test: np.ndarray,
    ) -> None:
        """
        Run the full pipeline: store the data, configure MLflow, search for the
        best hyperparameters and evaluate the best model on the test data.

        Parameters:
            x_train (np.ndarray): Training data features.
            y_train (np.ndarray): Training data labels.
            x_test (np.ndarray): Test data features.
            y_test (np.ndarray): Test data labels.
        """
        self._set_data(x_train, y_train, x_test, y_test)
        self._setup_mlflow()
        self._perform_search()
        self._evaluate_model()
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def set_path() -> None:
    """
    Put the directory above this file's folder at the front of ``sys.path``.

    The symlink-resolved location of this file is taken, its containing folder
    is determined, and that folder's parent is inserted at index 0 of
    ``sys.path`` so modules located there can be imported.

    Parameters:
        None

    Returns:
        None
    """
    this_file = os.path.realpath(__file__)
    containing_dir, _ = os.path.split(this_file)
    sys.path.insert(0, os.path.dirname(containing_dir))
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def run_bayesian_optimization(
    x_train: np.ndarray,
    y_train: np.ndarray,
    x_test: np.ndarray,
    y_test: np.ndarray,
    experiment_name: str,
    regressor: BaseEstimator,
    param_space: Dict[str, Any],
    n_iter: int,
) -> None:
    """
    Build a RegressorTrainer for one regressor and run it on the given data.

    Parameters:
        x_train (np.ndarray): Training data features.
        y_train (np.ndarray): Training data labels.
        x_test (np.ndarray): Test data features.
        y_test (np.ndarray): Test data labels.
        experiment_name (str): The name of the MLflow experiment.
        regressor (sklearn model): The regressor to optimize.
        param_space (dict): The parameter space for Bayesian optimization.
        n_iter (int): Number of iterations for the search.

    """
    trainer_settings = {
        "experiment_name": experiment_name,
        "regressor": regressor,
        "param_space": param_space,
        "n_iter": n_iter,
    }
    RegressorTrainer(**trainer_settings).run(x_train, y_train, x_test, y_test)
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
def train_all_models(n_iter: int = 1) -> None:
    """
    Run Bayesian hyperparameter optimization for the decision tree, XGBoost and
    random forest regressors on the processed train/test split.

    The data is read from ``data/processed`` and the search spaces from
    ``configs/hyperparameter_search_spaces.yaml``. Each path is first tried
    relative to the current working directory; if it does not exist there, it
    is resolved relative to the directory above this file's folder.

    Parameters:
        n_iter (int): Number of Bayesian search iterations per model. The
            default of 1 keeps the previously hard-coded value; raise it for a
            real search.

    Returns:
        None
    """
    set_path()

    np.random.seed(RANDOM_SEED)

    project_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

    def _resolve(relative_path: str) -> str:
        # Prefer the working-directory-relative path; fall back to the project
        # root so the script also works when launched from another directory.
        if os.path.exists(relative_path):
            return relative_path
        return os.path.join(project_root, relative_path)

    x_train = pd.read_csv(_resolve("data/processed/x_train.csv"), index_col=0)
    y_train = pd.read_csv(_resolve("data/processed/y_train.csv"), index_col=0)
    x_test = pd.read_csv(_resolve("data/processed/x_test.csv"), index_col=0)
    y_test = pd.read_csv(_resolve("data/processed/y_test.csv"), index_col=0)

    with open(_resolve("configs/hyperparameter_search_spaces.yaml"), "r") as stream:
        param_space_config = yaml.safe_load(stream)

    # (MLflow experiment name, estimator, key in the search-space config)
    models = (
        ("DecisionTree-BayesianOptimization", DecisionTreeRegressor(), "decision_tree"),
        ("XGBoost-BayesianOptimization", XGBRegressor(), "xgboost"),
        ("RandomForest-BayesianOptimization", RandomForestRegressor(), "random_forest"),
    )
    for experiment_name, regressor, config_key in models:
        run_bayesian_optimization(
            x_train,
            y_train,
            x_test,
            y_test,
            experiment_name=experiment_name,
            regressor=regressor,
            param_space=param_space_config[config_key],
            n_iter=n_iter,
        )


# Train all models when this file is executed as a script.
if __name__ == "__main__":
    train_all_models()
|
configs/hyperparameter_search_spaces.yaml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
decision_tree:
|
| 2 |
+
max_depth: [20, 50]
|
| 3 |
+
|
| 4 |
+
random_forest:
|
| 5 |
+
max_depth: [20, 50]
|
| 6 |
+
|
| 7 |
+
xgboost:
|
| 8 |
+
max_depth: [20, 50]
|
data/external/.gitkeep
DELETED
|
File without changes
|
data/interim/.gitkeep
DELETED
|
File without changes
|
data/interim/correlation_matrix.csv
DELETED
|
@@ -1,30 +0,0 @@
|
|
| 1 |
-
,pm25,pm10,o3,no2,so2,tempmax,tempmin,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,precip,precipprob,precipcover,snow,snowdepth,windgust,windspeed,winddir,sealevelpressure,cloudcover,visibility,solarradiation,solarenergy,uvindex,severerisk,moonphase
|
| 2 |
-
pm25,1,0.602,-0.239,0.397,0.044,-0.293,-0.434,-0.38,-0.293,-0.419,-0.374,-0.361,0.177,-0.224,-0.24,-0.241,0.103,0.099,-0.333,-0.243,-0.225,0.274,-0.106,-0.599,-0.223,-0.224,-0.209,0.053,0.07
|
| 3 |
-
pm10,0.602,1,-0.146,0.504,0.085,-0.114,-0.245,-0.191,-0.116,-0.227,-0.184,-0.212,0.018,-0.182,-0.252,-0.229,0.052,0.036,-0.251,-0.199,-0.157,0.246,-0.175,-0.356,-0.097,-0.097,-0.074,0.113,0.02
|
| 4 |
-
o3,-0.239,-0.146,1,-0.505,0.001,0.599,0.412,0.555,0.577,0.42,0.533,0.356,-0.587,-0.048,-0.166,-0.133,-0.051,-0.053,-0.006,-0.018,-0.062,0.036,-0.275,0.418,0.636,0.636,0.608,0.073,0.029
|
| 5 |
-
no2,0.397,0.504,-0.505,1,0.012,-0.384,-0.453,-0.444,-0.377,-0.43,-0.424,-0.379,0.285,-0.113,-0.083,-0.076,0.038,0.039,-0.243,-0.209,-0.038,0.146,-0.088,-0.35,-0.372,-0.372,-0.358,-0.026,0.018
|
| 6 |
-
so2,0.044,0.085,0.001,0.012,1,0.044,0.023,0.037,0.052,0.036,0.046,0.045,-0.003,-0.021,-0.018,-0.022,-0.016,-0.033,-0.019,-0.045,0.082,0.03,-0.008,-0.103,0.038,0.037,0.058,,0.078
|
| 7 |
-
tempmax,-0.293,-0.114,0.599,-0.384,0.044,1,0.828,0.967,0.993,0.861,0.964,0.849,-0.494,-0.063,-0.203,-0.24,-0.165,-0.173,-0.139,-0.172,0,0.045,-0.242,0.353,0.726,0.726,0.711,0.182,0.026
|
| 8 |
-
tempmin,-0.434,-0.245,0.412,-0.453,0.023,0.828,1,0.934,0.829,0.984,0.923,0.935,-0.233,0.117,0.077,-0.002,-0.141,-0.179,0.063,-0.002,0.147,-0.158,0.107,0.284,0.477,0.477,0.459,0.146,0.017
|
| 9 |
-
temp,-0.38,-0.191,0.555,-0.444,0.037,0.967,0.934,1,0.963,0.949,0.993,0.925,-0.412,0.009,-0.09,-0.146,-0.162,-0.186,-0.052,-0.102,0.071,-0.04,-0.097,0.35,0.656,0.657,0.636,0.166,0.021
|
| 10 |
-
feelslikemax,-0.293,-0.116,0.577,-0.377,0.052,0.993,0.829,0.963,1,0.867,0.97,0.857,-0.467,-0.055,-0.197,-0.235,-0.175,-0.19,-0.152,-0.19,0.007,0.049,-0.221,0.337,0.715,0.715,0.703,0.181,0.026
|
| 11 |
-
feelslikemin,-0.419,-0.227,0.42,-0.43,0.036,0.861,0.984,0.949,0.867,1,0.952,0.946,-0.242,0.078,0.026,-0.057,-0.162,-0.192,-0.028,-0.098,0.144,-0.099,0.066,0.281,0.528,0.528,0.512,0.144,0.014
|
| 12 |
-
feelslike,-0.374,-0.184,0.533,-0.424,0.046,0.964,0.923,0.993,0.97,0.952,1,0.927,-0.389,0,-0.104,-0.159,-0.176,-0.198,-0.097,-0.153,0.076,-0.016,-0.096,0.334,0.66,0.66,0.642,0.159,0.019
|
| 13 |
-
dew,-0.361,-0.212,0.356,-0.379,0.045,0.849,0.935,0.925,0.857,0.946,0.927,1,-0.04,0.13,0.072,0.001,-0.15,-0.185,-0.063,-0.128,0.154,-0.12,0.103,0.127,0.43,0.43,0.416,0.156,0.024
|
| 14 |
-
humidity,0.177,0.018,-0.587,0.285,-0.003,-0.494,-0.233,-0.412,-0.467,-0.242,-0.389,-0.04,1,0.285,0.391,0.387,0.076,0.05,-0.065,-0.081,0.155,-0.171,0.472,-0.658,-0.698,-0.698,-0.685,-0.063,0.007
|
| 15 |
-
precip,-0.224,-0.182,-0.048,-0.113,-0.021,-0.063,0.117,0.009,-0.055,0.078,0,0.13,0.285,1,0.48,0.682,0.039,0.001,0.374,0.32,0.12,-0.403,0.278,-0.119,-0.219,-0.219,-0.223,-0.043,-0.021
|
| 16 |
-
precipprob,-0.24,-0.252,-0.166,-0.083,-0.018,-0.203,0.077,-0.09,-0.197,0.026,-0.104,0.072,0.391,0.48,1,0.642,0.069,0.012,0.433,0.37,0.279,-0.476,0.419,-0.113,-0.356,-0.356,-0.355,-0.005,-0.02
|
| 17 |
-
precipcover,-0.241,-0.229,-0.133,-0.076,-0.022,-0.24,-0.002,-0.146,-0.235,-0.057,-0.159,0.001,0.387,0.682,0.642,1,0.101,0.034,0.41,0.335,0.187,-0.466,0.378,-0.175,-0.375,-0.375,-0.387,-0.047,-0.032
|
| 18 |
-
snow,0.103,0.052,-0.051,0.038,-0.016,-0.165,-0.141,-0.162,-0.175,-0.162,-0.176,-0.15,0.076,0.039,0.069,0.101,1,0.346,0.027,0.041,-0.057,-0.061,0.056,-0.096,-0.097,-0.097,-0.102,-0.011,0.034
|
| 19 |
-
snowdepth,0.099,0.036,-0.053,0.039,-0.033,-0.173,-0.179,-0.186,-0.19,-0.192,-0.198,-0.185,0.05,0.001,0.012,0.034,0.346,1,-0.012,0.016,-0.062,-0.007,0.002,-0.042,-0.076,-0.076,-0.075,-0.015,0.016
|
| 20 |
-
windgust,-0.333,-0.251,-0.006,-0.243,-0.019,-0.139,0.063,-0.052,-0.152,-0.028,-0.097,-0.063,-0.065,0.374,0.433,0.41,0.027,-0.012,1,0.907,0.191,-0.418,0.201,0.173,-0.174,-0.174,-0.176,-0.022,-0.021
|
| 21 |
-
windspeed,-0.243,-0.199,-0.018,-0.209,-0.045,-0.172,-0.002,-0.102,-0.19,-0.098,-0.153,-0.128,-0.081,0.32,0.37,0.335,0.041,0.016,0.907,1,0.101,-0.374,0.136,0.148,-0.187,-0.187,-0.184,-0.045,-0.015
|
| 22 |
-
winddir,-0.225,-0.157,-0.062,-0.038,0.082,0,0.147,0.071,0.007,0.144,0.076,0.154,0.155,0.12,0.279,0.187,-0.057,-0.062,0.191,0.101,1,-0.096,0.224,-0.003,-0.029,-0.028,-0.037,0.003,-0.047
|
| 23 |
-
sealevelpressure,0.274,0.246,0.036,0.146,0.03,0.045,-0.158,-0.04,0.049,-0.099,-0.016,-0.12,-0.171,-0.403,-0.476,-0.466,-0.061,-0.007,-0.418,-0.374,-0.096,1,-0.325,-0.045,0.216,0.216,0.213,-0.032,-0.005
|
| 24 |
-
cloudcover,-0.106,-0.175,-0.275,-0.088,-0.008,-0.242,0.107,-0.097,-0.221,0.066,-0.096,0.103,0.472,0.278,0.419,0.378,0.056,0.002,0.201,0.136,0.224,-0.325,1,-0.208,-0.452,-0.452,-0.438,-0.063,-0.026
|
| 25 |
-
visibility,-0.599,-0.356,0.418,-0.35,-0.103,0.353,0.284,0.35,0.337,0.281,0.334,0.127,-0.658,-0.119,-0.113,-0.175,-0.096,-0.042,0.173,0.148,-0.003,-0.045,-0.208,1,0.45,0.45,0.447,0.033,-0.023
|
| 26 |
-
solarradiation,-0.223,-0.097,0.636,-0.372,0.038,0.726,0.477,0.656,0.715,0.528,0.66,0.43,-0.698,-0.219,-0.356,-0.375,-0.097,-0.076,-0.174,-0.187,-0.029,0.216,-0.452,0.45,1,1,0.965,0.118,0.007
|
| 27 |
-
solarenergy,-0.224,-0.097,0.636,-0.372,0.037,0.726,0.477,0.657,0.715,0.528,0.66,0.43,-0.698,-0.219,-0.356,-0.375,-0.097,-0.076,-0.174,-0.187,-0.028,0.216,-0.452,0.45,1,1,0.965,0.119,0.007
|
| 28 |
-
uvindex,-0.209,-0.074,0.608,-0.358,0.058,0.711,0.459,0.636,0.703,0.512,0.642,0.416,-0.685,-0.223,-0.355,-0.387,-0.102,-0.075,-0.176,-0.184,-0.037,0.213,-0.438,0.447,0.965,0.965,1,0.115,0.004
|
| 29 |
-
severerisk,0.053,0.113,0.073,-0.026,,0.182,0.146,0.166,0.181,0.144,0.159,0.156,-0.063,-0.043,-0.005,-0.047,-0.011,-0.015,-0.022,-0.045,0.003,-0.032,-0.063,0.033,0.118,0.119,0.115,1,0.023
|
| 30 |
-
moonphase,0.07,0.02,0.029,0.018,0.078,0.026,0.017,0.021,0.026,0.014,0.019,0.024,0.007,-0.021,-0.02,-0.032,0.034,0.016,-0.021,-0.015,-0.047,-0.005,-0.026,-0.023,0.007,0.007,0.004,0.023,1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{notebooks/mlartifacts/803928810609079892/e684490d78f54fcf8b51f3a2b8018698 → mlartifacts/149819317988706962/5479a322736e4663944d983720a6648e}/artifacts/estimator.html
RENAMED
|
@@ -409,7 +409,7 @@ div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,
|
|
| 409 |
/* fitted */
|
| 410 |
background-color: var(--sklearn-color-fitted-level-3);
|
| 411 |
}
|
| 412 |
-
</style><div id="sk-container-id-2" class="sk-top-container"><div class="sk-text-repr-fallback"><pre>
|
| 413 |
</body>
|
| 414 |
</html>
|
| 415 |
|
|
|
|
| 409 |
/* fitted */
|
| 410 |
background-color: var(--sklearn-color-fitted-level-3);
|
| 411 |
}
|
| 412 |
+
</style><div id="sk-container-id-2" class="sk-top-container"><div class="sk-text-repr-fallback"><pre>RandomForestRegressor(max_depth=34)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class="sk-container" hidden><div class="sk-item"><div class="sk-estimator fitted sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-2" type="checkbox" checked><label for="sk-estimator-id-2" class="sk-toggleable__label fitted sk-toggleable__label-arrow fitted"> RandomForestRegressor<a class="sk-estimator-doc-link fitted" rel="noreferrer" target="_blank" href="https://scikit-learn.org/1.5/modules/generated/sklearn.ensemble.RandomForestRegressor.html">?<span>Documentation for RandomForestRegressor</span></a><span class="sk-estimator-doc-link fitted">i<span>Fitted</span></span></label><div class="sk-toggleable__content fitted"><pre>RandomForestRegressor(max_depth=34)</pre></div> </div></div></div></div>
|
| 413 |
</body>
|
| 414 |
</html>
|
| 415 |
|
{notebooks/mlartifacts/475209732522917118/d6de58a8b1b9445a8da3f306598e1754 β mlartifacts/149819317988706962/5479a322736e4663944d983720a6648e}/artifacts/model/MLmodel
RENAMED
|
@@ -14,12 +14,12 @@ flavors:
|
|
| 14 |
serialization_format: cloudpickle
|
| 15 |
sklearn_version: 1.5.2
|
| 16 |
mlflow_version: 2.16.2
|
| 17 |
-
model_size_bytes:
|
| 18 |
-
model_uuid:
|
| 19 |
-
run_id:
|
| 20 |
signature:
|
| 21 |
inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 33]}}]'
|
| 22 |
outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1,
|
| 23 |
6]}}]'
|
| 24 |
params: null
|
| 25 |
-
utc_time_created: '2024-09-29
|
|
|
|
| 14 |
serialization_format: cloudpickle
|
| 15 |
sklearn_version: 1.5.2
|
| 16 |
mlflow_version: 2.16.2
|
| 17 |
+
model_size_bytes: 34743349
|
| 18 |
+
model_uuid: 2ece0aa5720e4d60a908faa9d0e3d000
|
| 19 |
+
run_id: 5479a322736e4663944d983720a6648e
|
| 20 |
signature:
|
| 21 |
inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 33]}}]'
|
| 22 |
outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1,
|
| 23 |
6]}}]'
|
| 24 |
params: null
|
| 25 |
+
utc_time_created: '2024-09-29 20:35:11.167642'
|
{notebooks/mlartifacts/475209732522917118/6bfd7856e3624d38aadfd33e3fc79343/artifacts/best_model β mlartifacts/149819317988706962/5479a322736e4663944d983720a6648e/artifacts/model}/conda.yaml
RENAMED
|
File without changes
|
{notebooks/mlartifacts/475209732522917118/6bfd7856e3624d38aadfd33e3fc79343/artifacts/best_model β mlartifacts/149819317988706962/5479a322736e4663944d983720a6648e/artifacts/model}/model.pkl
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bf0697af096e259e722ff4149548875d48514deded04ec7fac97f0eca1d5b20
|
| 3 |
+
size 34743349
|
{notebooks/mlartifacts/475209732522917118/6bfd7856e3624d38aadfd33e3fc79343/artifacts/best_model β mlartifacts/149819317988706962/5479a322736e4663944d983720a6648e/artifacts/model}/python_env.yaml
RENAMED
|
File without changes
|
{notebooks/mlartifacts/475209732522917118/6bfd7856e3624d38aadfd33e3fc79343/artifacts/best_model β mlartifacts/149819317988706962/5479a322736e4663944d983720a6648e/artifacts/model}/requirements.txt
RENAMED
|
File without changes
|
{notebooks/mlartifacts/803928810609079892/bb1075c63ad14a51ba070d2a3e945f22 β mlartifacts/149819317988706962/d256fba7a7fe43c39749afef37154210}/artifacts/estimator.html
RENAMED
|
@@ -409,7 +409,7 @@ div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,
|
|
| 409 |
/* fitted */
|
| 410 |
background-color: var(--sklearn-color-fitted-level-3);
|
| 411 |
}
|
| 412 |
-
</style><div id="sk-container-id-2" class="sk-top-container"><div class="sk-text-repr-fallback"><pre>
|
| 413 |
</body>
|
| 414 |
</html>
|
| 415 |
|
|
|
|
| 409 |
/* fitted */
|
| 410 |
background-color: var(--sklearn-color-fitted-level-3);
|
| 411 |
}
|
| 412 |
+
</style><div id="sk-container-id-2" class="sk-top-container"><div class="sk-text-repr-fallback"><pre>RandomForestRegressor(max_depth=34)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class="sk-container" hidden><div class="sk-item"><div class="sk-estimator fitted sk-toggleable"><input class="sk-toggleable__control sk-hidden--visually" id="sk-estimator-id-2" type="checkbox" checked><label for="sk-estimator-id-2" class="sk-toggleable__label fitted sk-toggleable__label-arrow fitted"> RandomForestRegressor<a class="sk-estimator-doc-link fitted" rel="noreferrer" target="_blank" href="https://scikit-learn.org/1.5/modules/generated/sklearn.ensemble.RandomForestRegressor.html">?<span>Documentation for RandomForestRegressor</span></a><span class="sk-estimator-doc-link fitted">i<span>Fitted</span></span></label><div class="sk-toggleable__content fitted"><pre>RandomForestRegressor(max_depth=34)</pre></div> </div></div></div></div>
|
| 413 |
</body>
|
| 414 |
</html>
|
| 415 |
|
{notebooks/mlartifacts/475209732522917118/e8a145a55c094cdc9e55c7b9d5a89bf5 β mlartifacts/149819317988706962/d256fba7a7fe43c39749afef37154210}/artifacts/model/MLmodel
RENAMED
|
@@ -14,12 +14,12 @@ flavors:
|
|
| 14 |
serialization_format: cloudpickle
|
| 15 |
sklearn_version: 1.5.2
|
| 16 |
mlflow_version: 2.16.2
|
| 17 |
-
model_size_bytes:
|
| 18 |
-
model_uuid:
|
| 19 |
-
run_id:
|
| 20 |
signature:
|
| 21 |
inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 33]}}]'
|
| 22 |
outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1,
|
| 23 |
6]}}]'
|
| 24 |
params: null
|
| 25 |
-
utc_time_created: '2024-09-29
|
|
|
|
| 14 |
serialization_format: cloudpickle
|
| 15 |
sklearn_version: 1.5.2
|
| 16 |
mlflow_version: 2.16.2
|
| 17 |
+
model_size_bytes: 34762837
|
| 18 |
+
model_uuid: d6a4b42658cb4824b3125c7ec63d1ed4
|
| 19 |
+
run_id: d256fba7a7fe43c39749afef37154210
|
| 20 |
signature:
|
| 21 |
inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 33]}}]'
|
| 22 |
outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1,
|
| 23 |
6]}}]'
|
| 24 |
params: null
|
| 25 |
+
utc_time_created: '2024-09-29 20:41:11.947759'
|
{notebooks/mlartifacts/475209732522917118/d6de58a8b1b9445a8da3f306598e1754 β mlartifacts/149819317988706962/d256fba7a7fe43c39749afef37154210}/artifacts/model/conda.yaml
RENAMED
|
File without changes
|
{notebooks/mlartifacts/475209732522917118/d6de58a8b1b9445a8da3f306598e1754 β mlartifacts/149819317988706962/d256fba7a7fe43c39749afef37154210}/artifacts/model/model.pkl
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72ba8a18da708a814986b0edbe6b5a4a7e931fea667acabeb4040aee0c038e02
|
| 3 |
+
size 34762837
|
{notebooks/mlartifacts/475209732522917118/d6de58a8b1b9445a8da3f306598e1754 β mlartifacts/149819317988706962/d256fba7a7fe43c39749afef37154210}/artifacts/model/python_env.yaml
RENAMED
|
File without changes
|
{notebooks/mlartifacts/475209732522917118/d6de58a8b1b9445a8da3f306598e1754 β mlartifacts/149819317988706962/d256fba7a7fe43c39749afef37154210}/artifacts/model/requirements.txt
RENAMED
|
File without changes
|
mlartifacts/674375719018272828/2ad059c5d4704ed088a288d572818bcf/artifacts/feature_importance_weight.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"f0": 30335.0, "f1": 12283.0, "f2": 8903.0, "f3": 6526.0, "f4": 8117.0, "f5": 7541.0, "f6": 6146.0, "f7": 6050.0, "f8": 3123.0, "f9": 3844.0, "f10": 6510.0, "f11": 4622.0, "f12": 3717.0, "f13": 3799.0, "f14": 2906.0, "f15": 3587.0, "f16": 3904.0, "f17": 4225.0, "f18": 3999.0, "f19": 2503.0, "f20": 2764.0, "f21": 4179.0, "f22": 4006.0, "f23": 3549.0, "f24": 3621.0, "f25": 2669.0, "f26": 2954.0, "f27": 3504.0, "f28": 3645.0, "f29": 3696.0, "f30": 2283.0, "f31": 2338.0, "f32": 3725.0}
|
mlartifacts/674375719018272828/2ad059c5d4704ed088a288d572818bcf/artifacts/feature_importance_weight.png
ADDED
|
{notebooks/mlartifacts/588532547813609546/29a7ce3e5aff4004b017460bf6d2274b β mlartifacts/674375719018272828/2ad059c5d4704ed088a288d572818bcf}/artifacts/model/MLmodel
RENAMED
|
@@ -14,12 +14,12 @@ flavors:
|
|
| 14 |
model_format: xgb
|
| 15 |
xgb_version: 2.1.1
|
| 16 |
mlflow_version: 2.16.2
|
| 17 |
-
model_size_bytes:
|
| 18 |
-
model_uuid:
|
| 19 |
-
run_id:
|
| 20 |
signature:
|
| 21 |
inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 33]}}]'
|
| 22 |
outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float32", "shape": [-1,
|
| 23 |
6]}}]'
|
| 24 |
params: null
|
| 25 |
-
utc_time_created: '2024-09-29
|
|
|
|
| 14 |
model_format: xgb
|
| 15 |
xgb_version: 2.1.1
|
| 16 |
mlflow_version: 2.16.2
|
| 17 |
+
model_size_bytes: 12354728
|
| 18 |
+
model_uuid: ae7825e16b794647905010b3fabdf5ee
|
| 19 |
+
run_id: 2ad059c5d4704ed088a288d572818bcf
|
| 20 |
signature:
|
| 21 |
inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 33]}}]'
|
| 22 |
outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float32", "shape": [-1,
|
| 23 |
6]}}]'
|
| 24 |
params: null
|
| 25 |
+
utc_time_created: '2024-09-29 20:34:46.696536'
|
{notebooks/mlartifacts/588532547813609546/29a7ce3e5aff4004b017460bf6d2274b β mlartifacts/674375719018272828/2ad059c5d4704ed088a288d572818bcf}/artifacts/model/conda.yaml
RENAMED
|
File without changes
|
{notebooks/mlartifacts/588532547813609546/4e8ce91d81c549cf80846c249e959c20 β mlartifacts/674375719018272828/2ad059c5d4704ed088a288d572818bcf}/artifacts/model/model.xgb
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c823873e6e051da216bde5618c3db8390d1cdc8e1edd143c59272d69548fc05f
|
| 3 |
+
size 12354728
|
{notebooks/mlartifacts/475209732522917118/e8a145a55c094cdc9e55c7b9d5a89bf5 β mlartifacts/674375719018272828/2ad059c5d4704ed088a288d572818bcf}/artifacts/model/python_env.yaml
RENAMED
|
File without changes
|
{notebooks/mlartifacts/588532547813609546/29a7ce3e5aff4004b017460bf6d2274b β mlartifacts/674375719018272828/2ad059c5d4704ed088a288d572818bcf}/artifacts/model/requirements.txt
RENAMED
|
File without changes
|
mlartifacts/674375719018272828/613e592c317949abb0b86f18100a354c/artifacts/feature_importance_weight.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"f0": 30335.0, "f1": 12283.0, "f2": 8903.0, "f3": 6526.0, "f4": 8117.0, "f5": 7541.0, "f6": 6146.0, "f7": 6050.0, "f8": 3123.0, "f9": 3844.0, "f10": 6510.0, "f11": 4622.0, "f12": 3717.0, "f13": 3799.0, "f14": 2906.0, "f15": 3587.0, "f16": 3904.0, "f17": 4225.0, "f18": 3999.0, "f19": 2503.0, "f20": 2764.0, "f21": 4179.0, "f22": 4006.0, "f23": 3549.0, "f24": 3621.0, "f25": 2669.0, "f26": 2954.0, "f27": 3504.0, "f28": 3645.0, "f29": 3696.0, "f30": 2283.0, "f31": 2338.0, "f32": 3725.0}
|
mlartifacts/674375719018272828/613e592c317949abb0b86f18100a354c/artifacts/feature_importance_weight.png
ADDED
|
{notebooks/mlartifacts/588532547813609546/4e8ce91d81c549cf80846c249e959c20 β mlartifacts/674375719018272828/613e592c317949abb0b86f18100a354c}/artifacts/model/MLmodel
RENAMED
|
@@ -14,12 +14,12 @@ flavors:
|
|
| 14 |
model_format: xgb
|
| 15 |
xgb_version: 2.1.1
|
| 16 |
mlflow_version: 2.16.2
|
| 17 |
-
model_size_bytes:
|
| 18 |
-
model_uuid:
|
| 19 |
-
run_id:
|
| 20 |
signature:
|
| 21 |
inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 33]}}]'
|
| 22 |
outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float32", "shape": [-1,
|
| 23 |
6]}}]'
|
| 24 |
params: null
|
| 25 |
-
utc_time_created: '2024-09-29
|
|
|
|
| 14 |
model_format: xgb
|
| 15 |
xgb_version: 2.1.1
|
| 16 |
mlflow_version: 2.16.2
|
| 17 |
+
model_size_bytes: 12354728
|
| 18 |
+
model_uuid: fa950ae2169a47128401ab637dab6f5c
|
| 19 |
+
run_id: 613e592c317949abb0b86f18100a354c
|
| 20 |
signature:
|
| 21 |
inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 33]}}]'
|
| 22 |
outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float32", "shape": [-1,
|
| 23 |
6]}}]'
|
| 24 |
params: null
|
| 25 |
+
utc_time_created: '2024-09-29 20:40:34.164593'
|
{notebooks/mlartifacts/588532547813609546/4e8ce91d81c549cf80846c249e959c20 β mlartifacts/674375719018272828/613e592c317949abb0b86f18100a354c}/artifacts/model/conda.yaml
RENAMED
|
File without changes
|
{notebooks/mlartifacts/588532547813609546/29a7ce3e5aff4004b017460bf6d2274b β mlartifacts/674375719018272828/613e592c317949abb0b86f18100a354c}/artifacts/model/model.xgb
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c823873e6e051da216bde5618c3db8390d1cdc8e1edd143c59272d69548fc05f
|
| 3 |
+
size 12354728
|
{notebooks/mlartifacts/588532547813609546/29a7ce3e5aff4004b017460bf6d2274b β mlartifacts/674375719018272828/613e592c317949abb0b86f18100a354c}/artifacts/model/python_env.yaml
RENAMED
|
File without changes
|
{notebooks/mlartifacts/588532547813609546/4e8ce91d81c549cf80846c249e959c20 β mlartifacts/674375719018272828/613e592c317949abb0b86f18100a354c}/artifacts/model/requirements.txt
RENAMED
|
File without changes
|
mlruns/0/meta.yaml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
artifact_location: file:///C:/Users/User/ML4I/air-quality-forecast/mlruns/0
|
| 2 |
+
creation_time: 1727640956735
|
| 3 |
+
experiment_id: '0'
|
| 4 |
+
last_update_time: 1727640956735
|
| 5 |
+
lifecycle_stage: active
|
| 6 |
+
name: Default
|
{notebooks/mlruns/475209732522917118/d6de58a8b1b9445a8da3f306598e1754/inputs/c129156961c7c72977ff2820a1443bc0 β mlruns/149819317988706962/5479a322736e4663944d983720a6648e/inputs/9f74c11a603ae42026ea171546387a69}/meta.yaml
RENAMED
|
File without changes
|
{notebooks/mlruns/475209732522917118/d6de58a8b1b9445a8da3f306598e1754/inputs/f9b16ea0f807fe917284d4acb7165ee9 β mlruns/149819317988706962/5479a322736e4663944d983720a6648e/inputs/ee9b180d5518cce671940233dfff09ce}/meta.yaml
RENAMED
|
File without changes
|
{notebooks/mlruns/588532547813609546/29a7ce3e5aff4004b017460bf6d2274b β mlruns/149819317988706962/5479a322736e4663944d983720a6648e}/meta.yaml
RENAMED
|
@@ -1,15 +1,15 @@
|
|
| 1 |
-
artifact_uri: mlflow-artifacts:/
|
| 2 |
-
end_time:
|
| 3 |
entry_point_name: ''
|
| 4 |
-
experiment_id: '
|
| 5 |
lifecycle_stage: active
|
| 6 |
-
run_id:
|
| 7 |
-
run_name:
|
| 8 |
-
run_uuid:
|
| 9 |
source_name: ''
|
| 10 |
source_type: 4
|
| 11 |
source_version: ''
|
| 12 |
-
start_time:
|
| 13 |
status: 3
|
| 14 |
tags: []
|
| 15 |
user_id: User
|
|
|
|
| 1 |
+
artifact_uri: mlflow-artifacts:/149819317988706962/5479a322736e4663944d983720a6648e/artifacts
|
| 2 |
+
end_time: 1727642116447
|
| 3 |
entry_point_name: ''
|
| 4 |
+
experiment_id: '149819317988706962'
|
| 5 |
lifecycle_stage: active
|
| 6 |
+
run_id: 5479a322736e4663944d983720a6648e
|
| 7 |
+
run_name: spiffy-penguin-165
|
| 8 |
+
run_uuid: 5479a322736e4663944d983720a6648e
|
| 9 |
source_name: ''
|
| 10 |
source_type: 4
|
| 11 |
source_version: ''
|
| 12 |
+
start_time: 1727642102185
|
| 13 |
status: 3
|
| 14 |
tags: []
|
| 15 |
user_id: User
|
mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/cpu_utilization_percentage
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1727642112343 47.2 0
|
mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/disk_available_megabytes
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1727642112343 90980.2 0
|
mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/disk_usage_megabytes
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1727642112343 420156.9 0
|
mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/disk_usage_percentage
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1727642112343 82.2 0
|
mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/gpu_0_memory_usage_megabytes
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1727642112343 308.2 0
|
mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/gpu_0_memory_usage_percentage
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1727642112343 14.4 0
|
mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/gpu_0_utilization_percentage
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1727642112343 0.0 0
|
mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/network_receive_megabytes
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1727642112343 0.0 0
|
mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/network_transmit_megabytes
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1727642112343 0.0 0
|
mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/system_memory_usage_megabytes
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1727642112343 11362.1 0
|
mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/system/system_memory_usage_percentage
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1727642112343 66.9 0
|
mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/training_mean_absolute_error
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1727642111000 1.4734425695577313 0
|
mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/training_mean_squared_error
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1727642111000 4.507790763032156 0
|
mlruns/149819317988706962/5479a322736e4663944d983720a6648e/metrics/training_r2_score
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1727642111000 0.9340423340912295 0
|