Spaces:
Runtime error
Runtime error
Dean
committed on
Commit
·
7e3c514
1
Parent(s):
c9eec48
Starting to apply fixes for the project to latest version
Browse files- .gitignore +0 -1
- data.dvc +0 -14
- dvc.lock +29 -12
- dvc.yaml +11 -2
- requirements.txt +0 -1
- src/models/evaluate_model.py +3 -3
- src/models/model.py +5 -7
- src/models/train_model.py +0 -8
.gitignore
CHANGED
|
@@ -93,6 +93,5 @@ coverage.xml
|
|
| 93 |
.vscode
|
| 94 |
/data
|
| 95 |
|
| 96 |
-
wandb/
|
| 97 |
summarization-dagshub/
|
| 98 |
/models
|
|
|
|
| 93 |
.vscode
|
| 94 |
/data
|
| 95 |
|
|
|
|
| 96 |
summarization-dagshub/
|
| 97 |
/models
|
data.dvc
DELETED
|
@@ -1,14 +0,0 @@
|
|
| 1 |
-
deps:
|
| 2 |
-
- path: params.yml
|
| 3 |
-
md5: d0f3e81bc9191e752a69761045a449d9
|
| 4 |
-
size: 196
|
| 5 |
-
- path: src/data/make_dataset.py
|
| 6 |
-
md5: 9de71de0f8df5d0a7beb235ef7c7777d
|
| 7 |
-
size: 772
|
| 8 |
-
cmd: python src/data/make_dataset.py
|
| 9 |
-
outs:
|
| 10 |
-
- md5: 2ab20ac1b58df875a590b07d0e04eb5b.dir
|
| 11 |
-
nfiles: 3
|
| 12 |
-
path: data/raw
|
| 13 |
-
size: 1358833013
|
| 14 |
-
md5: ff502232006c7fbef1015b5aa5cc4bbb
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dvc.lock
CHANGED
|
@@ -10,19 +10,22 @@ stages:
|
|
| 10 |
md5: 0900e2bb330df94cb045faddd0b945d1
|
| 11 |
size: 1138285
|
| 12 |
- path: params.yml
|
| 13 |
-
md5:
|
| 14 |
-
size:
|
| 15 |
- path: src/models/train_model.py
|
| 16 |
-
md5:
|
| 17 |
-
size:
|
| 18 |
outs:
|
| 19 |
- path: models
|
| 20 |
-
md5:
|
| 21 |
-
size:
|
| 22 |
-
nfiles:
|
| 23 |
-
- path: reports/training_metrics.txt
|
| 24 |
-
md5:
|
| 25 |
-
size:
|
|
|
|
|
|
|
|
|
|
| 26 |
eval:
|
| 27 |
cmd: python src/models/evaluate_model.py
|
| 28 |
deps:
|
|
@@ -51,8 +54,8 @@ stages:
|
|
| 51 |
size: 0
|
| 52 |
nfiles: 0
|
| 53 |
- path: params.yml
|
| 54 |
-
md5:
|
| 55 |
-
size:
|
| 56 |
- path: src/data/process_data.py
|
| 57 |
md5: ba3ba7b7c8a905b736b6b0a28d2334c4
|
| 58 |
size: 623
|
|
@@ -66,3 +69,17 @@ stages:
|
|
| 66 |
- path: data/processed/validation.csv
|
| 67 |
md5: 0900e2bb330df94cb045faddd0b945d1
|
| 68 |
size: 1138285
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
md5: 0900e2bb330df94cb045faddd0b945d1
|
| 11 |
size: 1138285
|
| 12 |
- path: params.yml
|
| 13 |
+
md5: 8ac76f9483ae2d78cf89a2e2be4e8446
|
| 14 |
+
size: 189
|
| 15 |
- path: src/models/train_model.py
|
| 16 |
+
md5: d57b5ff84bc29a8ea75e191027d70148
|
| 17 |
+
size: 988
|
| 18 |
outs:
|
| 19 |
- path: models
|
| 20 |
+
md5: b8dd7baa6b7b85a7b4c2fcfbe3d831bf.dir
|
| 21 |
+
size: 243476333
|
| 22 |
+
nfiles: 5
|
| 23 |
+
- path: reports/training_metrics.csv
|
| 24 |
+
md5: f0c89a07561ca8aea8ab3f4764b648e7
|
| 25 |
+
size: 26
|
| 26 |
+
- path: reports/training_params.yml
|
| 27 |
+
md5: 8a80554c91d9fca8acb82f023de02f11
|
| 28 |
+
size: 3
|
| 29 |
eval:
|
| 30 |
cmd: python src/models/evaluate_model.py
|
| 31 |
deps:
|
|
|
|
| 54 |
size: 0
|
| 55 |
nfiles: 0
|
| 56 |
- path: params.yml
|
| 57 |
+
md5: 8ac76f9483ae2d78cf89a2e2be4e8446
|
| 58 |
+
size: 189
|
| 59 |
- path: src/data/process_data.py
|
| 60 |
md5: ba3ba7b7c8a905b736b6b0a28d2334c4
|
| 61 |
size: 623
|
|
|
|
| 69 |
- path: data/processed/validation.csv
|
| 70 |
md5: 0900e2bb330df94cb045faddd0b945d1
|
| 71 |
size: 1138285
|
| 72 |
+
download_data:
|
| 73 |
+
cmd: python src/data/make_dataset.py
|
| 74 |
+
deps:
|
| 75 |
+
- path: params.yml
|
| 76 |
+
md5: 8ac76f9483ae2d78cf89a2e2be4e8446
|
| 77 |
+
size: 189
|
| 78 |
+
- path: src/data/make_dataset.py
|
| 79 |
+
md5: 9de71de0f8df5d0a7beb235ef7c7777d
|
| 80 |
+
size: 772
|
| 81 |
+
outs:
|
| 82 |
+
- path: data/raw
|
| 83 |
+
md5: 2ab20ac1b58df875a590b07d0e04eb5b.dir
|
| 84 |
+
size: 1358833013
|
| 85 |
+
nfiles: 3
|
dvc.yaml
CHANGED
|
@@ -1,4 +1,11 @@
|
|
| 1 |
stages:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
process_data:
|
| 3 |
cmd: python src/data/process_data.py
|
| 4 |
deps:
|
|
@@ -25,8 +32,10 @@ stages:
|
|
| 25 |
outs:
|
| 26 |
- models:
|
| 27 |
persist: true
|
|
|
|
|
|
|
| 28 |
metrics:
|
| 29 |
-
- reports/training_metrics.txt:
|
| 30 |
cache: false
|
| 31 |
eval:
|
| 32 |
cmd: python src/models/evaluate_model.py
|
|
@@ -36,6 +45,6 @@ stages:
|
|
| 36 |
- models
|
| 37 |
- src/models/evaluate_model.py
|
| 38 |
metrics:
|
| 39 |
-
- reports/metrics.
|
| 40 |
cache: false
|
| 41 |
|
|
|
|
| 1 |
stages:
|
| 2 |
+
download_data:
|
| 3 |
+
cmd: python src/data/make_dataset.py
|
| 4 |
+
deps:
|
| 5 |
+
- params.yml
|
| 6 |
+
- src/data/make_dataset.py
|
| 7 |
+
outs:
|
| 8 |
+
- data/raw
|
| 9 |
process_data:
|
| 10 |
cmd: python src/data/process_data.py
|
| 11 |
deps:
|
|
|
|
| 32 |
outs:
|
| 33 |
- models:
|
| 34 |
persist: true
|
| 35 |
+
- reports/training_params.yml:
|
| 36 |
+
cache: false
|
| 37 |
metrics:
|
| 38 |
+
- reports/training_metrics.csv:
|
| 39 |
cache: false
|
| 40 |
eval:
|
| 41 |
cmd: python src/models/evaluate_model.py
|
|
|
|
| 45 |
- models
|
| 46 |
- src/models/evaluate_model.py
|
| 47 |
metrics:
|
| 48 |
+
- reports/metrics.csv:
|
| 49 |
cache: false
|
| 50 |
|
requirements.txt
CHANGED
|
@@ -9,7 +9,6 @@ rouge_score
|
|
| 9 |
pyyaml
|
| 10 |
dvc
|
| 11 |
mlflow
|
| 12 |
-
wandb
|
| 13 |
|
| 14 |
# external requirements
|
| 15 |
click
|
|
|
|
| 9 |
pyyaml
|
| 10 |
dvc
|
| 11 |
mlflow
|
|
|
|
| 12 |
|
| 13 |
# external requirements
|
| 14 |
click
|
src/models/evaluate_model.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import
|
| 2 |
import yaml
|
| 3 |
|
| 4 |
from model import Summarization
|
|
@@ -18,8 +18,8 @@ def evaluate_model():
|
|
| 18 |
model.load_model(model_type=params['model_type'], model_dir=params['model_dir'])
|
| 19 |
results = model.evaluate(test_df=test_df, metrics=params['metric'])
|
| 20 |
|
| 21 |
-
with
|
| 22 |
-
|
| 23 |
|
| 24 |
|
| 25 |
if __name__ == '__main__':
|
|
|
|
| 1 |
+
from dagshub import dagshub_logger
|
| 2 |
import yaml
|
| 3 |
|
| 4 |
from model import Summarization
|
|
|
|
| 18 |
model.load_model(model_type=params['model_type'], model_dir=params['model_dir'])
|
| 19 |
results = model.evaluate(test_df=test_df, metrics=params['metric'])
|
| 20 |
|
| 21 |
+
with dagshub_logger(should_log_hparams=False) as logger:
|
| 22 |
+
logger.log_metrics(results)
|
| 23 |
|
| 24 |
|
| 25 |
if __name__ == '__main__':
|
src/models/model.py
CHANGED
|
@@ -7,7 +7,8 @@ from transformers import (
|
|
| 7 |
)
|
| 8 |
from torch.utils.data import Dataset, DataLoader
|
| 9 |
import pytorch_lightning as pl
|
| 10 |
-
from pytorch_lightning.loggers import MLFlowLogger
|
|
|
|
| 11 |
from pytorch_lightning import Trainer
|
| 12 |
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
|
| 13 |
from pytorch_lightning import LightningDataModule
|
|
@@ -15,8 +16,6 @@ from pytorch_lightning import LightningModule
|
|
| 15 |
from datasets import load_metric
|
| 16 |
from tqdm.auto import tqdm
|
| 17 |
|
| 18 |
-
# from dagshub.pytorch_lightning import DAGsHubLogger
|
| 19 |
-
|
| 20 |
|
| 21 |
torch.cuda.empty_cache()
|
| 22 |
pl.seed_everything(42)
|
|
@@ -330,9 +329,8 @@ class Summarization:
|
|
| 330 |
MLlogger = MLFlowLogger(experiment_name="Summarization",
|
| 331 |
tracking_uri="https://dagshub.com/gagan3012/summarization.mlflow")
|
| 332 |
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
# logger = DAGsHubLogger(metrics_path='reports/training_metrics.txt')
|
| 336 |
|
| 337 |
early_stop_callback = (
|
| 338 |
[
|
|
@@ -351,7 +349,7 @@ class Summarization:
|
|
| 351 |
gpus = -1 if use_gpu and torch.cuda.is_available() else 0
|
| 352 |
|
| 353 |
trainer = Trainer(
|
| 354 |
-
logger=[
|
| 355 |
callbacks=early_stop_callback,
|
| 356 |
max_epochs=max_epochs,
|
| 357 |
gpus=gpus,
|
|
|
|
| 7 |
)
|
| 8 |
from torch.utils.data import Dataset, DataLoader
|
| 9 |
import pytorch_lightning as pl
|
| 10 |
+
from pytorch_lightning.loggers import MLFlowLogger
|
| 11 |
+
from dagshub.pytorch_lightning import DAGsHubLogger
|
| 12 |
from pytorch_lightning import Trainer
|
| 13 |
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
|
| 14 |
from pytorch_lightning import LightningDataModule
|
|
|
|
| 16 |
from datasets import load_metric
|
| 17 |
from tqdm.auto import tqdm
|
| 18 |
|
|
|
|
|
|
|
| 19 |
|
| 20 |
torch.cuda.empty_cache()
|
| 21 |
pl.seed_everything(42)
|
|
|
|
| 329 |
MLlogger = MLFlowLogger(experiment_name="Summarization",
|
| 330 |
tracking_uri="https://dagshub.com/gagan3012/summarization.mlflow")
|
| 331 |
|
| 332 |
+
logger = DAGsHubLogger(metrics_path='reports/training_metrics.csv',
|
| 333 |
+
hparams_path='reports/training_params.yml')
|
|
|
|
| 334 |
|
| 335 |
early_stop_callback = (
|
| 336 |
[
|
|
|
|
| 349 |
gpus = -1 if use_gpu and torch.cuda.is_available() else 0
|
| 350 |
|
| 351 |
trainer = Trainer(
|
| 352 |
+
logger=[MLlogger, logger],
|
| 353 |
callbacks=early_stop_callback,
|
| 354 |
max_epochs=max_epochs,
|
| 355 |
gpus=gpus,
|
src/models/train_model.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
| 1 |
-
import json
|
| 2 |
-
|
| 3 |
import yaml
|
| 4 |
|
| 5 |
from model import Summarization
|
|
@@ -30,12 +28,6 @@ def train_model():
|
|
| 30 |
|
| 31 |
model.save_model(model_dir=params['model_dir'])
|
| 32 |
|
| 33 |
-
with open('wandb/latest-run/files/wandb-summary.json') as json_file:
|
| 34 |
-
data = json.load(json_file)
|
| 35 |
-
|
| 36 |
-
with open('reports/training_metrics.txt', 'w') as fp:
|
| 37 |
-
json.dump(data, fp)
|
| 38 |
-
|
| 39 |
|
| 40 |
if __name__ == '__main__':
|
| 41 |
train_model()
|
|
|
|
|
|
|
|
|
|
| 1 |
import yaml
|
| 2 |
|
| 3 |
from model import Summarization
|
|
|
|
| 28 |
|
| 29 |
model.save_model(model_dir=params['model_dir'])
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
if __name__ == '__main__':
|
| 33 |
train_model()
|