Pipeline updates
Files changed:
- .dvc/config +2 -0
- dvc.yaml +0 -1
- src/data/make_dataset.py +8 -6
- src/models/model.py +3 -3
.dvc/config CHANGED

@@ -0,0 +1,2 @@
+['remote "origin"']
+    url = https://dagshub.com/gagan3012/summarization.dvc
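The new remote points DVC-tracked data for this repo at DagsHub storage. As a usage illustration (not part of the commit), here is a minimal sketch of reading a tracked file back through that remote with the dvc Python API, assuming the processed CSVs have already been pushed with dvc push:

# Usage sketch, not part of the commit: stream a DVC-tracked file through the
# DagsHub remote configured above. Assumes `dvc` is installed and
# data/processed/train.csv has been pushed to the remote.
import dvc.api

with dvc.api.open(
    'data/processed/train.csv',
    repo='https://dagshub.com/gagan3012/summarization',
) as f:
    print(f.readline())  # header row of the processed train split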
dvc.yaml CHANGED

@@ -22,7 +22,6 @@ stages:
   process_data:
     cmd: python src/data/make_dataset.py
     deps:
-      - data/raw
       - src/data/make_dataset.py
     outs:
       - data/processed:
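Dropping data/raw from deps matches the script change below: make_dataset.py now pulls CNN/DailyMail straight from the Hugging Face hub, so the stage only depends on its own source file. A small sketch (illustration only) of re-running just this stage after the edit, equivalent to running dvc repro process_data from the repo root:

# Illustration only: re-run the process_data stage after updating dvc.yaml.
import subprocess

subprocess.run(["dvc", "repro", "process_data"], check=True)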
src/data/make_dataset.py CHANGED

@@ -2,14 +2,16 @@ from datasets import load_dataset
 import pandas as pd
 
 
-def make_dataset(dataset='cnn_dailymail', split='train'
+def make_dataset(dataset='cnn_dailymail', split='train'):
     """make dataset for summarisation"""
-    dataset = load_dataset(dataset, split=split
+    dataset = load_dataset(dataset, '3.0.0', split=split)
     df = pd.DataFrame()
-    df['input_text'] = dataset['
-    df['output_text'] = dataset['
-
+    df['input_text'] = dataset['article']
+    df['output_text'] = dataset['highlights']
+    df.to_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/processed/{}.csv'.format(split, split))
 
 
 if __name__ == '__main__':
-    make_dataset(dataset='cnn_dailymail', split='train'
+    make_dataset(dataset='cnn_dailymail', split='train')
+    make_dataset(dataset='cnn_dailymail', split='test')
+    make_dataset(dataset='cnn_dailymail', split='validation')
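The committed script writes to an absolute Windows path and passes a redundant second argument to .format(). A portable sketch of the same logic, with the hard-coded path swapped for a project-relative output directory (that path choice is an assumption, not what the commit does):

# Portable sketch of the same logic; the relative output directory is an
# assumption, not part of the commit.
from pathlib import Path

import pandas as pd
from datasets import load_dataset


def make_dataset(dataset='cnn_dailymail', split='train'):
    """Build a summarisation dataframe and write it to data/processed/<split>.csv."""
    ds = load_dataset(dataset, '3.0.0', split=split)
    df = pd.DataFrame()
    df['input_text'] = ds['article']
    df['output_text'] = ds['highlights']
    out_dir = Path('data/processed')
    out_dir.mkdir(parents=True, exist_ok=True)
    df.to_csv(out_dir / f'{split}.csv', index=False)


if __name__ == '__main__':
    for split in ('train', 'test', 'validation'):
        make_dataset(dataset='cnn_dailymail', split=split)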
src/models/model.py CHANGED

@@ -303,9 +303,9 @@ class Summarization:
             tokenizer=self.tokenizer, model=self.model, output=outputdir
         )
 
-
+        MLlogger = MLFlowLogger(experiment_name="Summarization",tracking_uri="https://dagshub.com/gagan3012/summarization.mlflow")
 
-        logger = DAGsHubLogger()
+        logger = DAGsHubLogger(metrics_path='reports/metrics.txt')
 
         early_stop_callback = (
             [
@@ -324,7 +324,7 @@ class Summarization:
         gpus = 1 if use_gpu else 0
 
         trainer = Trainer(
-            logger=logger,
+            logger=[logger,MLlogger],
             callbacks=early_stop_callback,
             max_epochs=max_epochs,
             gpus=gpus,
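The model change logs to both MLflow on DagsHub and the DAGsHub metrics file by passing a list of loggers to the Lightning Trainer. A self-contained sketch of that wiring, assuming pytorch-lightning and the dagshub client are installed; the Trainer arguments other than logger are placeholders, not values from the commit:

# Sketch of the dual-logger setup introduced above; Trainer arguments other
# than `logger` are placeholders, not values from the commit.
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import MLFlowLogger
from dagshub.pytorch_lightning import DAGsHubLogger

mlflow_logger = MLFlowLogger(
    experiment_name="Summarization",
    tracking_uri="https://dagshub.com/gagan3012/summarization.mlflow",
)
dagshub_logger = DAGsHubLogger(metrics_path="reports/metrics.txt")

# Lightning fans every logged metric out to each logger in the list.
trainer = Trainer(logger=[dagshub_logger, mlflow_logger], max_epochs=1, gpus=0)
# trainer.fit(model, datamodule=data_module)  # the project's LightningModule / DataModule go here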