Spaces:
Runtime error
Runtime error
split added
Browse files
- src/data/make_dataset.py +5 -1
src/data/make_dataset.py
CHANGED
|
@@ -1,15 +1,19 @@
|
|
| 1 |
import yaml
|
| 2 |
from datasets import load_dataset
|
| 3 |
import pandas as pd
|
|
|
|
|
|
|
| 4 |
|
| 5 |
|
| 6 |
def make_dataset(dataset='cnn_dailymail', split='train'):
    """Load one split of a summarisation dataset into a pandas DataFrame.

    Args:
        dataset: Dataset name forwarded to ``load_dataset``.
        split: Split forwarded to ``load_dataset`` (e.g. ``'train'``).

    Returns:
        A DataFrame with two columns, ``'article'`` and ``'highlights'``,
        copied from the loaded dataset.
    """
    # '3.0.0' is always passed as the second argument, whatever `dataset`
    # is -- presumably the cnn_dailymail configuration; confirm before
    # using this with another dataset.
    records = load_dataset(dataset, '3.0.0', split=split)

    df = pd.DataFrame()
    df['article'] = records['article']
    df['highlights'] = records['highlights']

    # The frame used to be built and then dropped; return it so the call
    # has an observable result.
    return df
|
| 12 |
-
|
| 13 |
|
| 14 |
|
| 15 |
if __name__ == '__main__':
|
|
|
|
| 1 |
import yaml
|
| 2 |
from datasets import load_dataset
|
| 3 |
import pandas as pd
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
|
| 7 |
|
| 8 |
def make_dataset(dataset='cnn_dailymail', split='train'):
    """Load one split of a summarisation dataset and save it as CSV.

    The ``'article'`` and ``'highlights'`` columns of the loaded split are
    written to ``data/raw/<split>.csv`` (relative to the current working
    directory), creating ``data/raw`` if needed.

    Args:
        dataset: Dataset name forwarded to ``load_dataset``.
        split: Split forwarded to ``load_dataset``; also used as the CSV
            file name.

    Returns:
        The DataFrame that was written to disk.
    """
    # exist_ok avoids the check-then-create race of a separate
    # os.path.exists() test when two runs start at the same time.
    os.makedirs('data/raw', exist_ok=True)

    # '3.0.0' is always passed as the second argument, whatever `dataset`
    # is -- presumably the cnn_dailymail configuration; confirm before
    # using this with another dataset.
    records = load_dataset(dataset, '3.0.0', split=split)

    df = pd.DataFrame()
    df['article'] = records['article']
    df['highlights'] = records['highlights']

    # Default to_csv options are kept so the file layout (including the
    # index column) stays as before.
    df.to_csv('data/raw/{}.csv'.format(split))
    return df
|
| 17 |
|
| 18 |
|
| 19 |
if __name__ == '__main__':
|