Spaces:
Runtime error
Runtime error
Fixes and updates
Browse files
- app.py +14 -1
- setup.py +1 -1
- src/data/process_data.py +9 -10
- src/models/predict_model.py +1 -1
- t5s/cli.py +13 -13
app.py
CHANGED
|
@@ -1,6 +1,19 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
|
| 3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
def visualize():
|
| 6 |
st.write("# Summarization UI")
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
import yaml
|
| 3 |
|
| 4 |
+
from src.models.model import Summarization
|
| 5 |
+
|
| 6 |
+
def predict_model(text: str):
|
| 7 |
+
"""
|
| 8 |
+
Predict the summary of the given text.
|
| 9 |
+
"""
|
| 10 |
+
with open("model_params.yml") as f:
|
| 11 |
+
params = yaml.safe_load(f)
|
| 12 |
+
|
| 13 |
+
model = Summarization()
|
| 14 |
+
model.load_model(model_type=params["model_type"], model_dir="gagan3012/summarsiation")
|
| 15 |
+
pre_summary = model.predict(text)
|
| 16 |
+
return pre_summary
|
| 17 |
|
| 18 |
def visualize():
|
| 19 |
st.write("# Summarization UI")
|
setup.py
CHANGED
|
@@ -12,7 +12,7 @@ with open('requirements.txt') as f:
|
|
| 12 |
setup(
|
| 13 |
name='t5s',
|
| 14 |
packages=find_packages(include=['t5s*']),
|
| 15 |
-
version='2.0.
|
| 16 |
description="T5 Summarisation Using Pytorch Lightning",
|
| 17 |
license='MIT License',
|
| 18 |
classifiers=[
|
|
|
|
| 12 |
setup(
|
| 13 |
name='t5s',
|
| 14 |
packages=find_packages(include=['t5s*']),
|
| 15 |
+
version='2.0.5',
|
| 16 |
description="T5 Summarisation Using Pytorch Lightning",
|
| 17 |
license='MIT License',
|
| 18 |
classifiers=[
|
src/data/process_data.py
CHANGED
|
@@ -2,18 +2,17 @@ import pandas as pd
|
|
| 2 |
import yaml
|
| 3 |
|
| 4 |
|
| 5 |
-
def process_data(split="train"):
|
| 6 |
-
|
| 7 |
-
with open("data_params.yml") as f:
|
| 8 |
-
params = yaml.safe_load(f)
|
| 9 |
-
|
| 10 |
df = pd.read_csv("data/raw/{}.csv".format(split))
|
| 11 |
df.columns = ["Unnamed: 0", "input_text", "output_text"]
|
| 12 |
-
|
| 13 |
-
|
| 14 |
|
| 15 |
|
| 16 |
if __name__ == "__main__":
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import yaml
|
| 3 |
|
| 4 |
|
| 5 |
+
def process_data(frac=0.1, split="train"):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
df = pd.read_csv("data/raw/{}.csv".format(split))
|
| 7 |
df.columns = ["Unnamed: 0", "input_text", "output_text"]
|
| 8 |
+
df_new = df.sample(frac=frac, replace=True, random_state=1)
|
| 9 |
+
df_new.to_csv("data/processed/{}.csv".format(split))
|
| 10 |
|
| 11 |
|
| 12 |
if __name__ == "__main__":
|
| 13 |
+
with open("data_params.yml") as f:
|
| 14 |
+
params = yaml.safe_load(f)
|
| 15 |
+
|
| 16 |
+
process_data(frac=params['split'], split="train")
|
| 17 |
+
process_data(frac=params['split'], split="test")
|
| 18 |
+
process_data(frac=params['split'], split="validation")
|
src/models/predict_model.py
CHANGED
|
@@ -11,6 +11,6 @@ def predict_model(text: str):
|
|
| 11 |
params = yaml.safe_load(f)
|
| 12 |
|
| 13 |
model = Summarization()
|
| 14 |
-
model.load_model(model_type=params["model_type"], model_dir="
|
| 15 |
pre_summary = model.predict(text)
|
| 16 |
return pre_summary
|
|
|
|
| 11 |
params = yaml.safe_load(f)
|
| 12 |
|
| 13 |
model = Summarization()
|
| 14 |
+
model.load_model(model_type=params["model_type"], model_dir=params["model_dir"])
|
| 15 |
pre_summary = model.predict(text)
|
| 16 |
return pre_summary
|
t5s/cli.py
CHANGED
|
@@ -22,16 +22,16 @@ parser_start.add_argument(
|
|
| 22 |
"-d",
|
| 23 |
"--dataset",
|
| 24 |
default="cnn_dailymail",
|
| 25 |
-
help="Enter the name of the dataset to be used",type=str
|
| 26 |
)
|
| 27 |
|
| 28 |
-
parser_start.add_argument("-s", "--split", default=0.001, help="Enter the split required",type=float)
|
| 29 |
|
| 30 |
parser_start.add_argument(
|
| 31 |
"-n", "--name", default="summarsiation", help="Enter the name of the model"
|
| 32 |
)
|
| 33 |
parser_start.add_argument(
|
| 34 |
-
"-mt", "--model_type", default="t5", help="Enter the model type",type=str
|
| 35 |
)
|
| 36 |
parser_start.add_argument(
|
| 37 |
"-m",
|
|
@@ -113,25 +113,25 @@ class Run(object):
|
|
| 113 |
elif arguments["command"] == "start":
|
| 114 |
os.chdir("./summarization/")
|
| 115 |
print("""
|
| 116 |
-
|
| 117 |
[-m MODEL_NAME] [-e EPOCHS] [-lr LEARNING_RATE]
|
| 118 |
[-b BATCH_SIZE]
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
| 122 |
Enter the name of the dataset to be used
|
| 123 |
-
|
| 124 |
Enter the split required
|
| 125 |
-
|
| 126 |
-
|
| 127 |
Enter the model type
|
| 128 |
-
|
| 129 |
Enter the model to be used eg t5-base
|
| 130 |
-
|
| 131 |
Enter the number of epochs
|
| 132 |
-
|
| 133 |
Enter the number of epochs
|
| 134 |
-
|
| 135 |
Enter the number of batches
|
| 136 |
""")
|
| 137 |
start(arguments=arguments)
|
|
|
|
| 22 |
"-d",
|
| 23 |
"--dataset",
|
| 24 |
default="cnn_dailymail",
|
| 25 |
+
help="Enter the name of the dataset to be used", type=str
|
| 26 |
)
|
| 27 |
|
| 28 |
+
parser_start.add_argument("-s", "--split", default=0.001, help="Enter the split required", type=float)
|
| 29 |
|
| 30 |
parser_start.add_argument(
|
| 31 |
"-n", "--name", default="summarsiation", help="Enter the name of the model"
|
| 32 |
)
|
| 33 |
parser_start.add_argument(
|
| 34 |
+
"-mt", "--model_type", default="t5", help="Enter the model type", type=str
|
| 35 |
)
|
| 36 |
parser_start.add_argument(
|
| 37 |
"-m",
|
|
|
|
| 113 |
elif arguments["command"] == "start":
|
| 114 |
os.chdir("./summarization/")
|
| 115 |
print("""
|
| 116 |
+
usage: t5s start [-h] [-d DATASET] [-s SPLIT] [-n NAME] [-mt MODEL_TYPE]
|
| 117 |
[-m MODEL_NAME] [-e EPOCHS] [-lr LEARNING_RATE]
|
| 118 |
[-b BATCH_SIZE]
|
| 119 |
|
| 120 |
+
-h, --help show this help message and exit
|
| 121 |
+
-d DATASET, --dataset DATASET
|
| 122 |
Enter the name of the dataset to be used
|
| 123 |
+
-s SPLIT, --split SPLIT
|
| 124 |
Enter the split required
|
| 125 |
+
-n NAME, --name NAME Enter the name of the model
|
| 126 |
+
-mt MODEL_TYPE, --model_type MODEL_TYPE
|
| 127 |
Enter the model type
|
| 128 |
+
-m MODEL_NAME, --model_name MODEL_NAME
|
| 129 |
Enter the model to be used eg t5-base
|
| 130 |
+
-e EPOCHS, --epochs EPOCHS
|
| 131 |
Enter the number of epochs
|
| 132 |
+
-lr LEARNING_RATE, --learning-rate LEARNING_RATE
|
| 133 |
Enter the number of epochs
|
| 134 |
+
-b BATCH_SIZE, --batch-size BATCH_SIZE
|
| 135 |
Enter the number of batches
|
| 136 |
""")
|
| 137 |
start(arguments=arguments)
|