Az-r-ow
committed on
Commit
·
f017054
1
Parent(s):
c462adf
data: added multiple sample files + unlabeled sentences
Browse files
- .gitignore +4 -1
- conv_tagged_file_to_bio.py +5 -0
- data/bio/fr.bio/10k_samples.bio +0 -0
- data/scripting_lcs_1/10k_samples.txt +0 -0
.gitignore
CHANGED
|
@@ -175,4 +175,7 @@ output.*
|
|
| 175 |
.DS_Store
|
| 176 |
|
| 177 |
# Remove vscode settings
|
| 178 |
-
.vscode
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
.DS_Store
|
| 176 |
|
| 177 |
# Remove vscode settings
|
| 178 |
+
.vscode
|
| 179 |
+
|
| 180 |
+
# Remove macos ds store
|
| 181 |
+
.DS_Store
|
conv_tagged_file_to_bio.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from app.travel_resolver.libs.nlp.data_processing import from_tagged_file_to_bio_file
|
| 2 |
|
| 3 |
|
|
|
|
| 4 |
INPUT_FILES = [
|
| 5 |
"./data/scripting_lcs_1/1k_train_large_samples.txt",
|
| 6 |
"./data/scripting_lcs_1/10k_train_small_samples.txt",
|
|
@@ -14,6 +15,10 @@ OUTPUT_FILES = [
|
|
| 14 |
"./data/bio/fr.bio/100_eval_large_samples.bio",
|
| 15 |
"./data/bio/fr.bio/800_eval_small_samples.bio",
|
| 16 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
tag_entities_pairs = [("<Dep>", "LOC-DEP"), ("<Arr>", "LOC-ARR")]
|
| 19 |
|
|
|
|
| 1 |
from app.travel_resolver.libs.nlp.data_processing import from_tagged_file_to_bio_file
|
| 2 |
|
| 3 |
|
| 4 |
+
<<<<<<< HEAD
|
| 5 |
INPUT_FILES = [
|
| 6 |
"./data/scripting_lcs_1/1k_train_large_samples.txt",
|
| 7 |
"./data/scripting_lcs_1/10k_train_small_samples.txt",
|
|
|
|
| 15 |
"./data/bio/fr.bio/100_eval_large_samples.bio",
|
| 16 |
"./data/bio/fr.bio/800_eval_small_samples.bio",
|
| 17 |
]
|
| 18 |
+
=======
|
| 19 |
+
INPUT_FILE = "./data/french_text/1k_unlabeled_samples.txt"
|
| 20 |
+
OUTPUT_FILE = "./data/bio/fr.bio/1k_unlabeled_samples.bio"
|
| 21 |
+
>>>>>>> e25bd1c (data: added multiple sample files + unlabeled sentences)
|
| 22 |
|
| 23 |
tag_entities_pairs = [("<Dep>", "LOC-DEP"), ("<Arr>", "LOC-ARR")]
|
| 24 |
|
data/bio/fr.bio/10k_samples.bio
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/scripting_lcs_1/10k_samples.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|