Az-r-ow committed on
Commit
f017054
·
1 Parent(s): c462adf

data: added multiple sample files + unlabeled sentences

Browse files
.gitignore CHANGED
@@ -175,4 +175,7 @@ output.*
175
  .DS_Store
176
 
177
  # Remove vscode settings
178
- .vscode
 
 
 
 
175
  .DS_Store
176
 
177
  # Remove vscode settings
178
+ .vscode
179
+
180
+ # Remove macos ds store
181
+ .DS_Store
conv_tagged_file_to_bio.py CHANGED
@@ -1,6 +1,7 @@
1
  from app.travel_resolver.libs.nlp.data_processing import from_tagged_file_to_bio_file
2
 
3
 
 
4
  INPUT_FILES = [
5
  "./data/scripting_lcs_1/1k_train_large_samples.txt",
6
  "./data/scripting_lcs_1/10k_train_small_samples.txt",
@@ -14,6 +15,10 @@ OUTPUT_FILES = [
14
  "./data/bio/fr.bio/100_eval_large_samples.bio",
15
  "./data/bio/fr.bio/800_eval_small_samples.bio",
16
  ]
 
 
 
 
17
 
18
  tag_entities_pairs = [("<Dep>", "LOC-DEP"), ("<Arr>", "LOC-ARR")]
19
 
 
1
  from app.travel_resolver.libs.nlp.data_processing import from_tagged_file_to_bio_file
2
 
3
 
4
+ <<<<<<< HEAD
5
  INPUT_FILES = [
6
  "./data/scripting_lcs_1/1k_train_large_samples.txt",
7
  "./data/scripting_lcs_1/10k_train_small_samples.txt",
 
15
  "./data/bio/fr.bio/100_eval_large_samples.bio",
16
  "./data/bio/fr.bio/800_eval_small_samples.bio",
17
  ]
18
+ =======
19
+ INPUT_FILE = "./data/french_text/1k_unlabeled_samples.txt"
20
+ OUTPUT_FILE = "./data/bio/fr.bio/1k_unlabeled_samples.bio"
21
+ >>>>>>> e25bd1c (data: added multiple sample files + unlabeled sentences)
22
 
23
  tag_entities_pairs = [("<Dep>", "LOC-DEP"), ("<Arr>", "LOC-ARR")]
24
 
data/bio/fr.bio/10k_samples.bio ADDED
The diff for this file is too large to render. See raw diff
 
data/scripting_lcs_1/10k_samples.txt ADDED
The diff for this file is too large to render. See raw diff