Az-r-ow
committed on
Commit
·
8a27130
1
Parent(s):
af3f8e5
Added nltk.txt file
Browse files
app/travel_resolver/libs/nlp/ner/data_processing.py
CHANGED
|
@@ -4,9 +4,12 @@ from tqdm import tqdm
|
|
| 4 |
|
| 5 |
# Will download the necessary resources for nltk
|
| 6 |
# Should skip if resources found
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
def get_tagged_content(sentence: str, tag: str) -> str | None:
|
|
@@ -251,7 +254,7 @@ def from_bio_file_to_examples(file_path: str) -> tuple:
|
|
| 251 |
|
| 252 |
|
| 253 |
def from_examples_to_tf_dataset(
|
| 254 |
-
inputs: tuple[list[list[int]], list[list[int]]]
|
| 255 |
) -> tf.data.Dataset:
|
| 256 |
"""
|
| 257 |
Given a tuple of inputs and labels, convert the tuple to a TensorFlow dataset.
|
|
|
|
| 4 |
|
| 5 |
# Will download the necessary resources for nltk
|
| 6 |
# Should skip if resources found
|
| 7 |
+
try:
|
| 8 |
+
stopwords = nltk.corpus.stopwords.words("french")
|
| 9 |
+
except LookupError:
|
| 10 |
+
nltk.download("punkt_tab")
|
| 11 |
+
nltk.download("stopwords")
|
| 12 |
+
stopwords = nltk.corpus.stopwords.words("french")
|
| 13 |
|
| 14 |
|
| 15 |
def get_tagged_content(sentence: str, tag: str) -> str | None:
|
|
|
|
| 254 |
|
| 255 |
|
| 256 |
def from_examples_to_tf_dataset(
|
| 257 |
+
inputs: tuple[list[list[int]], list[list[int]]],
|
| 258 |
) -> tf.data.Dataset:
|
| 259 |
"""
|
| 260 |
Given a tuple of inputs and labels, convert the tuple to a TensorFlow dataset.
|
nltk.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
stopwords
|
| 2 |
+
punkt_tab
|