Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -19,25 +19,37 @@ def preprocess_image_for_ocr(image):
|
|
| 19 |
return preprocessed_pil
|
| 20 |
|
| 21 |
|
|
|
|
| 22 |
def extract_medication_lines(text):
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
)
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
| 41 |
|
| 42 |
|
| 43 |
|
|
|
|
| 19 |
return preprocessed_pil
|
| 20 |
|
| 21 |
|
| 22 |
+
|
| 23 |
def extract_medication_lines(text):
    """Extract medication ("Rx") phrases from a block of text.

    Each line of ``text`` is scanned for phrases shaped like
    ``<form> <name> [<form>] [<dose>]`` where:

    - ``form`` is a dosage-form keyword (TAB/TABLET, CAP/CAPSULE, SYRUP,
      SYP, DROP(S), INJ/INJECTION, OINTMENT, CREAM, GEL, PATCH,
      SOL/SOLUTION, ORAL);
    - ``name`` is one token plus up to four more, each made of letters,
      digits, ``-`` or ``/``;
    - the repeated form and the dose (a number or ``number/number``,
      optionally followed by a unit) are both optional.

    Matching is case-insensitive.

    Args:
        text: The text to scan, lines separated by ``"\\n"``. ``None`` or an
            empty string yields an empty result.

    Returns:
        The distinct matches, upper-cased and with runs of whitespace
        collapsed to a single space, joined by ``"\\n"`` in order of first
        appearance. An empty string when nothing matches.
    """
    if not text:
        return ""

    # Dosage-form keywords. "INJ(E?CTION)?" accepts INJ and INJECTION (and
    # still tolerates the previously accepted "INJCTION" spelling).
    form = r"(TAB(L?ET)?|CAP(SULE)?|SYRUP|SYP|DROP(S)?|INJ(E?CTION)?|OINTMENT|CREAM|GEL|PATCH|SOL(UTION)?|ORAL)"
    # Drug name: one token followed by up to four further tokens.
    name = r"([A-Z0-9\-/]+(?:\s+[A-Z0-9\-/]+){0,4})"
    # The form keyword may be repeated after the name.
    opt_form = fr"(?:\s+{form})?"
    # Optional dose: number or number/number, then an optional unit.
    opt_dose = r"(?:\s*\d{1,4}(?:\/\d{1,4})?\s*(mg|ml|mcg|g|kg|units|IU|%|))?"

    # Full pattern: form name [form] [dose/conc]
    pat = re.compile(
        fr"\b{form}\s+{name}{opt_form}{opt_dose}\b",
        re.IGNORECASE
    )

    # A dict keeps insertion order, so duplicates are dropped while the
    # output order stays stable (a set would yield an arbitrary order).
    found = {}
    for line in text.split('\n'):
        for m in pat.finditer(line.strip()):
            phrase = re.sub(r"\s+", " ", m.group(0)).strip()  # normalize spaces
            found.setdefault(phrase.upper(), None)
    return '\n'.join(found)
|
| 52 |
+
|
| 53 |
|
| 54 |
|
| 55 |
|