IFMedTechdemo committed on
Commit
a7d8613
·
verified ·
1 Parent(s): aff30bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -11
app.py CHANGED
@@ -18,18 +18,11 @@ def preprocess_image_for_ocr(image):
18
  preprocessed_pil = Image.fromarray(adaptive_threshold)
19
  return preprocessed_pil
20
 
 
21
  def extract_medication_lines(text):
22
- """
23
- Flexible regex: Find lines with [form], [name], [dose] anywhere.
24
- Handles free text/table/mixed layouts.
25
- """
26
- # Medicine forms
27
  form_pattern = r"(TAB(L?ET)?|CAP(SULE)?|SYRUP|SYP|DROP(S)?|INJ(CTION)?|OINTMENT|CREAM|GEL|PATCH|SOL(UTION)?|ORAL)"
28
- # Name: up to 4 tokens (space/hyphen/slash), case/mixed
29
  name_pattern = r"([A-Z0-9\-/]+(?:\s+[A-Z0-9\-/]+){0,4})"
30
- # Dose/concentration: 1-4 digits, optional space, units
31
  dose_pattern = r"(\d{1,4}\s*(mg|ml|mcg|g|kg|units|IU)|\d{1,2}\s*%(\s*w\/w|\s*w\/v|\s*v\/v)?)"
32
- # Allow any order: form+name+dose/mid/suffix/prefix
33
  main_pattern = (
34
  r"(?<!\w)(" + form_pattern + r")[\s\-]+"
35
  r"" + name_pattern + r"" # name after form
@@ -42,11 +35,13 @@ def extract_medication_lines(text):
42
  line_stripped = line.strip()
43
  match = med_regex.search(line_stripped)
44
  if match:
45
- # Compose: form + name + dose
46
- cleaned = f"{match.group(1).upper()} {match.group(2).upper()} {match.group(5)}"
47
- meds.append(cleaned.strip())
48
  return '\n'.join(meds)
49
 
 
 
 
50
  def clinical_ner_extract(text, use_gpu=False):
51
  """
52
  Uses ClinicalNER for medicine name, then finds form/dose in source sentence.
 
18
  preprocessed_pil = Image.fromarray(adaptive_threshold)
19
  return preprocessed_pil
20
 
21
+
22
  def extract_medication_lines(text):
 
 
 
 
 
23
  form_pattern = r"(TAB(L?ET)?|CAP(SULE)?|SYRUP|SYP|DROP(S)?|INJ(CTION)?|OINTMENT|CREAM|GEL|PATCH|SOL(UTION)?|ORAL)"
 
24
  name_pattern = r"([A-Z0-9\-/]+(?:\s+[A-Z0-9\-/]+){0,4})"
 
25
  dose_pattern = r"(\d{1,4}\s*(mg|ml|mcg|g|kg|units|IU)|\d{1,2}\s*%(\s*w\/w|\s*w\/v|\s*v\/v)?)"
 
26
  main_pattern = (
27
  r"(?<!\w)(" + form_pattern + r")[\s\-]+"
28
  r"" + name_pattern + r"" # name after form
 
35
  line_stripped = line.strip()
36
  match = med_regex.search(line_stripped)
37
  if match:
38
+ # Ignore group indices, instead join non-None groups or use match.group(0)
39
+ meds.append(match.group(0).strip())
 
40
  return '\n'.join(meds)
41
 
42
+
43
+
44
+
45
  def clinical_ner_extract(text, use_gpu=False):
46
  """
47
  Uses ClinicalNER for medicine name, then finds form/dose in source sentence.