LightOnOCR

Paused

App Files Community

IFMedTechdemo committed 19 days ago

Commit

c9ad6ed

verified ·

1 Parent(s): f574169

Update app.py

Browse files

Files changed (1) hide show

app.py +339 -339

app.py CHANGED Viewed

@@ -1,323 +1,5 @@
 #################################################################################################
-import subprocess
-import sys
-import spaces
-import torch
-import gradio as gr
-from PIL import Image
-import numpy as np
-import cv2
-import pypdfium2 as pdfium
-from transformers import (
-    LightOnOCRForConditionalGeneration,
-    LightOnOCRProcessor,
-)
-from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
-import re
-device = "cuda" if torch.cuda.is_available() else "cpu"
-if device == "cuda":
-    attn_implementation = "sdpa"
-    dtype = torch.bfloat16
-else:
-    attn_implementation = "eager"
-    dtype = torch.float32
-ocr_model = LightOnOCRForConditionalGeneration.from_pretrained(
-    "lightonai/LightOnOCR-1B-1025",
-    attn_implementation=attn_implementation,
-    torch_dtype=dtype,
-    trust_remote_code=True,
-).to(device).eval()
-processor = LightOnOCRProcessor.from_pretrained(
-    "lightonai/LightOnOCR-1B-1025",
-    trust_remote_code=True,
-)
-ner_tokenizer = AutoTokenizer.from_pretrained("samrawal/bert-base-uncased_clinical-ner")
-ner_model = AutoModelForTokenClassification.from_pretrained("samrawal/bert-base-uncased_clinical-ner")
-ner_pipeline = pipeline(
-    "ner",
-    model=ner_model,
-    tokenizer=ner_tokenizer,
-    aggregation_strategy="simple",
-)
-def render_pdf_page(page, max_resolution=1540, scale=2.77):
-    width, height = page.get_size()
-    pixel_width = width * scale
-    pixel_height = height * scale
-    resize_factor = min(1, max_resolution / pixel_width, max_resolution / pixel_height)
-    target_scale = scale * resize_factor
-    return page.render(scale=target_scale, rev_byteorder=True).to_pil()
-def process_pdf(pdf_path, page_num=1):
-    pdf = pdfium.PdfDocument(pdf_path)
-    total_pages = len(pdf)
-    page_idx = min(max(int(page_num) - 1, 0), total_pages - 1)
-    page = pdf[page_idx]
-    img = render_pdf_page(page)
-    pdf.close()
-    return img, total_pages, page_idx + 1
-def clean_output_text(text):
-    markers_to_remove = ["system", "user", "assistant"]
-    lines = text.split('\n')
-    cleaned_lines = []
-    for line in lines:
-        stripped = line.strip()
-        if stripped.lower() not in markers_to_remove:
-            cleaned_lines.append(line)
-    cleaned = '\n'.join(cleaned_lines).strip()
-    if "assistant" in text.lower():
-        parts = text.split("assistant", 1)
-        if len(parts) > 1:
-            cleaned = parts[1].strip()
-    return cleaned
-def preprocess_image_for_ocr(image):
-    image_rgb = image.convert("RGB")
-    img_np = np.array(image_rgb)
-    gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
-    adaptive_threshold = cv2.adaptiveThreshold(
-        gray,
-        255,
-        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-        cv2.THRESH_BINARY,
-        85,
-        11,
-    )
-    preprocessed_pil = Image.fromarray(adaptive_threshold)
-    return preprocessed_pil
-def extract_medication_lines(text):
-    """
-    Extracts medication/drug lines from text using regex.
-    Matches lines beginning with tab, tablet, cap, capsule, syrup, syp, oral, inj, injection, ointment, drops, patch, sol, solution, etc.
-    Handles case-insensitivity and abbreviations like T., C., tab., cap. etc.
-    """
-    # "|" means OR. (?:...) is a non-capturing group.
-    pattern = r"""^\s*    # Leading spaces allowed
-    (
-        T\.?|TAB\.?|TABLET      # T., T, TAB, TAB., TABLET
-        |C\.?|CAP\.?|CAPSULE    # C., C, CAP, CAP., CAPSULE
-        |SYRUP|SYP
-        |ORAL
-        |INJ\.?|INJECTION       # INJ., INJ, INJECTION
-        |OINTMENT|DROPS|PATCH|SOL\.?|SOLUTION
-    )
-    \s+[A-Z0-9 \-\(\)/,.]+      # Name/dose/other info (at least one space/letter after the pattern)
-    """
-    # Compile with re.IGNORECASE and re.VERBOSE for readability
-    med_regex = re.compile(pattern, re.IGNORECASE | re.VERBOSE)
-    meds = []
-    for line in text.split('\n'):
-        line = line.strip()
-        if med_regex.match(line):
-            meds.append(line)
-    return '\n'.join(meds)
-def extract_meds(text, use_ner):
-    """
-    Switches between Clinical NER or regex extraction.
-    Returns medications string.
-    """
-    if use_ner:
-        entities = ner_pipeline(text)
-        meds = []
-        for ent in entities:
-            if ent["entity_group"] == "treatment":
-                word = ent["word"]
-                if word.startswith("##") and meds:
-                    meds[-1] += word[2:]
-                else:
-                    meds.append(word)
-        return ", ".join(set(meds)) if meds else "None detected"
-    else:
-        return extract_medication_lines(text) or "None detected"
-@spaces.GPU
-def extract_text_from_image(image, temperature=0.2):
-    """OCR with adaptive thresholding."""
-    processed_img = preprocess_image_for_ocr(image)
-    chat = [
-        {
-            "role": "user",
-            "content": [
-                {"type": "image", "image": processed_img}
-            ],
-        }
-    ]
-    inputs = processor.apply_chat_template(
-        chat,
-        add_generation_prompt=True,
-        tokenize=True,
-        return_dict=True,
-        return_tensors="pt",
-    )
-    # Move inputs to device
-    inputs = {
-        k: (
-            v.to(device=device, dtype=dtype)
-            if isinstance(v, torch.Tensor) and v.dtype in [torch.float32, torch.float16, torch.bfloat16]
-            else v.to(device)
-            if isinstance(v, torch.Tensor)
-            else v
-        )
-        for k, v in inputs.items()
-    }
-    generation_kwargs = dict(
-        **inputs,
-        max_new_tokens=2048,
-        temperature=temperature if temperature > 0 else 0.0,
-        use_cache=True,
-        do_sample=temperature > 0,
-    )
-    with torch.no_grad():
-        outputs = ocr_model.generate(**generation_kwargs)
-    output_text = processor.decode(outputs[0], skip_special_tokens=True)
-    cleaned_text = clean_output_text(output_text)
-    yield cleaned_text, output_text, processed_img
-def process_input(file_input, temperature, page_num, extraction_mode):
-    if file_input is None:
-        yield "Please upload an image or PDF first.", "", "", "", "No file!", 1
-        return
-    image_to_process = None
-    page_info = ""
-    slider_value = page_num
-    file_path = file_input if isinstance(file_input, str) else file_input.name
-    if file_path.lower().endswith(".pdf"):
-        try:
-            image_to_process, total_pages, actual_page = process_pdf(file_path, int(page_num))
-            page_info = f"Processing page {actual_page} of {total_pages}"
-            slider_value = actual_page
-        except Exception as e:
-            msg = f"Error processing PDF: {str(e)}"
-            yield msg, "", msg, "", None, slider_value
-            return
-    else:
-        try:
-            image_to_process = Image.open(file_path)
-            page_info = "Processing image"
-        except Exception as e:
-            msg = f"Error opening image: {str(e)}"
-            yield msg, "", msg, "", None, slider_value
-            return
-    use_ner = extraction_mode == "Regex"   #"Clinical NER"
-    try:
-        for cleaned_text, raw_md, processed_img in extract_text_from_image(
-            image_to_process, temperature
-        ):
-            meds_out = extract_meds(cleaned_text, use_ner)
-            yield cleaned_text, meds_out, raw_md, page_info, processed_img, slider_value
-    except Exception as e:
-        error_msg = f"Error during text extraction: {str(e)}"
-        yield error_msg, "", error_msg, page_info, image_to_process, slider_value
-def update_slider(file_input):
-    if file_input is None:
-        return gr.update(maximum=20, value=1)
-    file_path = file_input if isinstance(file_input, str) else file_input.name
-    if file_path.lower().endswith('.pdf'):
-        try:
-            pdf = pdfium.PdfDocument(file_path)
-            total_pages = len(pdf)
-            pdf.close()
-            return gr.update(maximum=total_pages, value=1)
-        except:
-            return gr.update(maximum=20, value=1)
-    else:
-        return gr.update(maximum=1, value=1)
-with gr.Blocks(title="💊 Medicine Extraction", theme=gr.themes.Soft()) as demo:
-    file_input = gr.File(
-        label="🖼️ Upload Image or PDF",
-        file_types=[".pdf", ".png", ".jpg", ".jpeg"],
-        type="filepath"
-    )
-    temperature = gr.Slider(
-        minimum=0.0,
-        maximum=1.0,
-        value=0.2,
-        step=0.05,
-        label="Temperature"
-    )
-    page_slider = gr.Slider(
-        minimum=1, maximum=20, value=1, step=1,
-        label="Page Number (PDF only)",
-        interactive=True
-    )
-    extraction_mode = gr.Radio(
-        choices=["Clinical NER", "Regex"],
-        value="Regex",
-        label="Extraction Method",
-        info="Clinical NER uses ML, Regex uses rules"
-    )
-    output_text = gr.Textbox(
-        label="📝 Extracted Text",
-        lines=4,
-        max_lines=10,
-        interactive=False,
-        show_copy_button=True
-    )
-    medicines_output = gr.Textbox(
-        label="💊 Extracted Medicines/Drugs",
-        placeholder="Medicine/drug names will appear here...",
-        lines=2,
-        max_lines=10,
-        interactive=False,
-        show_copy_button=True
-    )
-    raw_output = gr.Textbox(
-        label="Raw Model Output",
-        lines=2,
-        max_lines=5,
-        interactive=False
-    )
-    page_info = gr.Markdown(
-        value=""  # Info of PDF page
-    )
-    rendered_image = gr.Image(
-        label="Processed Image (Thresholded for OCR)",
-        interactive=False
-    )
-    num_pages = gr.Number(
-        value=1, label="Current Page (slider)", visible=False
-    )
-    submit_btn = gr.Button("Extract Medicines", variant="primary")
-    submit_btn.click(
-        fn=process_input,
-        inputs=[file_input, temperature, page_slider, extraction_mode],
-        outputs=[output_text, medicines_output, raw_output, page_info, rendered_image, num_pages]
-    )
-    file_input.change(
-        fn=update_slider,
-        inputs=[file_input],
-        outputs=[page_slider]
-    )
-if __name__ == "__main__":
-    demo.launch()
-####################################################  running code only NER #######################
-#!/usr/bin/env python3
 # import subprocess
 # import sys
@@ -334,6 +16,7 @@ if __name__ == "__main__":
 #     LightOnOCRProcessor,
 # )
 # from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
 # device = "cuda" if torch.cuda.is_available() else "cpu"
 # if device == "cuda":
@@ -397,7 +80,6 @@ if __name__ == "__main__":
 #     return cleaned
 # def preprocess_image_for_ocr(image):
-#     """Convert PIL.Image to adaptive thresholded image for OCR."""
 #     image_rgb = image.convert("RGB")
 #     img_np = np.array(image_rgb)
 #     gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
@@ -412,9 +94,58 @@ if __name__ == "__main__":
 #     preprocessed_pil = Image.fromarray(adaptive_threshold)
 #     return preprocessed_pil
 # @spaces.GPU
 # def extract_text_from_image(image, temperature=0.2):
-#     """OCR + clinical NER, with preprocessing."""
 #     processed_img = preprocess_image_for_ocr(image)
 #     chat = [
 #         {
@@ -451,22 +182,11 @@ if __name__ == "__main__":
 #     )
 #     with torch.no_grad():
 #         outputs = ocr_model.generate(**generation_kwargs)
 #     output_text = processor.decode(outputs[0], skip_special_tokens=True)
 #     cleaned_text = clean_output_text(output_text)
-#     entities = ner_pipeline(cleaned_text)
-#     medications = []
-#     for ent in entities:
-#         if ent["entity_group"] == "treatment":
-#             word = ent["word"]
-#             if word.startswith("##") and medications:
-#                 medications[-1] += word[2:]
-#             else:
-#                 medications.append(word)
-#     medications_str = ", ".join(set(medications)) if medications else "None detected"
-#     yield cleaned_text, medications_str, output_text, processed_img
-# def process_input(file_input, temperature, page_num):
 #     if file_input is None:
 #         yield "Please upload an image or PDF first.", "", "", "", "No file!", 1
 #         return
@@ -494,11 +214,13 @@ if __name__ == "__main__":
 #             yield msg, "", msg, "", None, slider_value
 #             return
 #     try:
-#         for cleaned_text, medications, raw_md, processed_img in extract_text_from_image(
 #             image_to_process, temperature
 #         ):
-#             yield cleaned_text, medications, raw_md, page_info, processed_img, slider_value
 #     except Exception as e:
 #         error_msg = f"Error during text extraction: {str(e)}"
 #         yield error_msg, "", error_msg, page_info, image_to_process, slider_value
@@ -536,6 +258,12 @@ if __name__ == "__main__":
 #         label="Page Number (PDF only)",
 #         interactive=True
 #     )
 #     output_text = gr.Textbox(
 #         label="📝 Extracted Text",
 #         lines=4,
@@ -547,7 +275,7 @@ if __name__ == "__main__":
 #         label="💊 Extracted Medicines/Drugs",
 #         placeholder="Medicine/drug names will appear here...",
 #         lines=2,
-#         max_lines=5,
 #         interactive=False,
 #         show_copy_button=True
 #     )
@@ -558,7 +286,7 @@ if __name__ == "__main__":
 #         interactive=False
 #     )
 #     page_info = gr.Markdown(
-#         value="" # Info of PDF page
 #     )
 #     rendered_image = gr.Image(
 #         label="Processed Image (Thresholded for OCR)",
@@ -571,7 +299,7 @@ if __name__ == "__main__":
 #     submit_btn.click(
 #         fn=process_input,
-#         inputs=[file_input, temperature, page_slider],
 #         outputs=[output_text, medicines_output, raw_output, page_info, rendered_image, num_pages]
 #     )
@@ -586,6 +314,278 @@ if __name__ == "__main__":
 ##########################################   #############################################################

 #################################################################################################
 # import subprocess
 # import sys
 #     LightOnOCRProcessor,
 # )
 # from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
+# import re
 # device = "cuda" if torch.cuda.is_available() else "cpu"
 # if device == "cuda":
 #     return cleaned
 # def preprocess_image_for_ocr(image):
 #     image_rgb = image.convert("RGB")
 #     img_np = np.array(image_rgb)
 #     gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
 #     preprocessed_pil = Image.fromarray(adaptive_threshold)
 #     return preprocessed_pil
+# def extract_medication_lines(text):
+#     """
+#     Extracts medication/drug lines from text using regex.
+#     Matches lines beginning with tab, tablet, cap, capsule, syrup, syp, oral, inj, injection, ointment, drops, patch, sol, solution, etc.
+#     Handles case-insensitivity and abbreviations like T., C., tab., cap. etc.
+#     """
+#     # "|" means OR. (?:...) is a non-capturing group.
+#     pattern = r"""^\s*    # Leading spaces allowed
+#     (
+#         T\.?|TAB\.?|TABLET      # T., T, TAB, TAB., TABLET
+#         |C\.?|CAP\.?|CAPSULE    # C., C, CAP, CAP., CAPSULE
+#         |SYRUP|SYP
+#         |ORAL
+#         |INJ\.?|INJECTION       # INJ., INJ, INJECTION
+#         |OINTMENT|DROPS|PATCH|SOL\.?|SOLUTION
+#     )
+#     \s+[A-Z0-9 \-\(\)/,.]+      # Name/dose/other info (at least one space/letter after the pattern)
+#     """
+#     # Compile with re.IGNORECASE and re.VERBOSE for readability
+#     med_regex = re.compile(pattern, re.IGNORECASE | re.VERBOSE)
+#     meds = []
+#     for line in text.split('\n'):
+#         line = line.strip()
+#         if med_regex.match(line):
+#             meds.append(line)
+#     return '\n'.join(meds)
+# def extract_meds(text, use_ner):
+#     """
+#     Switches between Clinical NER or regex extraction.
+#     Returns medications string.
+#     """
+#     if use_ner:
+#         entities = ner_pipeline(text)
+#         meds = []
+#         for ent in entities:
+#             if ent["entity_group"] == "treatment":
+#                 word = ent["word"]
+#                 if word.startswith("##") and meds:
+#                     meds[-1] += word[2:]
+#                 else:
+#                     meds.append(word)
+#         return ", ".join(set(meds)) if meds else "None detected"
+#     else:
+#         return extract_medication_lines(text) or "None detected"
 # @spaces.GPU
 # def extract_text_from_image(image, temperature=0.2):
+#     """OCR with adaptive thresholding."""
 #     processed_img = preprocess_image_for_ocr(image)
 #     chat = [
 #         {
 #     )
 #     with torch.no_grad():
 #         outputs = ocr_model.generate(**generation_kwargs)
 #     output_text = processor.decode(outputs[0], skip_special_tokens=True)
 #     cleaned_text = clean_output_text(output_text)
+#     yield cleaned_text, output_text, processed_img
+# def process_input(file_input, temperature, page_num, extraction_mode):
 #     if file_input is None:
 #         yield "Please upload an image or PDF first.", "", "", "", "No file!", 1
 #         return
 #             yield msg, "", msg, "", None, slider_value
 #             return
+#     use_ner = extraction_mode == "Regex"   #"Clinical NER"
 #     try:
+#         for cleaned_text, raw_md, processed_img in extract_text_from_image(
 #             image_to_process, temperature
 #         ):
+#             meds_out = extract_meds(cleaned_text, use_ner)
+#             yield cleaned_text, meds_out, raw_md, page_info, processed_img, slider_value
 #     except Exception as e:
 #         error_msg = f"Error during text extraction: {str(e)}"
 #         yield error_msg, "", error_msg, page_info, image_to_process, slider_value
 #         label="Page Number (PDF only)",
 #         interactive=True
 #     )
+#     extraction_mode = gr.Radio(
+#         choices=["Clinical NER", "Regex"],
+#         value="Regex",
+#         label="Extraction Method",
+#         info="Clinical NER uses ML, Regex uses rules"
+#     )
 #     output_text = gr.Textbox(
 #         label="📝 Extracted Text",
 #         lines=4,
 #         label="💊 Extracted Medicines/Drugs",
 #         placeholder="Medicine/drug names will appear here...",
 #         lines=2,
+#         max_lines=10,
 #         interactive=False,
 #         show_copy_button=True
 #     )
 #         interactive=False
 #     )
 #     page_info = gr.Markdown(
+#         value=""  # Info of PDF page
 #     )
 #     rendered_image = gr.Image(
 #         label="Processed Image (Thresholded for OCR)",
 #     submit_btn.click(
 #         fn=process_input,
+#         inputs=[file_input, temperature, page_slider, extraction_mode],
 #         outputs=[output_text, medicines_output, raw_output, page_info, rendered_image, num_pages]
 #     )
+####################################################  running code only NER #######################
+#!/usr/bin/env python3
+import subprocess
+import sys
+import spaces
+import torch
+import gradio as gr
+from PIL import Image
+import numpy as np
+import cv2
+import pypdfium2 as pdfium
+from transformers import (
+    LightOnOCRForConditionalGeneration,
+    LightOnOCRProcessor,
+)
+from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
+device = "cuda" if torch.cuda.is_available() else "cpu"
+if device == "cuda":
+    attn_implementation = "sdpa"
+    dtype = torch.bfloat16
+else:
+    attn_implementation = "eager"
+    dtype = torch.float32
+ocr_model = LightOnOCRForConditionalGeneration.from_pretrained(
+    "lightonai/LightOnOCR-1B-1025",
+    attn_implementation=attn_implementation,
+    torch_dtype=dtype,
+    trust_remote_code=True,
+).to(device).eval()
+processor = LightOnOCRProcessor.from_pretrained(
+    "lightonai/LightOnOCR-1B-1025",
+    trust_remote_code=True,
+)
+ner_tokenizer = AutoTokenizer.from_pretrained("samrawal/bert-base-uncased_clinical-ner")
+ner_model = AutoModelForTokenClassification.from_pretrained("samrawal/bert-base-uncased_clinical-ner")
+ner_pipeline = pipeline(
+    "ner",
+    model=ner_model,
+    tokenizer=ner_tokenizer,
+    aggregation_strategy="simple",
+)
+def render_pdf_page(page, max_resolution=1540, scale=2.77):
+    width, height = page.get_size()
+    pixel_width = width * scale
+    pixel_height = height * scale
+    resize_factor = min(1, max_resolution / pixel_width, max_resolution / pixel_height)
+    target_scale = scale * resize_factor
+    return page.render(scale=target_scale, rev_byteorder=True).to_pil()
+def process_pdf(pdf_path, page_num=1):
+    pdf = pdfium.PdfDocument(pdf_path)
+    total_pages = len(pdf)
+    page_idx = min(max(int(page_num) - 1, 0), total_pages - 1)
+    page = pdf[page_idx]
+    img = render_pdf_page(page)
+    pdf.close()
+    return img, total_pages, page_idx + 1
+def clean_output_text(text):
+    markers_to_remove = ["system", "user", "assistant"]
+    lines = text.split('\n')
+    cleaned_lines = []
+    for line in lines:
+        stripped = line.strip()
+        if stripped.lower() not in markers_to_remove:
+            cleaned_lines.append(line)
+    cleaned = '\n'.join(cleaned_lines).strip()
+    if "assistant" in text.lower():
+        parts = text.split("assistant", 1)
+        if len(parts) > 1:
+            cleaned = parts[1].strip()
+    return cleaned
+def preprocess_image_for_ocr(image):
+    """Convert PIL.Image to adaptive thresholded image for OCR."""
+    image_rgb = image.convert("RGB")
+    img_np = np.array(image_rgb)
+    gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
+    adaptive_threshold = cv2.adaptiveThreshold(
+        gray,
+        255,
+        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+        cv2.THRESH_BINARY,
+        85,
+        11,
+    )
+    preprocessed_pil = Image.fromarray(adaptive_threshold)
+    return preprocessed_pil
+@spaces.GPU
+def extract_text_from_image(image, temperature=0.2):
+    """OCR + clinical NER, with preprocessing."""
+    processed_img = preprocess_image_for_ocr(image)
+    chat = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image", "image": processed_img}
+            ],
+        }
+    ]
+    inputs = processor.apply_chat_template(
+        chat,
+        add_generation_prompt=True,
+        tokenize=True,
+        return_dict=True,
+        return_tensors="pt",
+    )
+    # Move inputs to device
+    inputs = {
+        k: (
+            v.to(device=device, dtype=dtype)
+            if isinstance(v, torch.Tensor) and v.dtype in [torch.float32, torch.float16, torch.bfloat16]
+            else v.to(device)
+            if isinstance(v, torch.Tensor)
+            else v
+        )
+        for k, v in inputs.items()
+    }
+    generation_kwargs = dict(
+        **inputs,
+        max_new_tokens=2048,
+        temperature=temperature if temperature > 0 else 0.0,
+        use_cache=True,
+        do_sample=temperature > 0,
+    )
+    with torch.no_grad():
+        outputs = ocr_model.generate(**generation_kwargs)
+    output_text = processor.decode(outputs[0], skip_special_tokens=True)
+    cleaned_text = clean_output_text(output_text)
+    entities = ner_pipeline(cleaned_text)
+    medications = []
+    for ent in entities:
+        if ent["entity_group"] == "treatment":
+            word = ent["word"]
+            if word.startswith("##") and medications:
+                medications[-1] += word[2:]
+            else:
+                medications.append(word)
+    medications_str = ", ".join(set(medications)) if medications else "None detected"
+    yield cleaned_text, medications_str, output_text, processed_img
+def process_input(file_input, temperature, page_num):
+    if file_input is None:
+        yield "Please upload an image or PDF first.", "", "", "", "No file!", 1
+        return
+    image_to_process = None
+    page_info = ""
+    slider_value = page_num
+    file_path = file_input if isinstance(file_input, str) else file_input.name
+    if file_path.lower().endswith(".pdf"):
+        try:
+            image_to_process, total_pages, actual_page = process_pdf(file_path, int(page_num))
+            page_info = f"Processing page {actual_page} of {total_pages}"
+            slider_value = actual_page
+        except Exception as e:
+            msg = f"Error processing PDF: {str(e)}"
+            yield msg, "", msg, "", None, slider_value
+            return
+    else:
+        try:
+            image_to_process = Image.open(file_path)
+            page_info = "Processing image"
+        except Exception as e:
+            msg = f"Error opening image: {str(e)}"
+            yield msg, "", msg, "", None, slider_value
+            return
+    try:
+        for cleaned_text, medications, raw_md, processed_img in extract_text_from_image(
+            image_to_process, temperature
+        ):
+            yield cleaned_text, medications, raw_md, page_info, processed_img, slider_value
+    except Exception as e:
+        error_msg = f"Error during text extraction: {str(e)}"
+        yield error_msg, "", error_msg, page_info, image_to_process, slider_value
+def update_slider(file_input):
+    if file_input is None:
+        return gr.update(maximum=20, value=1)
+    file_path = file_input if isinstance(file_input, str) else file_input.name
+    if file_path.lower().endswith('.pdf'):
+        try:
+            pdf = pdfium.PdfDocument(file_path)
+            total_pages = len(pdf)
+            pdf.close()
+            return gr.update(maximum=total_pages, value=1)
+        except:
+            return gr.update(maximum=20, value=1)
+    else:
+        return gr.update(maximum=1, value=1)
+with gr.Blocks(title="💊 Medicine Extraction", theme=gr.themes.Soft()) as demo:
+    file_input = gr.File(
+        label="🖼️ Upload Image or PDF",
+        file_types=[".pdf", ".png", ".jpg", ".jpeg"],
+        type="filepath"
+    )
+    temperature = gr.Slider(
+        minimum=0.0,
+        maximum=1.0,
+        value=0.2,
+        step=0.05,
+        label="Temperature"
+    )
+    page_slider = gr.Slider(
+        minimum=1, maximum=20, value=1, step=1,
+        label="Page Number (PDF only)",
+        interactive=True
+    )
+    output_text = gr.Textbox(
+        label="📝 Extracted Text",
+        lines=4,
+        max_lines=10,
+        interactive=False,
+        show_copy_button=True
+    )
+    medicines_output = gr.Textbox(
+        label="💊 Extracted Medicines/Drugs",
+        placeholder="Medicine/drug names will appear here...",
+        lines=2,
+        max_lines=5,
+        interactive=False,
+        show_copy_button=True
+    )
+    raw_output = gr.Textbox(
+        label="Raw Model Output",
+        lines=2,
+        max_lines=5,
+        interactive=False
+    )
+    page_info = gr.Markdown(
+        value="" # Info of PDF page
+    )
+    rendered_image = gr.Image(
+        label="Processed Image (Thresholded for OCR)",
+        interactive=False
+    )
+    num_pages = gr.Number(
+        value=1, label="Current Page (slider)", visible=False
+    )
+    submit_btn = gr.Button("Extract Medicines", variant="primary")
+    submit_btn.click(
+        fn=process_input,
+        inputs=[file_input, temperature, page_slider],
+        outputs=[output_text, medicines_output, raw_output, page_info, rendered_image, num_pages]
+    )
+    file_input.change(
+        fn=update_slider,
+        inputs=[file_input],
+        outputs=[page_slider]
+    )
+if __name__ == "__main__":
+    demo.launch()
 ##########################################   #############################################################