LightOnOCR

Paused

App Files Files Community

IFMedTechdemo committed 21 days ago

Commit

5ea9a7f

verified ·

1 Parent(s): 7363d0f

Update app.py

Browse files

Files changed (1) hide show

app.py +339 -339

app.py CHANGED Viewed

@@ -1,323 +1,5 @@
 #################################################################################################
-# import subprocess
-# import sys
-# import spaces
-# import torch
-# import gradio as gr
-# from PIL import Image
-# import numpy as np
-# import cv2
-# import pypdfium2 as pdfium
-# from transformers import (
-#     LightOnOCRForConditionalGeneration,
-#     LightOnOCRProcessor,
-# )
-# from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
-# import re
-# device = "cuda" if torch.cuda.is_available() else "cpu"
-# if device == "cuda":
-#     attn_implementation = "sdpa"
-#     dtype = torch.bfloat16
-# else:
-#     attn_implementation = "eager"
-#     dtype = torch.float32
-# ocr_model = LightOnOCRForConditionalGeneration.from_pretrained(
-#     "lightonai/LightOnOCR-1B-1025",
-#     attn_implementation=attn_implementation,
-#     torch_dtype=dtype,
-#     trust_remote_code=True,
-# ).to(device).eval()
-# processor = LightOnOCRProcessor.from_pretrained(
-#     "lightonai/LightOnOCR-1B-1025",
-#     trust_remote_code=True,
-# )
-# ner_tokenizer = AutoTokenizer.from_pretrained("samrawal/bert-base-uncased_clinical-ner")
-# ner_model = AutoModelForTokenClassification.from_pretrained("samrawal/bert-base-uncased_clinical-ner")
-# ner_pipeline = pipeline(
-#     "ner",
-#     model=ner_model,
-#     tokenizer=ner_tokenizer,
-#     aggregation_strategy="simple",
-# )
-# def render_pdf_page(page, max_resolution=1540, scale=2.77):
-#     width, height = page.get_size()
-#     pixel_width = width * scale
-#     pixel_height = height * scale
-#     resize_factor = min(1, max_resolution / pixel_width, max_resolution / pixel_height)
-#     target_scale = scale * resize_factor
-#     return page.render(scale=target_scale, rev_byteorder=True).to_pil()
-# def process_pdf(pdf_path, page_num=1):
-#     pdf = pdfium.PdfDocument(pdf_path)
-#     total_pages = len(pdf)
-#     page_idx = min(max(int(page_num) - 1, 0), total_pages - 1)
-#     page = pdf[page_idx]
-#     img = render_pdf_page(page)
-#     pdf.close()
-#     return img, total_pages, page_idx + 1
-# def clean_output_text(text):
-#     markers_to_remove = ["system", "user", "assistant"]
-#     lines = text.split('\n')
-#     cleaned_lines = []
-#     for line in lines:
-#         stripped = line.strip()
-#         if stripped.lower() not in markers_to_remove:
-#             cleaned_lines.append(line)
-#     cleaned = '\n'.join(cleaned_lines).strip()
-#     if "assistant" in text.lower():
-#         parts = text.split("assistant", 1)
-#         if len(parts) > 1:
-#             cleaned = parts[1].strip()
-#     return cleaned
-# def preprocess_image_for_ocr(image):
-#     image_rgb = image.convert("RGB")
-#     img_np = np.array(image_rgb)
-#     gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
-#     adaptive_threshold = cv2.adaptiveThreshold(
-#         gray,
-#         255,
-#         cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-#         cv2.THRESH_BINARY,
-#         85,
-#         11,
-#     )
-#     preprocessed_pil = Image.fromarray(adaptive_threshold)
-#     return preprocessed_pil
-# def extract_medication_lines(text):
-#     """
-#     Extracts medication/drug lines from text using regex.
-#     Matches lines beginning with tab, tablet, cap, capsule, syrup, syp, oral, inj, injection, ointment, drops, patch, sol, solution, etc.
-#     Handles case-insensitivity and abbreviations like T., C., tab., cap. etc.
-#     """
-#     # "|" means OR. (?:...) is a non-capturing group.
-#     pattern = r"""^\s*    # Leading spaces allowed
-#     (
-#         T\.?|TAB\.?|TABLET      # T., T, TAB, TAB., TABLET
-#         |C\.?|CAP\.?|CAPSULE    # C., C, CAP, CAP., CAPSULE
-#         |SYRUP|SYP
-#         |ORAL
-#         |INJ\.?|INJECTION       # INJ., INJ, INJECTION
-#         |OINTMENT|DROPS|PATCH|SOL\.?|SOLUTION
-#     )
-#     \s+[A-Z0-9 \-\(\)/,.]+      # Name/dose/other info (at least one space/letter after the pattern)
-#     """
-#     # Compile with re.IGNORECASE and re.VERBOSE for readability
-#     med_regex = re.compile(pattern, re.IGNORECASE | re.VERBOSE)
-#     meds = []
-#     for line in text.split('\n'):
-#         line = line.strip()
-#         if med_regex.match(line):
-#             meds.append(line)
-#     return '\n'.join(meds)
-# def extract_meds(text, use_ner):
-#     """
-#     Switches between Clinical NER or regex extraction.
-#     Returns medications string.
-#     """
-#     if use_ner:
-#         entities = ner_pipeline(text)
-#         meds = []
-#         for ent in entities:
-#             if ent["entity_group"] == "treatment":
-#                 word = ent["word"]
-#                 if word.startswith("##") and meds:
-#                     meds[-1] += word[2:]
-#                 else:
-#                     meds.append(word)
-#         return ", ".join(set(meds)) if meds else "None detected"
-#     else:
-#         return extract_medication_lines(text) or "None detected"
-# @spaces.GPU
-# def extract_text_from_image(image, temperature=0.2):
-#     """OCR with adaptive thresholding."""
-#     processed_img = preprocess_image_for_ocr(image)
-#     chat = [
-#         {
-#             "role": "user",
-#             "content": [
-#                 {"type": "image", "image": processed_img}
-#             ],
-#         }
-#     ]
-#     inputs = processor.apply_chat_template(
-#         chat,
-#         add_generation_prompt=True,
-#         tokenize=True,
-#         return_dict=True,
-#         return_tensors="pt",
-#     )
-#     # Move inputs to device
-#     inputs = {
-#         k: (
-#             v.to(device=device, dtype=dtype)
-#             if isinstance(v, torch.Tensor) and v.dtype in [torch.float32, torch.float16, torch.bfloat16]
-#             else v.to(device)
-#             if isinstance(v, torch.Tensor)
-#             else v
-#         )
-#         for k, v in inputs.items()
-#     }
-#     generation_kwargs = dict(
-#         **inputs,
-#         max_new_tokens=2048,
-#         temperature=temperature if temperature > 0 else 0.0,
-#         use_cache=True,
-#         do_sample=temperature > 0,
-#     )
-#     with torch.no_grad():
-#         outputs = ocr_model.generate(**generation_kwargs)
-#     output_text = processor.decode(outputs[0], skip_special_tokens=True)
-#     cleaned_text = clean_output_text(output_text)
-#     yield cleaned_text, output_text, processed_img
-# def process_input(file_input, temperature, page_num, extraction_mode):
-#     if file_input is None:
-#         yield "Please upload an image or PDF first.", "", "", "", "No file!", 1
-#         return
-#     image_to_process = None
-#     page_info = ""
-#     slider_value = page_num
-#     file_path = file_input if isinstance(file_input, str) else file_input.name
-#     if file_path.lower().endswith(".pdf"):
-#         try:
-#             image_to_process, total_pages, actual_page = process_pdf(file_path, int(page_num))
-#             page_info = f"Processing page {actual_page} of {total_pages}"
-#             slider_value = actual_page
-#         except Exception as e:
-#             msg = f"Error processing PDF: {str(e)}"
-#             yield msg, "", msg, "", None, slider_value
-#             return
-#     else:
-#         try:
-#             image_to_process = Image.open(file_path)
-#             page_info = "Processing image"
-#         except Exception as e:
-#             msg = f"Error opening image: {str(e)}"
-#             yield msg, "", msg, "", None, slider_value
-#             return
-#     use_ner = extraction_mode == "Regex"   #"Clinical NER"
-#     try:
-#         for cleaned_text, raw_md, processed_img in extract_text_from_image(
-#             image_to_process, temperature
-#         ):
-#             meds_out = extract_meds(cleaned_text, use_ner)
-#             yield cleaned_text, meds_out, raw_md, page_info, processed_img, slider_value
-#     except Exception as e:
-#         error_msg = f"Error during text extraction: {str(e)}"
-#         yield error_msg, "", error_msg, page_info, image_to_process, slider_value
-# def update_slider(file_input):
-#     if file_input is None:
-#         return gr.update(maximum=20, value=1)
-#     file_path = file_input if isinstance(file_input, str) else file_input.name
-#     if file_path.lower().endswith('.pdf'):
-#         try:
-#             pdf = pdfium.PdfDocument(file_path)
-#             total_pages = len(pdf)
-#             pdf.close()
-#             return gr.update(maximum=total_pages, value=1)
-#         except:
-#             return gr.update(maximum=20, value=1)
-#     else:
-#         return gr.update(maximum=1, value=1)
-# with gr.Blocks(title="💊 Medicine Extraction", theme=gr.themes.Soft()) as demo:
-#     file_input = gr.File(
-#         label="🖼️ Upload Image or PDF",
-#         file_types=[".pdf", ".png", ".jpg", ".jpeg"],
-#         type="filepath"
-#     )
-#     temperature = gr.Slider(
-#         minimum=0.0,
-#         maximum=1.0,
-#         value=0.2,
-#         step=0.05,
-#         label="Temperature"
-#     )
-#     page_slider = gr.Slider(
-#         minimum=1, maximum=20, value=1, step=1,
-#         label="Page Number (PDF only)",
-#         interactive=True
-#     )
-#     extraction_mode = gr.Radio(
-#         choices=["Clinical NER", "Regex"],
-#         value="Regex",
-#         label="Extraction Method",
-#         info="Clinical NER uses ML, Regex uses rules"
-#     )
-#     output_text = gr.Textbox(
-#         label="📝 Extracted Text",
-#         lines=4,
-#         max_lines=10,
-#         interactive=False,
-#         show_copy_button=True
-#     )
-#     medicines_output = gr.Textbox(
-#         label="💊 Extracted Medicines/Drugs",
-#         placeholder="Medicine/drug names will appear here...",
-#         lines=2,
-#         max_lines=10,
-#         interactive=False,
-#         show_copy_button=True
-#     )
-#     raw_output = gr.Textbox(
-#         label="Raw Model Output",
-#         lines=2,
-#         max_lines=5,
-#         interactive=False
-#     )
-#     page_info = gr.Markdown(
-#         value=""  # Info of PDF page
-#     )
-#     rendered_image = gr.Image(
-#         label="Processed Image (Thresholded for OCR)",
-#         interactive=False
-#     )
-#     num_pages = gr.Number(
-#         value=1, label="Current Page (slider)", visible=False
-#     )
-#     submit_btn = gr.Button("Extract Medicines", variant="primary")
-#     submit_btn.click(
-#         fn=process_input,
-#         inputs=[file_input, temperature, page_slider, extraction_mode],
-#         outputs=[output_text, medicines_output, raw_output, page_info, rendered_image, num_pages]
-#     )
-#     file_input.change(
-#         fn=update_slider,
-#         inputs=[file_input],
-#         outputs=[page_slider]
-#     )
-# if __name__ == "__main__":
-#     demo.launch()
-####################################################  running code only NER #######################
-#!/usr/bin/env python3
 import subprocess
 import sys
@@ -334,6 +16,7 @@ from transformers import (
     LightOnOCRProcessor,
 )
 from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
 device = "cuda" if torch.cuda.is_available() else "cpu"
 if device == "cuda":
@@ -397,7 +80,6 @@ def clean_output_text(text):
     return cleaned
 def preprocess_image_for_ocr(image):
-    """Convert PIL.Image to adaptive thresholded image for OCR."""
     image_rgb = image.convert("RGB")
     img_np = np.array(image_rgb)
     gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
@@ -412,9 +94,58 @@ def preprocess_image_for_ocr(image):
     preprocessed_pil = Image.fromarray(adaptive_threshold)
     return preprocessed_pil
 @spaces.GPU
 def extract_text_from_image(image, temperature=0.2):
-    """OCR + clinical NER, with preprocessing."""
     processed_img = preprocess_image_for_ocr(image)
     chat = [
         {
@@ -451,22 +182,11 @@ def extract_text_from_image(image, temperature=0.2):
     )
     with torch.no_grad():
         outputs = ocr_model.generate(**generation_kwargs)
     output_text = processor.decode(outputs[0], skip_special_tokens=True)
     cleaned_text = clean_output_text(output_text)
-    entities = ner_pipeline(cleaned_text)
-    medications = []
-    for ent in entities:
-        if ent["entity_group"] == "treatment":
-            word = ent["word"]
-            if word.startswith("##") and medications:
-                medications[-1] += word[2:]
-            else:
-                medications.append(word)
-    medications_str = ", ".join(set(medications)) if medications else "None detected"
-    yield cleaned_text, medications_str, output_text, processed_img
-def process_input(file_input, temperature, page_num):
     if file_input is None:
         yield "Please upload an image or PDF first.", "", "", "", "No file!", 1
         return
@@ -494,11 +214,13 @@ def process_input(file_input, temperature, page_num):
             yield msg, "", msg, "", None, slider_value
             return
     try:
-        for cleaned_text, medications, raw_md, processed_img in extract_text_from_image(
             image_to_process, temperature
         ):
-            yield cleaned_text, medications, raw_md, page_info, processed_img, slider_value
     except Exception as e:
         error_msg = f"Error during text extraction: {str(e)}"
         yield error_msg, "", error_msg, page_info, image_to_process, slider_value
@@ -536,6 +258,12 @@ with gr.Blocks(title="💊 Medicine Extraction", theme=gr.themes.Soft()) as demo
         label="Page Number (PDF only)",
         interactive=True
     )
     output_text = gr.Textbox(
         label="📝 Extracted Text",
         lines=4,
@@ -547,7 +275,7 @@ with gr.Blocks(title="💊 Medicine Extraction", theme=gr.themes.Soft()) as demo
         label="💊 Extracted Medicines/Drugs",
         placeholder="Medicine/drug names will appear here...",
         lines=2,
-        max_lines=5,
         interactive=False,
         show_copy_button=True
     )
@@ -558,7 +286,7 @@ with gr.Blocks(title="💊 Medicine Extraction", theme=gr.themes.Soft()) as demo
         interactive=False
     )
     page_info = gr.Markdown(
-        value="" # Info of PDF page
     )
     rendered_image = gr.Image(
         label="Processed Image (Thresholded for OCR)",
@@ -571,7 +299,7 @@ with gr.Blocks(title="💊 Medicine Extraction", theme=gr.themes.Soft()) as demo
     submit_btn.click(
         fn=process_input,
-        inputs=[file_input, temperature, page_slider],
         outputs=[output_text, medicines_output, raw_output, page_info, rendered_image, num_pages]
     )
@@ -586,6 +314,278 @@ if __name__ == "__main__":
 ##########################################   #############################################################

 #################################################################################################
 import subprocess
 import sys
     LightOnOCRProcessor,
 )
 from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
+import re
 device = "cuda" if torch.cuda.is_available() else "cpu"
 if device == "cuda":
     return cleaned
 def preprocess_image_for_ocr(image):
     image_rgb = image.convert("RGB")
     img_np = np.array(image_rgb)
     gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
     preprocessed_pil = Image.fromarray(adaptive_threshold)
     return preprocessed_pil
# Matches a line that opens with a dosage-form keyword (optionally abbreviated
# with a trailing dot), followed by whitespace and at least one name/dose
# character. Compiled once at import time rather than on every call.
_MEDICATION_LINE_RE = re.compile(
    r"""^\s*                        # leading spaces allowed
    (?:                             # non-capturing: only match/no-match is used
        T\.?|TAB\.?|TABLET          # T, T., TAB, TAB., TABLET
        |C\.?|CAP\.?|CAPSULE        # C, C., CAP, CAP., CAPSULE
        |SYRUP|SYP
        |ORAL
        |INJ\.?|INJECTION           # INJ, INJ., INJECTION
        |OINTMENT|DROPS|PATCH|SOL\.?|SOLUTION
    )
    \s+[A-Z0-9 \-\(\)/,.]+          # name/dose/other info after the keyword
    """,
    re.IGNORECASE | re.VERBOSE,
)


def extract_medication_lines(text):
    """Return the lines of *text* that look like medication entries.

    A line qualifies when, after stripping surrounding whitespace, it starts
    with one of the dosage-form keywords in ``_MEDICATION_LINE_RE`` (tab,
    tablet, cap, capsule, syrup, syp, oral, inj, injection, ointment, drops,
    patch, sol, solution, or the single-letter abbreviations T/C), matched
    case-insensitively, followed by whitespace and at least one character of
    name/dose text.

    Args:
        text: Multi-line text to scan. ``None`` or an empty string is treated
            as "no lines".

    Returns:
        The matching lines, stripped and joined with ``"\\n"`` in their
        original order; an empty string when nothing matches.
    """
    if not text:
        return ""
    # splitlines() copes with "\r\n" and other line terminators, not just "\n".
    candidates = (raw_line.strip() for raw_line in text.splitlines())
    return "\n".join(
        line for line in candidates if _MEDICATION_LINE_RE.match(line)
    )
def extract_meds(text, use_ner):
    """Extract medication mentions from *text* via NER or the regex rules.

    Args:
        text: OCR text to analyse.
        use_ner: When truthy, run the module-level ``ner_pipeline`` and keep
            entities whose ``"entity_group"`` is ``"treatment"``; otherwise
            fall back to ``extract_medication_lines``.

    Returns:
        For the NER path, the distinct entity words joined with ``", "`` in
        the order they were first seen. For the regex path, the matching
        lines joined with newlines. ``"None detected"`` when either path
        finds nothing.
    """
    if not use_ner:
        return extract_medication_lines(text) or "None detected"

    meds = []
    for entity in ner_pipeline(text):
        if entity["entity_group"] != "treatment":
            continue
        word = entity["word"]
        # A leading "##" is presumably a WordPiece continuation fragment;
        # glue it onto the previous word instead of listing it separately.
        if word.startswith("##") and meds:
            meds[-1] += word[2:]
        else:
            meds.append(word)

    if not meds:
        return "None detected"
    # dict.fromkeys de-duplicates while keeping first-seen order; the earlier
    # set() gave an arbitrary order that could differ from run to run.
    return ", ".join(dict.fromkeys(meds))
 @spaces.GPU
 def extract_text_from_image(image, temperature=0.2):
+    """OCR with adaptive thresholding."""
     processed_img = preprocess_image_for_ocr(image)
     chat = [
         {
     )
     with torch.no_grad():
         outputs = ocr_model.generate(**generation_kwargs)
     output_text = processor.decode(outputs[0], skip_special_tokens=True)
     cleaned_text = clean_output_text(output_text)
+    yield cleaned_text, output_text, processed_img
+def process_input(file_input, temperature, page_num, extraction_mode):
     if file_input is None:
         yield "Please upload an image or PDF first.", "", "", "", "No file!", 1
         return
             yield msg, "", msg, "", None, slider_value
             return
+    use_ner = extraction_mode == "Regex"   #"Clinical NER"
     try:
+        for cleaned_text, raw_md, processed_img in extract_text_from_image(
             image_to_process, temperature
         ):
+            meds_out = extract_meds(cleaned_text, use_ner)
+            yield cleaned_text, meds_out, raw_md, page_info, processed_img, slider_value
     except Exception as e:
         error_msg = f"Error during text extraction: {str(e)}"
         yield error_msg, "", error_msg, page_info, image_to_process, slider_value
         label="Page Number (PDF only)",
         interactive=True
     )
+    extraction_mode = gr.Radio(
+        choices=["Clinical NER", "Regex"],
+        value="Regex",
+        label="Extraction Method",
+        info="Clinical NER uses ML, Regex uses rules"
+    )
     output_text = gr.Textbox(
         label="📝 Extracted Text",
         lines=4,
         label="💊 Extracted Medicines/Drugs",
         placeholder="Medicine/drug names will appear here...",
         lines=2,
+        max_lines=10,
         interactive=False,
         show_copy_button=True
     )
         interactive=False
     )
     page_info = gr.Markdown(
+        value=""  # Info of PDF page
     )
     rendered_image = gr.Image(
         label="Processed Image (Thresholded for OCR)",
     submit_btn.click(
         fn=process_input,
+        inputs=[file_input, temperature, page_slider, extraction_mode],
         outputs=[output_text, medicines_output, raw_output, page_info, rendered_image, num_pages]
     )
+####################################################  running code only NER #######################
+#!/usr/bin/env python3
+# import subprocess
+# import sys
+# import spaces
+# import torch
+# import gradio as gr
+# from PIL import Image
+# import numpy as np
+# import cv2
+# import pypdfium2 as pdfium
+# from transformers import (
+#     LightOnOCRForConditionalGeneration,
+#     LightOnOCRProcessor,
+# )
+# from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
+# device = "cuda" if torch.cuda.is_available() else "cpu"
+# if device == "cuda":
+#     attn_implementation = "sdpa"
+#     dtype = torch.bfloat16
+# else:
+#     attn_implementation = "eager"
+#     dtype = torch.float32
+# ocr_model = LightOnOCRForConditionalGeneration.from_pretrained(
+#     "lightonai/LightOnOCR-1B-1025",
+#     attn_implementation=attn_implementation,
+#     torch_dtype=dtype,
+#     trust_remote_code=True,
+# ).to(device).eval()
+# processor = LightOnOCRProcessor.from_pretrained(
+#     "lightonai/LightOnOCR-1B-1025",
+#     trust_remote_code=True,
+# )
+# ner_tokenizer = AutoTokenizer.from_pretrained("samrawal/bert-base-uncased_clinical-ner")
+# ner_model = AutoModelForTokenClassification.from_pretrained("samrawal/bert-base-uncased_clinical-ner")
+# ner_pipeline = pipeline(
+#     "ner",
+#     model=ner_model,
+#     tokenizer=ner_tokenizer,
+#     aggregation_strategy="simple",
+# )
+# def render_pdf_page(page, max_resolution=1540, scale=2.77):
+#     width, height = page.get_size()
+#     pixel_width = width * scale
+#     pixel_height = height * scale
+#     resize_factor = min(1, max_resolution / pixel_width, max_resolution / pixel_height)
+#     target_scale = scale * resize_factor
+#     return page.render(scale=target_scale, rev_byteorder=True).to_pil()
+# def process_pdf(pdf_path, page_num=1):
+#     pdf = pdfium.PdfDocument(pdf_path)
+#     total_pages = len(pdf)
+#     page_idx = min(max(int(page_num) - 1, 0), total_pages - 1)
+#     page = pdf[page_idx]
+#     img = render_pdf_page(page)
+#     pdf.close()
+#     return img, total_pages, page_idx + 1
+# def clean_output_text(text):
+#     markers_to_remove = ["system", "user", "assistant"]
+#     lines = text.split('\n')
+#     cleaned_lines = []
+#     for line in lines:
+#         stripped = line.strip()
+#         if stripped.lower() not in markers_to_remove:
+#             cleaned_lines.append(line)
+#     cleaned = '\n'.join(cleaned_lines).strip()
+#     if "assistant" in text.lower():
+#         parts = text.split("assistant", 1)
+#         if len(parts) > 1:
+#             cleaned = parts[1].strip()
+#     return cleaned
+# def preprocess_image_for_ocr(image):
+#     """Convert PIL.Image to adaptive thresholded image for OCR."""
+#     image_rgb = image.convert("RGB")
+#     img_np = np.array(image_rgb)
+#     gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
+#     adaptive_threshold = cv2.adaptiveThreshold(
+#         gray,
+#         255,
+#         cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+#         cv2.THRESH_BINARY,
+#         85,
+#         35,
+#     )
+#     preprocessed_pil = Image.fromarray(adaptive_threshold)
+#     return preprocessed_pil
+# @spaces.GPU
+# def extract_text_from_image(image, temperature=0.2):
+#     """OCR + clinical NER, with preprocessing."""
+#     processed_img = preprocess_image_for_ocr(image)
+#     chat = [
+#         {
+#             "role": "user",
+#             "content": [
+#                 {"type": "image", "image": processed_img}
+#             ],
+#         }
+#     ]
+#     inputs = processor.apply_chat_template(
+#         chat,
+#         add_generation_prompt=True,
+#         tokenize=True,
+#         return_dict=True,
+#         return_tensors="pt",
+#     )
+#     # Move inputs to device
+#     inputs = {
+#         k: (
+#             v.to(device=device, dtype=dtype)
+#             if isinstance(v, torch.Tensor) and v.dtype in [torch.float32, torch.float16, torch.bfloat16]
+#             else v.to(device)
+#             if isinstance(v, torch.Tensor)
+#             else v
+#         )
+#         for k, v in inputs.items()
+#     }
+#     generation_kwargs = dict(
+#         **inputs,
+#         max_new_tokens=2048,
+#         temperature=temperature if temperature > 0 else 0.0,
+#         use_cache=True,
+#         do_sample=temperature > 0,
+#     )
+#     with torch.no_grad():
+#         outputs = ocr_model.generate(**generation_kwargs)
+#     output_text = processor.decode(outputs[0], skip_special_tokens=True)
+#     cleaned_text = clean_output_text(output_text)
+#     entities = ner_pipeline(cleaned_text)
+#     medications = []
+#     for ent in entities:
+#         if ent["entity_group"] == "treatment":
+#             word = ent["word"]
+#             if word.startswith("##") and medications:
+#                 medications[-1] += word[2:]
+#             else:
+#                 medications.append(word)
+#     medications_str = ", ".join(set(medications)) if medications else "None detected"
+#     yield cleaned_text, medications_str, output_text, processed_img
+# def process_input(file_input, temperature, page_num):
+#     if file_input is None:
+#         yield "Please upload an image or PDF first.", "", "", "", "No file!", 1
+#         return
+#     image_to_process = None
+#     page_info = ""
+#     slider_value = page_num
+#     file_path = file_input if isinstance(file_input, str) else file_input.name
+#     if file_path.lower().endswith(".pdf"):
+#         try:
+#             image_to_process, total_pages, actual_page = process_pdf(file_path, int(page_num))
+#             page_info = f"Processing page {actual_page} of {total_pages}"
+#             slider_value = actual_page
+#         except Exception as e:
+#             msg = f"Error processing PDF: {str(e)}"
+#             yield msg, "", msg, "", None, slider_value
+#             return
+#     else:
+#         try:
+#             image_to_process = Image.open(file_path)
+#             page_info = "Processing image"
+#         except Exception as e:
+#             msg = f"Error opening image: {str(e)}"
+#             yield msg, "", msg, "", None, slider_value
+#             return
+#     try:
+#         for cleaned_text, medications, raw_md, processed_img in extract_text_from_image(
+#             image_to_process, temperature
+#         ):
+#             yield cleaned_text, medications, raw_md, page_info, processed_img, slider_value
+#     except Exception as e:
+#         error_msg = f"Error during text extraction: {str(e)}"
+#         yield error_msg, "", error_msg, page_info, image_to_process, slider_value
+# def update_slider(file_input):
+#     if file_input is None:
+#         return gr.update(maximum=20, value=1)
+#     file_path = file_input if isinstance(file_input, str) else file_input.name
+#     if file_path.lower().endswith('.pdf'):
+#         try:
+#             pdf = pdfium.PdfDocument(file_path)
+#             total_pages = len(pdf)
+#             pdf.close()
+#             return gr.update(maximum=total_pages, value=1)
+#         except:
+#             return gr.update(maximum=20, value=1)
+#     else:
+#         return gr.update(maximum=1, value=1)
+# with gr.Blocks(title="💊 Medicine Extraction", theme=gr.themes.Soft()) as demo:
+#     file_input = gr.File(
+#         label="🖼️ Upload Image or PDF",
+#         file_types=[".pdf", ".png", ".jpg", ".jpeg"],
+#         type="filepath"
+#     )
+#     temperature = gr.Slider(
+#         minimum=0.0,
+#         maximum=1.0,
+#         value=0.2,
+#         step=0.05,
+#         label="Temperature"
+#     )
+#     page_slider = gr.Slider(
+#         minimum=1, maximum=20, value=1, step=1,
+#         label="Page Number (PDF only)",
+#         interactive=True
+#     )
+#     output_text = gr.Textbox(
+#         label="📝 Extracted Text",
+#         lines=4,
+#         max_lines=10,
+#         interactive=False,
+#         show_copy_button=True
+#     )
+#     medicines_output = gr.Textbox(
+#         label="💊 Extracted Medicines/Drugs",
+#         placeholder="Medicine/drug names will appear here...",
+#         lines=2,
+#         max_lines=5,
+#         interactive=False,
+#         show_copy_button=True
+#     )
+#     raw_output = gr.Textbox(
+#         label="Raw Model Output",
+#         lines=2,
+#         max_lines=5,
+#         interactive=False
+#     )
+#     page_info = gr.Markdown(
+#         value="" # Info of PDF page
+#     )
+#     rendered_image = gr.Image(
+#         label="Processed Image (Thresholded for OCR)",
+#         interactive=False
+#     )
+#     num_pages = gr.Number(
+#         value=1, label="Current Page (slider)", visible=False
+#     )
+#     submit_btn = gr.Button("Extract Medicines", variant="primary")
+#     submit_btn.click(
+#         fn=process_input,
+#         inputs=[file_input, temperature, page_slider],
+#         outputs=[output_text, medicines_output, raw_output, page_info, rendered_image, num_pages]
+#     )
+#     file_input.change(
+#         fn=update_slider,
+#         inputs=[file_input],
+#         outputs=[page_slider]
+#     )
+# if __name__ == "__main__":
+#     demo.launch()
 ##########################################   #############################################################