# LightOnOCR

# Paused

# File size: 38,886 Bytes



######################################   version  4  NER change done   #######################################################


import spaces
import gradio as gr
from PIL import Image
import numpy as np
import cv2
import re

def preprocess_image_for_ocr(image, block_size=85, c=11):
    """
    Binarize an image with Gaussian adaptive thresholding.

    Args:
        image: PIL image; it is converted to RGB and then to grayscale.
        block_size: Side length of the pixel neighbourhood used to compute
            each local threshold. OpenCV requires an odd value greater
            than 1. Defaults to 85 (the value previously hard-coded).
        c: Constant subtracted from the weighted neighbourhood mean.
            Defaults to 11 (the value previously hard-coded).

    Returns:
        A PIL image built from the thresholded array (pixels are 0 or 255).

    Raises:
        ValueError: If ``block_size`` is even or not greater than 1.
    """
    # Fail early with a readable message instead of an opaque cv2 assertion.
    if block_size <= 1 or block_size % 2 == 0:
        raise ValueError(
            f"block_size must be an odd integer greater than 1, got {block_size!r}"
        )

    image_rgb = image.convert("RGB")
    img_np = np.array(image_rgb)
    gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
    adaptive_threshold = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        block_size,
        c,
    )
    return Image.fromarray(adaptive_threshold)




import re

def extract_medication_lines(text):
    """
    Extract medication mentions from OCR text using a regex.

    A mention must start with a dosage-form keyword (T./TAB./TABLET(S),
    C./CAP./CAPSULE(S), SYRUP, SYP, DROP(S), INJ., INJECTION, OINTMENT,
    CREAM, GEL, PATCH, SOL., SOLUTION, ORAL; any case), followed by one to
    five name tokens, an optional second form keyword and an optional dose
    (integer, decimal or slash combination with an optional unit).

    Args:
        text: Multi-line text; each line is scanned independently.

    Returns:
        The distinct matches, upper-cased and whitespace-normalized, joined
        with newlines in the order they first appear in ``text``. Returns an
        empty string when nothing matches or ``text`` is empty/None.
    """
    if not text:
        return ""

    # Comprehensive form pattern (optional . or plural S)
    form = r"(T\.?|TAB\.?|TABLET(S)?|C\.?|CAP\.?|CAPSULE(S)?|SYRUP(S)?|SYP|DROP(S)?|INJ\.?|INJECTION(S)?|OINTMENT(S)?|CREAM(S)?|GEL(S)?|PATCH(ES)?|SOL\.?|SOLUTION(S)?|ORAL)"
    name = r"([A-Z0-9\-/]+(?:\s+[A-Z0-9\-/]+){0,4})"
    opt_form = fr"(?:\s+{form})?"  # allow form at end as well
    # Dose: decimal numbers, slash combos, unit, or blank
    opt_dose = r"(?:\s*\d{1,4}(?:\.\d+)?(?:/\d{1,4}(?:\.\d+)?)?\s*(mg|ml|mcg|g|kg|units|iu|%|))?"

    # The trailing (?!\.\d) stops a match from ending in the middle of a
    # decimal number. Name tokens may contain digits, so without it the name
    # greedily swallows the integer part of "2.5MG" and the match is cut off
    # at "... 2"; the lookahead forces a backtrack so the dose group picks up
    # the complete decimal value.
    pattern = re.compile(
        fr"\b{form}\s+{name}{opt_form}{opt_dose}\b(?!\.\d)",
        re.IGNORECASE,
    )

    # A dict keeps insertion order, giving de-duplication with a stable,
    # document-ordered result (a set would reorder lines between runs).
    found = {}
    for line in text.split('\n'):
        for m in pattern.finditer(line.strip()):
            normalized = re.sub(r"\s+", " ", m.group(0)).strip().upper()
            found.setdefault(normalized, None)
    return '\n'.join(found)






def clinical_ner_extract(text, use_gpu=False):
    """
    Extract medicines with a clinical NER model, then attach form and dose.

    Each entity tagged ``treatment`` by the
    ``samrawal/bert-base-uncased_clinical-ner`` token-classification pipeline
    is looked up (case-insensitively) in the lines of ``text``. The first line
    containing it is searched for a dosage-form keyword and a dose, and the
    result is assembled as ``FORM ENTITY dose`` (form and dose are omitted
    when not found on that line).

    Args:
        text: OCR text, possibly spanning several lines.
        use_gpu: Run the pipeline on CUDA device 0 when CUDA is available;
            otherwise the CPU is used.

    Returns:
        Distinct medicine strings joined by newlines in order of first
        appearance, or ``"None detected"`` when there is nothing to report
        (including empty or whitespace-only input).
    """
    # Nothing to analyse: skip the expensive model load entirely.
    if not text or not text.strip():
        return "None detected"

    # Load models in GPU context if required
    import torch
    from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

    device = "cuda" if use_gpu and torch.cuda.is_available() else "cpu"
    model_id = "samrawal/bert-base-uncased_clinical-ner"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForTokenClassification.from_pretrained(model_id)
    ner_pipeline = pipeline(
        "ner",
        model=model,
        tokenizer=tokenizer,
        aggregation_strategy="simple",
        device=0 if device == "cuda" else -1,
    )

    # Leading \b keeps keywords from matching inside unrelated words
    # (e.g. "TAB" in "stable", "ORAL" in "temporal").
    # "INJ(ECTION)?" replaces the former "INJ(CTION)?", which could never
    # match the full word "INJECTION".
    form_re = re.compile(
        r"\b(TAB(L?ET)?|CAP(SULE)?|SYRUP|SYP|DROP(S)?|INJ(ECTION)?|OINTMENT|CREAM|GEL|PATCH|SOL(UTION)?|ORAL)",
        re.IGNORECASE,
    )
    # Optional decimal part so "2.5 mg" is not reported as "5 mg".
    dose_re = re.compile(
        r"(\d{1,4}(?:\.\d+)? ?(mg|ml|mcg|g|kg|units|IU)|\d{1,2}(?:\.\d+)? ?%( ?w\/w| ?w\/v| ?v\/v)?)",
        re.IGNORECASE,
    )

    text_lines = text.split('\n')
    lowered_lines = [line.lower() for line in text_lines]

    meds = []
    for ent in ner_pipeline(text):
        if ent["entity_group"] != "treatment":
            continue
        # For each detected medicine entity, use the first line mentioning it.
        entity_name = ent["word"].lower()
        for line, lowered in zip(text_lines, lowered_lines):
            if entity_name not in lowered:
                continue
            tokens = []
            form_match = form_re.search(line)
            if form_match:
                tokens.append(form_match.group(0).upper())
            tokens.append(ent["word"].upper())
            dose_match = dose_re.search(line)
            if dose_match:
                tokens.append(dose_match.group(0))
            meds.append(" ".join(tokens).strip())
            break

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # output is stable across runs (set ordering is not).
    return '\n'.join(dict.fromkeys(meds)) if meds else "None detected"

@spaces.GPU
def run_ocr_and_extract(image, temperature=0.2, extraction_mode="Regex"):
    """
    Run LightOnOCR on an image and extract medicines from the recognised text.

    This is a generator that yields exactly one result tuple.

    Args:
        image: Image handed to the processor's chat template as the single
            user message.
        temperature: Sampling temperature. Values > 0 enable sampling;
            0 (or below) selects deterministic decoding.
        extraction_mode: ``"Clinical NER"`` to use ``clinical_ner_extract``;
            any other value uses ``extract_medication_lines``.

    Yields:
        ``(meds, raw_text, processed_img)`` where ``meds`` is the extracted
        medicine text (``"None detected"`` when empty), ``raw_text`` is the
        stripped OCR output and ``processed_img`` is the image that was fed
        to the model.
    """
    # Load OCR model ONLY in GPU context!
    import torch
    from transformers import LightOnOCRForConditionalGeneration, LightOnOCRProcessor

    device = "cuda" if torch.cuda.is_available() else "cpu"
    attn = "sdpa" if device == "cuda" else "eager"
    dtype = torch.bfloat16 if device == "cuda" else torch.float32

    ocr_model = LightOnOCRForConditionalGeneration.from_pretrained(
        "lightonai/LightOnOCR-1B-1025",
        attn_implementation=attn,
        torch_dtype=dtype,
        trust_remote_code=True,
    ).to(device).eval()
    processor = LightOnOCRProcessor.from_pretrained(
        "lightonai/LightOnOCR-1B-1025",
        trust_remote_code=True,
    )

    # Thresholding is currently disabled; the image is passed through as-is.
    processed_img = image

    # processed_img = preprocess_image_for_ocr(image)
    chat = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": processed_img}
            ],
        }
    ]
    inputs = processor.apply_chat_template(
        chat,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    )

    # Move tensors to the target device. Floating-point tensors are also cast
    # to the model dtype; other tensors (e.g. token ids) keep their dtype.
    float_dtypes = (torch.float32, torch.float16, torch.bfloat16)
    moved = {}
    for key, value in inputs.items():
        if not isinstance(value, torch.Tensor):
            moved[key] = value
        elif value.dtype in float_dtypes:
            moved[key] = value.to(device=device, dtype=dtype)
        else:
            moved[key] = value.to(device)
    inputs = moved

    generation_kwargs = dict(
        **inputs,
        max_new_tokens=2048,
        use_cache=True,
    )
    # Only pass a temperature when sampling: temperature=0.0 together with
    # do_sample=False is a contradictory generation configuration.
    if temperature > 0:
        generation_kwargs.update(do_sample=True, temperature=temperature)
    else:
        generation_kwargs["do_sample"] = False

    with torch.no_grad():
        outputs = ocr_model.generate(**generation_kwargs)

    # generate() returns prompt + continuation; decode only the new tokens so
    # chat-template role text does not leak into the OCR result.
    prompt_len = inputs["input_ids"].shape[-1]
    output_text = processor.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    raw_text = output_text.strip()

    # Clean medicines using selected extraction method
    if extraction_mode == "Clinical NER":
        meds = clinical_ner_extract(raw_text, use_gpu=(device == "cuda"))
    else:  # Regex
        # Same empty-result message as the NER path.
        meds = extract_medication_lines(raw_text) or "None detected"
    yield meds, raw_text, processed_img

def process_input(file_input, temperature, extraction_mode):
    """
    Gradio callback: open the uploaded file and stream OCR/extraction results.

    Args:
        file_input: Path of the uploaded file, or None when nothing was
            uploaded.
        temperature: Sampling temperature forwarded to ``run_ocr_and_extract``.
        extraction_mode: Extraction method forwarded to
            ``run_ocr_and_extract``.

    Yields:
        ``(medicines_text, raw_ocr_text, image)`` tuples. When no file was
        provided or the file cannot be opened as an image, a single tuple
        carrying a message, an empty string and None is yielded instead.
    """
    if file_input is None:
        yield "Please upload an image/PDF.", "", None
        return

    try:
        # copy() loads the pixel data into an independent image, so the file
        # handle can be closed right away instead of leaking.
        with Image.open(file_input) as opened:
            image_to_process = opened.copy()
    except OSError as exc:
        # Missing, unreadable or unidentified image file: report it in the
        # output box rather than failing the whole request.
        yield f"Error opening image: {exc}", "", None
        return

    for meds_out, raw_text, processed_img in run_ocr_and_extract(
        image_to_process, temperature, extraction_mode
    ):
        yield meds_out, raw_text, processed_img

# Gradio UI: one file upload, two settings, three outputs and a submit button.
# The component variables below are wired to process_input at the bottom.
with gr.Blocks(title="💊 Medicine Extraction", theme=gr.themes.Soft()) as demo:
    # NOTE(review): the label mentions PDF, but only image extensions are
    # accepted here and process_input opens the path with PIL — confirm
    # whether PDF support is still planned.
    file_input = gr.File(
        label="Upload Image (or PDF first page for OCR)",
        file_types=[".png", ".jpg", ".jpeg"], # PDF support: requires render as image first
        type="filepath"
    )
    # Passed to run_ocr_and_extract, where a value of 0 turns sampling off.
    temperature = gr.Slider(
        minimum=0.0,
        maximum=1.0,
        value=0.2,
        step=0.05,
        label="Temperature"
    )
    # The choice strings are compared literally in run_ocr_and_extract
    # ("Clinical NER" selects the NER path, anything else the regex path).
    extraction_mode = gr.Radio(
        choices=["Regex", "Clinical NER"],
        value="Regex",
        label="Extraction Method"
    )
    # First output of process_input: the extracted medicine lines.
    medicines_output = gr.Textbox(
        label="💊 Cleaned Medicines",
        lines=10,
        interactive=False,
        show_copy_button=True
    )
    # Second output of process_input: the OCR text.
    raw_output = gr.Textbox(
        label="Raw OCR Output",
        lines=10,
        interactive=False,
        show_copy_button=True
    )
    # Third output of process_input.
    # NOTE(review): run_ocr_and_extract currently returns the input image
    # unchanged (the thresholding call is commented out), so the
    # "Thresholded" wording in this label looks out of date — confirm.
    rendered_image = gr.Image(
        label="Processed Image (Thresholded for OCR)",
        interactive=False
    )
    submit_btn = gr.Button("Extract Medicines", variant="primary")

    # Output order must match the tuple order yielded by process_input.
    submit_btn.click(
        fn=process_input,
        inputs=[file_input, temperature, extraction_mode],
        outputs=[medicines_output, raw_output, rendered_image]
    )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()







#####################################    version  3  NER modification to be done  ############################################################

# import spaces
# import gradio as gr
# from PIL import Image
# import numpy as np
# import cv2
# import re

# import re

# def extract_medication_lines(text):
#     """
#     Extracts medication/drug lines from text using flexible regex.
#     Supports tablet, capsule, syrup, drops, injection, ointment, cream, gel, patch, solution, etc.
#     Matches dose like '1/2/10/250/500 mg/ml/mcg/g/kg' or concentration '1%/2%/0.2%/0.5%/10%' w/w, w/v, v/v.
#     """

#     form_pattern = r"(TAB(L?ET)?|CAP(SULE)?|SYRUP|SYP|DROP(S)?|INJ(CTION)?|OINTMENT|CREAM|GEL|PATCH|SOL(UTION)?|ORAL)"
#     # Drug name: starts with a word (alphanumeric, maybe a hyphen), up to 4 words (spaces, hyphens or slash)
#     name_pattern = r"([A-Z0-9\-/]+(?:\s+[A-Z0-9\-/]+){0,4})"
#     # Dose: e.g., 250mg, 10ml, 0.5%, 10 mcg, 150mcg, etc. and concentration/w/w/w/v/etc.
#     dose_pattern = r"(\d{1,4}\s*(mg|ml|mcg|g|kg|units|IU)|\d{1,2}\s*%(\s*w\/w|\s*w\/v|\s*v\/v)?)"
#     # concentration can appear for creams/gels: e.g. "1% w/w", "2%"

#     # Main pattern: will attempt to capture form anywhere, then name, then dose/concentration
#     main_pattern = (
#         r"(?:" + form_pattern + r"\s+)?" +          # Form prefix optional
#         name_pattern + r"\s*" +
#         r"(?:" + form_pattern + r"\s*)?" +          # Form mid/suffix optional
#         r"(?:" + dose_pattern + r")"                # Dose/concentration required
#     )

#     med_regex = re.compile(main_pattern, re.IGNORECASE)

#     meds = []
#     for line in text.split('\n'):
#         line_stripped = line.strip()
#         match = med_regex.search(line_stripped)
#         if match:
#             meds.append(line_stripped)
#     return '\n'.join(meds)


# ###########################    added NER modification to be done ###################################

# def get_medicine_context(entities, text_lines):
#     """
#     For each medicine entity detected by NER, find its form and dose context from its source line.
#     Returns list of strings like 'TAB ALDACTONE 25MG'.
#     """
#     output = []
#     for ent in entities:
#         if ent["entity_group"] == "treatment":
#             # Find line containing the entity's word (robust for multiline output)
#             for line in text_lines:
#                 if ent["word"].lower() in line.lower():
#                     # Search line for context
#                     match = re.search(r"((TAB(L?ET)?|CAP(SULE)?|SYRUP|SYP|DROP(S)?|INJ(CTION)?|OINTMENT|CREAM|GEL|PATCH|SOL(UTION)?|ORAL).{0,40})", line, re.IGNORECASE)
#                     dose = re.search(r"\d{1,4}\s*(mg|ml|mcg|g|kg|units|IU)|\d{1,2}\s*%(\s*w\/w|\s*w\/v|\s*v\/v)?", line, re.IGNORECASE)
#                     info = []
#                     if match:
#                         info.append(match.group(0).strip())
#                     else:
#                         info.append(ent["word"].strip())
#                     if dose:
#                         info.append(dose.group(0).strip())
#                     output.append(" ".join(info))
#                     break
#     return "\n".join(set(output)) if output else "None detected"


# ################################



# def preprocess_image_for_ocr(image):
#     image_rgb = image.convert("RGB")
#     img_np = np.array(image_rgb)
#     gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
#     adaptive_threshold = cv2.adaptiveThreshold(
#         gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 85,35,
#     )
#     preprocessed_pil = Image.fromarray(adaptive_threshold)
#     return preprocessed_pil

# @spaces.GPU
# def extract_text_from_image(image, temperature=0.2, use_ner=False):
#     # Import and load within GPU context!
#     import torch
#     from transformers import (
#         LightOnOCRForConditionalGeneration,
#         LightOnOCRProcessor,
#         AutoTokenizer, AutoModelForTokenClassification, pipeline,
#     )

#     device = "cuda" if torch.cuda.is_available() else "cpu"
#     attn_implementation = "sdpa" if device == "cuda" else "eager"
#     dtype = torch.bfloat16 if device == "cuda" else torch.float32

#     ocr_model = LightOnOCRForConditionalGeneration.from_pretrained(
#         "lightonai/LightOnOCR-1B-1025",
#         attn_implementation=attn_implementation,
#         torch_dtype=dtype,
#         trust_remote_code=True,
#     ).to(device).eval()

#     processor = LightOnOCRProcessor.from_pretrained(
#         "lightonai/LightOnOCR-1B-1025",
#         trust_remote_code=True,
#     )
#     # NER only if requested
#     if use_ner:
#         ner_tokenizer = AutoTokenizer.from_pretrained("samrawal/bert-base-uncased_clinical-ner")
#         ner_model = AutoModelForTokenClassification.from_pretrained("samrawal/bert-base-uncased_clinical-ner")
#         ner_pipeline = pipeline(
#             "ner", model=ner_model, tokenizer=ner_tokenizer, aggregation_strategy="simple"
#         )

#     processed_img = preprocess_image_for_ocr(image)

#     chat = [
#         {
#             "role": "user",
#             "content": [
#                 {"type": "image", "image": processed_img}
#             ],
#         }
#     ]
#     inputs = processor.apply_chat_template(
#         chat,
#         add_generation_prompt=True,
#         tokenize=True,
#         return_dict=True,
#         return_tensors="pt",
#     )

#     inputs = {
#         k: (v.to(device=device, dtype=dtype)
#             if isinstance(v, torch.Tensor) and v.dtype in [torch.float32, torch.float16, torch.bfloat16]
#             else v.to(device)
#             if isinstance(v, torch.Tensor)
#             else v)
#         for k, v in inputs.items()
#     }
#     generation_kwargs = dict(
#         **inputs,
#         max_new_tokens=2048,
#         temperature=temperature if temperature > 0 else 0.0,
#         use_cache=True,
#         do_sample=temperature > 0,
#     )
#     with torch.no_grad():
#         outputs = ocr_model.generate(**generation_kwargs)

#     output_text = processor.decode(outputs[0], skip_special_tokens=True)
#     cleaned_text = output_text.strip()
#     # Extract medicines
#     if use_ner:
#         entities = ner_pipeline(cleaned_text)
#         meds = []
#         for ent in entities:
#             if ent["entity_group"] == "treatment":
#                 word = ent["word"]
#                 if word.startswith("##") and meds:
#                     meds[-1] += word[2:]
#                 else:
#                     meds.append(word)
#         result_meds = ", ".join(set(meds)) if meds else "None detected"
#     else:
#         result_meds = extract_medication_lines(cleaned_text) or "None detected"

#     yield result_meds, processed_img  # Only medicines and processed image

# def process_input(file_input, temperature, page_num, extraction_mode):
#     if file_input is None:
#         yield "Please upload an image or PDF first.", None
#         return
#     image_to_process = Image.open(file_input) if not str(file_input).lower().endswith(".pdf") else None  # simplify to image only
#     use_ner = extraction_mode == "Clinical NER"

#     for meds_out, processed_img in extract_text_from_image(image_to_process, temperature, use_ner):
#         yield meds_out, processed_img

# with gr.Blocks(title="💊 Medicine Extraction", theme=gr.themes.Soft()) as demo:
#     file_input = gr.File(
#         label="🖼️ Upload Image",
#         file_types=[".png", ".jpg", ".jpeg"],
#         type="filepath"
#     )
#     temperature = gr.Slider(
#         minimum=0.0,
#         maximum=1.0,
#         value=0.2,
#         step=0.05,
#         label="Temperature"
#     )
#     extraction_mode = gr.Radio(
#         choices=["Clinical NER", "Regex"],
#         value="Regex",
#         label="Extraction Method",
#         info="Clinical NER uses ML, Regex uses rules"
#     )
#     medicines_output = gr.Textbox(
#         label="💊 Extracted Medicines/Drugs",
#         placeholder="Medicine/drug names will appear here...",
#         lines=2,
#         max_lines=10,
#         interactive=False,
#         show_copy_button=True
#     )
#     rendered_image = gr.Image(
#         label="Processed Image (Adaptive Thresholded for OCR)",
#         interactive=False
#     )
#     submit_btn = gr.Button("Extract Medicines", variant="primary")

#     page_slider = gr.Slider(minimum=1, maximum=20, value=1, step=1, label="Page Number")

#     submit_btn.click(
#     fn=process_input,
#     inputs=[file_input, temperature, page_slider, extraction_mode],
#     outputs=[medicines_output, rendered_image]
# )


# if __name__ == "__main__":
#     demo.launch()



####################################################  running code only NER #######################

#!/usr/bin/env python3

# import subprocess
# import sys

# import spaces
# import torch

# import gradio as gr
# from PIL import Image
# import numpy as np
# import cv2
# import pypdfium2 as pdfium
# from transformers import (
#     LightOnOCRForConditionalGeneration,
#     LightOnOCRProcessor,
# )
# from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

# device = "cuda" if torch.cuda.is_available() else "cpu"
# if device == "cuda":
#     attn_implementation = "sdpa"
#     dtype = torch.bfloat16
# else:
#     attn_implementation = "eager"
#     dtype = torch.float32

# ocr_model = LightOnOCRForConditionalGeneration.from_pretrained(
#     "lightonai/LightOnOCR-1B-1025",
#     attn_implementation=attn_implementation,
#     torch_dtype=dtype,
#     trust_remote_code=True,
# ).to(device).eval()

# processor = LightOnOCRProcessor.from_pretrained(
#     "lightonai/LightOnOCR-1B-1025",
#     trust_remote_code=True,
# )

# ner_tokenizer = AutoTokenizer.from_pretrained("samrawal/bert-base-uncased_clinical-ner")
# ner_model = AutoModelForTokenClassification.from_pretrained("samrawal/bert-base-uncased_clinical-ner")
# ner_pipeline = pipeline(
#     "ner",
#     model=ner_model,
#     tokenizer=ner_tokenizer,
#     aggregation_strategy="simple",
# )

# def render_pdf_page(page, max_resolution=1540, scale=2.77):
#     width, height = page.get_size()
#     pixel_width = width * scale
#     pixel_height = height * scale
#     resize_factor = min(1, max_resolution / pixel_width, max_resolution / pixel_height)
#     target_scale = scale * resize_factor
#     return page.render(scale=target_scale, rev_byteorder=True).to_pil()

# def process_pdf(pdf_path, page_num=1):
#     pdf = pdfium.PdfDocument(pdf_path)
#     total_pages = len(pdf)
#     page_idx = min(max(int(page_num) - 1, 0), total_pages - 1)
#     page = pdf[page_idx]
#     img = render_pdf_page(page)
#     pdf.close()
#     return img, total_pages, page_idx + 1

# def clean_output_text(text):
#     markers_to_remove = ["system", "user", "assistant"]
#     lines = text.split('\n')
#     cleaned_lines = []
#     for line in lines:
#         stripped = line.strip()
#         if stripped.lower() not in markers_to_remove:
#             cleaned_lines.append(line)
#     cleaned = '\n'.join(cleaned_lines).strip()
#     if "assistant" in text.lower():
#         parts = text.split("assistant", 1)
#         if len(parts) > 1:
#             cleaned = parts[1].strip()
#     return cleaned

# def preprocess_image_for_ocr(image):
#     """Convert PIL.Image to adaptive thresholded image for OCR."""
#     image_rgb = image.convert("RGB")
#     img_np = np.array(image_rgb)
#     gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
#     adaptive_threshold = cv2.adaptiveThreshold(
#         gray,
#         255,
#         cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
#         cv2.THRESH_BINARY,
#         85,
#         35,
#     )
#     preprocessed_pil = Image.fromarray(adaptive_threshold)
#     return preprocessed_pil

# @spaces.GPU
# def extract_text_from_image(image, temperature=0.2):
#     """OCR + clinical NER, with preprocessing."""
#     processed_img = preprocess_image_for_ocr(image)
#     chat = [
#         {
#             "role": "user",
#             "content": [
#                 {"type": "image", "image": processed_img}
#             ],
#         }
#     ]
#     inputs = processor.apply_chat_template(
#         chat,
#         add_generation_prompt=True,
#         tokenize=True,
#         return_dict=True,
#         return_tensors="pt",
#     )
#     # Move inputs to device
#     inputs = {
#         k: (
#             v.to(device=device, dtype=dtype)
#             if isinstance(v, torch.Tensor) and v.dtype in [torch.float32, torch.float16, torch.bfloat16]
#             else v.to(device)
#             if isinstance(v, torch.Tensor)
#             else v
#         )
#         for k, v in inputs.items()
#     }
#     generation_kwargs = dict(
#         **inputs,
#         max_new_tokens=2048,
#         temperature=temperature if temperature > 0 else 0.0,
#         use_cache=True,
#         do_sample=temperature > 0,
#     )
#     with torch.no_grad():
#         outputs = ocr_model.generate(**generation_kwargs)

#     output_text = processor.decode(outputs[0], skip_special_tokens=True)
#     cleaned_text = clean_output_text(output_text)
#     entities = ner_pipeline(cleaned_text)
#     medications = []
#     for ent in entities:
#         if ent["entity_group"] == "treatment":
#             word = ent["word"]
#             if word.startswith("##") and medications:
#                 medications[-1] += word[2:]
#             else:
#                 medications.append(word)
#     medications_str = ", ".join(set(medications)) if medications else "None detected"
#     yield cleaned_text, medications_str, output_text, processed_img

# def process_input(file_input, temperature, page_num):
#     if file_input is None:
#         yield "Please upload an image or PDF first.", "", "", "", "No file!", 1
#         return

#     image_to_process = None
#     page_info = ""
#     slider_value = page_num
#     file_path = file_input if isinstance(file_input, str) else file_input.name

#     if file_path.lower().endswith(".pdf"):
#         try:
#             image_to_process, total_pages, actual_page = process_pdf(file_path, int(page_num))
#             page_info = f"Processing page {actual_page} of {total_pages}"
#             slider_value = actual_page
#         except Exception as e:
#             msg = f"Error processing PDF: {str(e)}"
#             yield msg, "", msg, "", None, slider_value
#             return
#     else:
#         try:
#             image_to_process = Image.open(file_path)
#             page_info = "Processing image"
#         except Exception as e:
#             msg = f"Error opening image: {str(e)}"
#             yield msg, "", msg, "", None, slider_value
#             return

#     try:
#         for cleaned_text, medications, raw_md, processed_img in extract_text_from_image(
#             image_to_process, temperature
#         ):
#             yield cleaned_text, medications, raw_md, page_info, processed_img, slider_value
#     except Exception as e:
#         error_msg = f"Error during text extraction: {str(e)}"
#         yield error_msg, "", error_msg, page_info, image_to_process, slider_value

# def update_slider(file_input):
#     if file_input is None:
#         return gr.update(maximum=20, value=1)
#     file_path = file_input if isinstance(file_input, str) else file_input.name
#     if file_path.lower().endswith('.pdf'):
#         try:
#             pdf = pdfium.PdfDocument(file_path)
#             total_pages = len(pdf)
#             pdf.close()
#             return gr.update(maximum=total_pages, value=1)
#         except:
#             return gr.update(maximum=20, value=1)
#     else:
#         return gr.update(maximum=1, value=1)

# with gr.Blocks(title="💊 Medicine Extraction", theme=gr.themes.Soft()) as demo:
#     file_input = gr.File(
#         label="🖼️ Upload Image or PDF",
#         file_types=[".pdf", ".png", ".jpg", ".jpeg"],
#         type="filepath"
#     )
#     temperature = gr.Slider(
#         minimum=0.0,
#         maximum=1.0,
#         value=0.2,
#         step=0.05,
#         label="Temperature"
#     )
#     page_slider = gr.Slider(
#         minimum=1, maximum=20, value=1, step=1,
#         label="Page Number (PDF only)",
#         interactive=True
#     )
#     output_text = gr.Textbox(
#         label="📝 Extracted Text",
#         lines=4,
#         max_lines=10,
#         interactive=False,
#         show_copy_button=True
#     )
#     medicines_output = gr.Textbox(
#         label="💊 Extracted Medicines/Drugs",
#         placeholder="Medicine/drug names will appear here...",
#         lines=2,
#         max_lines=5,
#         interactive=False,
#         show_copy_button=True
#     )
#     raw_output = gr.Textbox(
#         label="Raw Model Output",
#         lines=2,
#         max_lines=5,
#         interactive=False
#     )
#     page_info = gr.Markdown(
#         value="" # Info of PDF page
#     )
#     rendered_image = gr.Image(
#         label="Processed Image (Thresholded for OCR)",
#         interactive=False
#     )
#     num_pages = gr.Number(
#         value=1, label="Current Page (slider)", visible=False
#     )
#     submit_btn = gr.Button("Extract Medicines", variant="primary")

#     submit_btn.click(
#         fn=process_input,
#         inputs=[file_input, temperature, page_slider],
#         outputs=[output_text, medicines_output, raw_output, page_info, rendered_image, num_pages]
#     )

#     file_input.change(
#         fn=update_slider,
#         inputs=[file_input],
#         outputs=[page_slider]
#     )

# if __name__ == "__main__":
#     demo.launch()




##########################################   #############################################################



# Create Gradio interface
# with gr.Blocks(title="📖 Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft()) as demo:
#     gr.Markdown(f"""
# # 📖 Image/PDF to Text Extraction with LightOnOCR

# **💡 How to use:**
# 1. Upload an image or PDF
# 2. For PDFs: select which page to extract (1-20)
# 3. Adjust temperature if needed
# 4. Click "Extract Text"

# **Note:** The Markdown rendering for tables may not always be perfect. Check the raw output for complex tables!

# **Model:** LightOnOCR-1B-1025 by LightOn AI  
# **Device:** {device.upper()}  
# **Attention:** {attn_implementation}
# """)
    
#     with gr.Row():
#         with gr.Column(scale=1):
#             file_input = gr.File(
#                 label="🖼️ Upload Image or PDF",
#                 file_types=[".pdf", ".png", ".jpg", ".jpeg"],
#                 type="filepath"
#             )
#             rendered_image = gr.Image(
#                 label="📄 Preview",
#                 type="pil",
#                 height=400,
#                 interactive=False
#             )
#             num_pages = gr.Slider(
#                 minimum=1,
#                 maximum=20,
#                 value=1,
#                 step=1,
#                 label="PDF: Page Number",
#                 info="Select which page to extract"
#             )
#             page_info = gr.Textbox(
#                 label="Processing Info",
#                 value="",
#                 interactive=False
#             )
#             temperature = gr.Slider(
#                 minimum=0.0,
#                 maximum=1.0,
#                 value=0.2,
#                 step=0.05,
#                 label="Temperature",
#                 info="0.0 = deterministic, Higher = more varied"
#             )
#             submit_btn = gr.Button("Extract Text", variant="primary")
#             clear_btn = gr.Button("Clear", variant="secondary")
        
#         with gr.Column(scale=2):
#             output_text = gr.Markdown(
#                 label="📄 Extracted Text (Rendered)",
#                 value="*Extracted text will appear here...*"
#             )
#             medications_output = gr.Textbox(
#     label="💊 Extracted Medicines/Drugs",
#     placeholder="Medicine/drug names will appear here...",
#     lines=2,
#     max_lines=5,
#     interactive=False,
#     show_copy_button=True
# )
    
#     with gr.Row():
#         with gr.Column():
#             raw_output = gr.Textbox(
#                 label="Raw Markdown Output",
#                 placeholder="Raw text will appear here...",
#                 lines=20,
#                 max_lines=30,
#                 show_copy_button=True
#             )
    
#     # Event handlers
#     submit_btn.click(
#     fn=process_input,
#     inputs=[file_input, temperature, num_pages, ],
#     outputs=[output_text, medications_output, raw_output, page_info, rendered_image, num_pages]
# )




####################################  old code to be checked #############################################

# import sys
# import threading

# import spaces
# import torch

# import gradio as gr
# from PIL import Image
# from io import BytesIO
# import pypdfium2 as pdfium
# from transformers import (
#     LightOnOCRForConditionalGeneration,
#     LightOnOCRProcessor,
#     TextIteratorStreamer,
# )

# # ---- CLINICAL NER IMPORTS ----
# import spacy

# device = "cuda" if torch.cuda.is_available() else "cpu"

# # Choose best attention implementation based on device
# if device == "cuda":
#     attn_implementation = "sdpa"  
#     dtype = torch.bfloat16
#     print("Using sdpa for GPU")
# else:
#     attn_implementation = "eager"  # Best for CPU
#     dtype = torch.float32
#     print("Using eager attention for CPU")

# # Initialize the LightOnOCR model and processor
# print(f"Loading model on {device} with {attn_implementation} attention...")
# model = LightOnOCRForConditionalGeneration.from_pretrained(
#     "lightonai/LightOnOCR-1B-1025",
#     attn_implementation=attn_implementation,
#     torch_dtype=dtype,
#     trust_remote_code=True
# ).to(device).eval()

# processor = LightOnOCRProcessor.from_pretrained(
#     "lightonai/LightOnOCR-1B-1025",
#     trust_remote_code=True
# )
# print("Model loaded successfully!")

# # ---- LOAD CLINICAL NER MODEL (BC5CDR) ----
# print("Loading clinical NER model (bc5cdr)...")
# nlp_ner = spacy.load("en_ner_bc5cdr_md")
# print("Clinical NER loaded.")

# def render_pdf_page(page, max_resolution=1540, scale=2.77):
#     """Render a PDF page to PIL Image."""
#     width, height = page.get_size()
#     pixel_width = width * scale
#     pixel_height = height * scale
#     resize_factor = min(1, max_resolution / pixel_width, max_resolution / pixel_height)
#     target_scale = scale * resize_factor
#     return page.render(scale=target_scale, rev_byteorder=True).to_pil()


# def process_pdf(pdf_path, page_num=1):
#     """Extract a specific page from PDF."""
#     pdf = pdfium.PdfDocument(pdf_path)
#     total_pages = len(pdf)
#     page_idx = min(max(int(page_num) - 1, 0), total_pages - 1)
    
#     page = pdf[page_idx]
#     img = render_pdf_page(page)
    
#     pdf.close()
#     return img, total_pages, page_idx + 1


# def clean_output_text(text):
#     """Remove chat template artifacts from output."""
#     markers_to_remove = ["system", "user", "assistant"]
#     lines = text.split('\n')
#     cleaned_lines = []
#     for line in lines:
#         stripped = line.strip()
#         # Skip lines that are just template markers
#         if stripped.lower() not in markers_to_remove:
#             cleaned_lines.append(line)
#     cleaned = '\n'.join(cleaned_lines).strip()
#     if "assistant" in text.lower():
#         parts = text.split("assistant", 1)
#         if len(parts) > 1:
#             cleaned = parts[1].strip()
#     return cleaned

# def extract_medication_names(text):
#     """Extract medication names using clinical NER (spacy: bc5cdr CHEMICAL)."""
#     doc = nlp_ner(text)
#     meds = [ent.text for ent in doc.ents if ent.label_ == "CHEMICAL"]
#     meds_unique = list(dict.fromkeys(meds))
#     return meds_unique


# @spaces.GPU
# def extract_text_from_image(image, temperature=0.2, stream=False):
#     """Extract text from image using LightOnOCR model."""
#     chat = [
#         {
#             "role": "user",
#             "content": [
#                 {"type": "image", "url": image},
#             ],
#         }
#     ]
#     inputs = processor.apply_chat_template(
#         chat,
#         add_generation_prompt=True,
#         tokenize=True,
#         return_dict=True,
#         return_tensors="pt"
#     )
#     inputs = {
#         k: v.to(device=device, dtype=dtype) if isinstance(v, torch.Tensor) and v.dtype in [torch.float32, torch.float16, torch.bfloat16]
#         else v.to(device) if isinstance(v, torch.Tensor) 
#         else v 
#         for k, v in inputs.items()
#     }
#     generation_kwargs = dict(
#         **inputs,
#         max_new_tokens=2048,
#         temperature=temperature if temperature > 0 else 0.0,
#         use_cache=True,
#         do_sample=temperature > 0,
#     )
#     if stream:
#         # Streaming generation
#         streamer = TextIteratorStreamer(
#             processor.tokenizer,
#             skip_prompt=True,
#             skip_special_tokens=True
#         )
#         generation_kwargs["streamer"] = streamer
#         thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
#         thread.start()
#         full_text = ""
#         for new_text in streamer:
#             full_text += new_text
#             cleaned_text = clean_output_text(full_text)
#             yield cleaned_text
#         thread.join()
#     else:
#         # Non-streaming generation
#         with torch.no_grad():
#             outputs = model.generate(**generation_kwargs)
#         output_text = processor.decode(outputs[0], skip_special_tokens=True)
#         cleaned_text = clean_output_text(output_text)
#         yield cleaned_text

# def process_input(file_input, temperature, page_num, enable_streaming):
#     """Process uploaded file (image or PDF) and extract medication names via OCR+NER."""
#     if file_input is None:
#         yield "Please upload an image or PDF first.", "", "", None, gr.update()
#         return
#     image_to_process = None
#     page_info = ""
#     file_path = file_input if isinstance(file_input, str) else file_input.name
#     # Handle PDF files
#     if file_path.lower().endswith('.pdf'):
#         try:
#             image_to_process, total_pages, actual_page = process_pdf(file_path, int(page_num))
#             page_info = f"Processing page {actual_page} of {total_pages}"
#         except Exception as e:
#             yield f"Error processing PDF: {str(e)}", "", "", None, gr.update()
#             return
#     # Handle image files
#     else:
#         try:
#             image_to_process = Image.open(file_path)
#             page_info = "Processing image"
#         except Exception as e:
#             yield f"Error opening image: {str(e)}", "", "", None, gr.update()
#             return
#     try:
#         for extracted_text in extract_text_from_image(image_to_process, temperature, stream=enable_streaming):
#             meds = extract_medication_names(extracted_text)
#             meds_str = "\n".join(meds) if meds else "No medications found."
#             yield meds_str, meds_str, page_info, image_to_process, gr.update()
#     except Exception as e:
#         error_msg = f"Error during text extraction: {str(e)}"
#         yield error_msg, error_msg, page_info, image_to_process, gr.update()

# def update_slider(file_input):
#     """Update page slider based on PDF page count."""
#     if file_input is None:
#         return gr.update(maximum=20, value=1)
#     file_path = file_input if isinstance(file_input, str) else file_input.name
#     if file_path.lower().endswith('.pdf'):
#         try:
#             pdf = pdfium.PdfDocument(file_path)
#             total_pages = len(pdf)
#             pdf.close()
#             return gr.update(maximum=total_pages, value=1)
#         except:
#             return gr.update(maximum=20, value=1)
#     else:
#         return gr.update(maximum=1, value=1)

# # ----- GRADIO UI -----
# with gr.Blocks(title="📖 Image/PDF OCR + Clinical NER", theme=gr.themes.Soft()) as demo:
#     gr.Markdown(f"""
# # 📖 Medication Extraction from Image/PDF with LightOnOCR + Clinical NER

# **💡 How to use:**
# 1. Upload an image or PDF
# 2. For PDFs: select which page to extract
# 3. Adjust temperature if needed
# 4. Click "Extract Medications"

# **Output:** Only medication names found in text (via NER)

# **Model:** LightOnOCR-1B-1025 by LightOn AI  
# **Device:** {device.upper()}  
# **Attention:** {attn_implementation}
# """)
#     with gr.Row():
#         with gr.Column(scale=1):
#             file_input = gr.File(
#                 label="🖼️ Upload Image or PDF",
#                 file_types=[".pdf", ".png", ".jpg", ".jpeg"],
#                 type="filepath"
#             )
#             rendered_image = gr.Image(
#                 label="📄 Preview",
#                 type="pil",
#                 height=400,
#                 interactive=False
#             )
#             num_pages = gr.Slider(
#                 minimum=1,
#                 maximum=20,
#                 value=1,
#                 step=1,
#                 label="PDF: Page Number",
#                 info="Select which page to extract"
#             )
#             page_info = gr.Textbox(
#                 label="Processing Info",
#                 value="",
#                 interactive=False
#             )
#             temperature = gr.Slider(
#                 minimum=0.0,
#                 maximum=1.0,
#                 value=0.2,
#                 step=0.05,
#                 label="Temperature",
#                 info="0.0 = deterministic, Higher = more varied"
#             )
#             enable_streaming = gr.Checkbox(
#                 label="Enable Streaming",
#                 value=True,
#                 info="Show text progressively as it's generated"
#             )
#             submit_btn = gr.Button("Extract Medications", variant="primary")
#             clear_btn = gr.Button("Clear", variant="secondary")
#         with gr.Column(scale=2):
#             output_text = gr.Markdown(
#                 label="🩺 Extracted Medication Names",
#                 value="*Medication names will appear here...*"
#             )
#     with gr.Row():
#         with gr.Column():
#             raw_output = gr.Textbox(
#                 label="Extracted Medication Names (Raw)",
#                 placeholder="Medication list will appear here...",
#                 lines=20,
#                 max_lines=30,
#                 show_copy_button=True
#             )
#     # Event handlers
#     submit_btn.click(
#         fn=process_input,
#         inputs=[file_input, temperature, num_pages, enable_streaming],
#         outputs=[output_text, raw_output, page_info, rendered_image, num_pages]
#     )
#     file_input.change(
#         fn=update_slider,
#         inputs=[file_input],
#         outputs=[num_pages]
#     )
#     clear_btn.click(
#         fn=lambda: (None, "*Medication names will appear here...*", "", "", None, 1),
#         outputs=[file_input, output_text, raw_output, page_info, rendered_image, num_pages]
#     )

# if __name__ == "__main__":
#     demo.launch()