Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from PIL import Image | |
| import base64 | |
| import io | |
| import re | |
| from llms import LLM | |
| from typing import Union | |
def image_to_base64(image: Image.Image) -> str:
    """Serialize a PIL image to PNG and return it as a base64-encoded string."""
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")
def extract_latex(text: str) -> str:
    """Extract a LaTeX snippet from raw LLM output.

    Tries, in order: a ```latex fenced block, a generic ``` fenced block,
    a $$...$$ display-math span, a $...$ inline-math span. Falls back to
    the whole text stripped of surrounding whitespace.
    """
    # Fenced code blocks: prefer an explicit ```latex fence over a bare one.
    match = re.search(r"```latex\s*([\s\S]+?)```", text)
    if not match:
        match = re.search(r"```\s*([\s\S]+?)```", text)
    if match:
        return match.group(1).strip()
    # BUGFIX: check display math $$...$$ before inline $...$ — otherwise
    # the inline pattern matches across the doubled dollars and returns a
    # result with a stray leading '$'.
    display_block = re.search(r"\$\$([\s\S]+?)\$\$", text)
    if display_block:
        return display_block.group(1).strip()
    # Inline $...$ math.
    dollar_block = re.search(r"\$(.+?)\$", text)
    if dollar_block:
        return dollar_block.group(1).strip()
    # Fallback: return everything stripped.
    return text.strip()
# Multimodal prompt template (LangChain style).
# NOTE(review): this template appears unused — image_to_latex_llm builds its
# prompt inline instead of formatting this string; kept for reference.
MULTIMODAL_PROMPT = (
    "You are an expert at recognizing mathematical equations from images. "
    "Given the following image (base64 encoded PNG), extract the LaTeX code for any equation present. "
    "If there is no equation, say 'No equation found.'\n"
    "Image (base64):\n{image_b64}"
)
def image_to_latex_llm(image: Image.Image, model_name: str) -> str:
    """Run a hosted multimodal LLM on the image and return the extracted LaTeX.

    Args:
        image: Uploaded equation image, or None when nothing was uploaded.
        model_name: Identifier of the multimodal chat model to invoke.

    Returns:
        The LaTeX string extracted from the model's reply, or an
        instructional message when no image was provided.
    """
    if image is None:
        return "Please upload an image."
    # Reuse the shared helper instead of duplicating the PNG/base64 encoding.
    img_b64 = image_to_base64(image)
    prompt = [
        {
            "type": "text",
            "text": (
                "You are an expert at recognizing mathematical equations from images. "
                "Given the following image, extract the LaTeX code for any equation present. "
                "If there is no equation, say 'No equation found.' "
                "Return only the LaTeX code, and wrap it in a markdown code block with the word 'latex', like this: ```latex ... ``` ."
            ),
        },
        {
            "type": "image_url",
            "image_url": f"data:image/png;base64,{img_b64}",
        },
    ]
    message = {"role": "user", "content": prompt}
    llm = LLM(model=model_name)
    response = llm.chat_model.invoke([message])
    print(f"[LOG] Raw LLM output: {response.content}")
    latex = extract_latex(response.content)
    print(f"[LOG] Extracted LaTeX: {latex}")
    return latex
import requests
import logging
import os

# URL of the self-hosted (Modal) LaTeX-OCR inference endpoint; None when the
# environment variable is not set.
BACKEND_API_ENDPOINT = os.getenv("BACKEND_API_ENDPOINT")
# Request timeout in seconds — presumably generous to tolerate backend cold
# starts; TODO confirm against the backend's startup time.
REQUEST_TIMEOUT = 300
def call_latex_ocr_inference(payload: dict) -> Union[dict, str]:
    """
    Sends a POST request to the Latex OCR Modal endpoint with given payload.

    Args:
        payload: JSON-serializable request body for the backend.

    Returns:
        The decoded JSON response on success, or an {"error": ...} dict when
        the endpoint is unconfigured or the request fails.
    """
    endpoint = BACKEND_API_ENDPOINT
    # BUGFIX: fail fast with a readable error instead of letting requests
    # raise on a None URL when BACKEND_API_ENDPOINT is not set.
    if not endpoint:
        logging.error("BACKEND_API_ENDPOINT is not configured")
        return {"error": "BACKEND_API_ENDPOINT is not configured"}
    try:
        resp = requests.post(
            endpoint,
            json=payload,
            timeout=REQUEST_TIMEOUT
        )
        resp.raise_for_status()
        return resp.json()
    except requests.exceptions.RequestException as e:
        logging.error(f"Error calling API: {str(e)}")
        return {"error": str(e)}
def image_to_latex_hf(image: Image.Image, model_name) -> str:
    """Send the image to the self-hosted OCR backend and return the LaTeX.

    Args:
        image: Uploaded equation image, or None when nothing was uploaded.
        model_name: Backend model identifier (e.g. a HF repo id).

    Returns:
        The 'result' field of the backend response, or an error message.
    """
    if image is None:
        return "Please upload an image."
    # Reuse the shared helper instead of duplicating the PNG/base64 encoding.
    payload = {
        "image_base64": image_to_base64(image),
        "text_input": "",
        "model_name": model_name
    }
    response = call_latex_ocr_inference(payload)
    print(f"[LOG] Raw response: {response}")
    # BUGFIX: the backend call may return an {"error": ...} dict on failure —
    # don't KeyError on the missing 'result' field.
    if isinstance(response, dict) and "result" in response:
        return response["result"]
    return f"Backend error: {response}"
def image_to_latex(image: "Image.Image", model_name):
    """Dispatch to the right converter and return the raw + rendered LaTeX.

    Args:
        image: Uploaded equation image, or None when nothing was uploaded.
        model_name: Selected model; hosted LLMs are routed by name substring
            ("gemini"/"openai"/"claude"), everything else goes to the
            self-hosted HF backend.

    Returns:
        Tuple of (raw LaTeX string, the same string wrapped in $$...$$ for
        the Markdown component).
    """
    print("model_name", model_name)
    # BUGFIX: handle the no-image case here so the instructional message is
    # not wrapped in $$...$$ and rendered as broken math.
    if image is None:
        message = "Please upload an image."
        return message, message
    if "gemini" in model_name or "openai" in model_name or "claude" in model_name:
        latex_equation = image_to_latex_llm(image, model_name)
    else:
        latex_equation = image_to_latex_hf(image, model_name)
    rendered_latex_equation = "$$" + latex_equation + "$$"
    return latex_equation, rendered_latex_equation
# --- Gradio UI: hero header, image input, model picker, outputs, examples ---
with gr.Blocks() as demo:
    # Hero/header section; the trailing open <div> is closed by the CSS block
    # below so the form controls render inside the centered column.
    gr.HTML("""
    <div style='text-align: center; margin-top: 40px;'>
        <span style='font-size: 3em; font-family: serif; font-weight: 400;'>Image to LaTeX</span><br>
        <span style='font-size: 5em; font-family: serif; color: #1784d6; font-weight: 600;'>Converter</span><br><br>
        <span style='font-size: 1.2em; font-family: sans-serif;'>
            Upload an image containing a math equation and get the <b>LaTeX</b> code instantly.<br>
            <!-- Powered by <span style='color:#1784d6;'>AI</span> and LLMs. -->
        </span>
    </div>
    <!-- <hr style='margin: 32px 0 32px 0'> -->
    <div style='max-width:420px;margin:0 auto;'>
    """)
    image_input = gr.Image(type="pil", label="Upload Image", elem_id="image-input")
    model = gr.Dropdown(
        [
            "gemini-2.0-flash",
            # "claude-3-5-sonnet",
            # "gpt-4o",
            "qwen/qwen2.5-vl-3b-instruct",
            "google/gemma-3-4b-it"
        ],
        value="gemini-2.0-flash",
        label="Model Options",
        elem_id="model-dropdown"
    )
    btn = gr.Button("Convert to LaTeX", elem_id="convert-button")
    output = gr.Textbox(label="LaTeX Equation", elem_id="output")
    rendered_output = gr.Markdown(label="Rendered Equation")
    btn.click(
        fn=image_to_latex,
        inputs=[image_input, model],
        outputs=[output, rendered_output]
    )
    # Page-level CSS. BUGFIX: removed a stray '}' that followed the `.form`
    # rule and made the stylesheet invalid.
    gr.HTML("""
    </div>
    <style>
    #image-input {
        max-width: 60% !important;
        margin: 10px auto !important;
    }
    #output {
        max-width: 100% !important;
        margin: 10px auto !important;
    }
    #convert-button {
        width: 160px !important;
        margin: 10px auto !important;
    }
    #examples {
        max-width: 600px !important;
        margin: 10px auto !important;
    }
    #examples img {
        height: 50px;
        width: auto;
        object-fit: contain;
        object-position: center;
    }
    .form {
        max-width: 60% !important;
        margin: 10px auto !important;
        background: transparent !important;
        box-shadow: none !important;
    }
    </style>
    """)
    gr.HTML("""</div>""")
    gr.Examples(
        examples=[
            ["examples/ex01.jpg"],
            ["examples/ex02.jpg"],
            ["examples/ex03.jpg"],
            ["examples/ex04.jpg"],
        ],
        inputs=[image_input],
        label="Try Examples",
        cache_examples=False,
        elem_id="examples"
    )

# share=True additionally exposes a public gradio.live link when run locally.
demo.launch(share=True)
| # def main(): | |
| # demo.launch() | |
| # if __name__ == "__main__": | |
| # main() | |