# Spaces: Sleeping  (hosting-page status text captured with the file; not part of the program)
import base64
import io
import os
from datetime import datetime

import gradio as gr
from openai import OpenAI
from PIL import Image
# OpenAI-compatible client pointed at the OpenRouter endpoint.
#
# SECURITY: the API key is read from the environment instead of being written
# in source. A literal key used to be committed here; treat that key as
# compromised and revoke/rotate it with the provider.
_api_key = os.environ.get("OPENROUTER_API_KEY")
if not _api_key:
    # Fail fast with an actionable message rather than at the first request.
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set. Define it as an environment variable "
        "(or as a secret on the hosting platform) before starting the app."
    )

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=_api_key,
)
# System prompt sent with every request; sets the assistant persona and tasks.
_SYSTEM_PROMPT = """You are Dalton, an expert AI assistant specialized in image understanding.
Your tasks include:
- Extracting and structuring text from images
- Answering questions about image content
- Providing detailed descriptions
- Analyzing receipts, documents, and other visual content
Be thorough, accurate, and helpful in your responses."""


def _encode_image_as_jpeg_base64(image):
    """Return *image* encoded as JPEG and then base64, as a text string.

    JPEG cannot store an alpha channel or a colour palette, so an image that
    is not already in RGB mode (for example an RGBA PNG upload) is converted
    first; saving such an image directly as JPEG raises ``OSError``.
    """
    if image.mode != "RGB":
        image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def analyze_image(image, prompt):
    """Send *image* and *prompt* to the vision model and return its reply.

    Args:
        image: The PIL image supplied by the image component, or ``None``
            when nothing has been uploaded or captured.
        prompt: The user's question or instruction about the image.

    Returns:
        The model's text reply, or a human-readable message when no image was
        provided, the request failed, or the model returned no content.
    """
    if image is None:
        return "Please upload or capture an image first."

    try:
        # Encoding happens inside the error boundary so that an unreadable or
        # unsupported image produces a message instead of an unhandled error.
        img_str = _encode_image_as_jpeg_base64(image)
        response = client.chat.completions.create(
            model="opengvlab/internvl3-14b:free",
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{img_str}"
                            },
                        },
                    ],
                },
            ],
            max_tokens=2048,
        )
        # Guard against a response that carries no choices at all.
        choices = response.choices
        result = choices[0].message.content if choices else None
    except Exception as e:
        # UI boundary: report the failure to the user rather than crash.
        return f"An error occurred: {str(e)}"

    # The output component expects text; never hand it None or "".
    return result or "The model returned an empty response. Please try again."
# Custom CSS for better mobile experience.
# Each selector targets a component declared in the gr.Blocks layout below:
#   #mobile-camera   -> elem_id of the image input (forced to full width)
#   #prompt-textbox  -> elem_id of the prompt textbox (minimum height)
#   .result-box      -> elem_classes of the result Markdown (scrollable, bordered)
#   .footer          -> class of the footer <div> (small, grey, centred text)
css = """
#mobile-camera { width: 100% !important; }
#prompt-textbox { min-height: 100px !important; }
.result-box {
max-height: 500px;
overflow-y: auto;
padding: 15px;
border: 1px solid #e0e0e0;
border-radius: 8px;
}
.footer {
margin-top: 20px;
font-size: 12px;
color: #666;
text-align: center;
}
"""
# Build the Gradio interface: a two-column layout with the image/prompt inputs
# on the left and the model's answer on the right, followed by a footer.
#
# The emoji and symbols in the user-facing strings were restored from
# mis-decoded UTF-8 (receipt emoji, bullets, light-bulb, copyright sign).
# NOTE(review): the emoji prefix on the prompt label and on the button lost
# all of its bytes, and the one on the result label most of them; the
# pencil/magnifier/check-mark used below are best guesses - confirm.
with gr.Blocks(css=css, title="DaltonVision - Koshur AI") as demo:
    # Strings are written flush-left (no source indentation inside them) so
    # the Markdown/HTML is never interpreted as an indented code block.
    gr.Markdown(
        "\n"
        "# 🧾 DaltonVision - InternVL3-14B\n"
        "### Advanced Image Understanding • Powered by OpenRouter • "
        "Developed by [Koshur AI](https://koshurai.com)\n"
    )

    with gr.Row():
        with gr.Column():
            # Image input section: file upload or webcam capture, delivered
            # to the handler as a PIL image.
            image_input = gr.Image(
                sources=["upload", "webcam"],
                type="pil",
                label="Upload or Capture Image",
                elem_id="mobile-camera",
            )
            # Prompt input, pre-filled with a default instruction.
            prompt_input = gr.Textbox(
                label="📝 Enter your question or instruction",
                value="Extract all content structurally",
                lines=3,
                elem_id="prompt-textbox",
            )
            submit_btn = gr.Button("🔍 Analyze Image", variant="primary")
            # Clickable example prompts that populate the prompt textbox.
            gr.Examples(
                examples=[
                    ["What is the total amount on this receipt?"],
                    ["List all items and their prices"],
                    ["Who is the vendor and what is the date?"],
                    ["Describe this image in detail"],
                ],
                inputs=[prompt_input],
                label="💡 Try these example prompts:",
            )

        with gr.Column():
            # Result output, rendered as Markdown inside the styled box.
            result_output = gr.Markdown(
                label="✅ Analysis Result",
                elem_classes="result-box",
            )

    # Footer
    gr.Markdown(
        "\n"
        '<div class="footer">\n'
        "© 2025 Koshur AI. All rights reserved.<br>\n"
        "Note: Images are processed in real-time and not stored.\n"
        "</div>\n"
    )

    # Button action: run the analysis and show the reply in the result box.
    submit_btn.click(
        fn=analyze_image,
        inputs=[image_input, prompt_input],
        outputs=result_output,
    )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()