Spaces:

docilio
/

3nhance

Sleeping

App Files Community

docilio committed on Aug 3

Commit

4c31195

1 Parent(s): 7171080

Add application file

Browse files

Files changed (2) hide show

app.py +69 -4
requirements.txt +4 -0

app.py CHANGED Viewed

@@ -1,7 +1,72 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

 import gradio as gr
+from transformers import AutoProcessor, Gemma3nForConditionalGeneration
+from PIL import Image
+import torch
+import textwrap
+# 🔄 Load model and processor
+# Checkpoint identifier shared by the processor and the model.
+model_id = "google/gemma-3n-e2b-it"
+processor = AutoProcessor.from_pretrained(model_id)
+# Weights are loaded as float32 and mapped to the CPU.
+model = Gemma3nForConditionalGeneration.from_pretrained(
+    model_id, torch_dtype=torch.float32, device_map="cpu"
+)
+# Put the module in evaluation mode (eval() returns the module itself,
+# so calling it as a separate statement leaves `model` unchanged).
+model.eval()
+# 🛠️ Helper to format output
+def print_response(text: str) -> str:
+    """Return ``text`` with every line re-wrapped to a width of 100.
+
+    The input is split on ``"\n"`` and each piece is wrapped on its own
+    with ``textwrap.fill``, then the pieces are joined back with ``"\n"``.
+    Despite the name, nothing is printed; the wrapped text is returned.
+    """
+    wrapped_lines = []
+    for raw_line in text.split("\n"):
+        wrapped_lines.append(textwrap.fill(raw_line, 100))
+    return "\n".join(wrapped_lines)
+# 🔍 Inference function
+def predict(image: Image.Image, instruction: str) -> str:
+    """Run the model on one image plus a text instruction and return its reply.
+
+    Args:
+        image: The uploaded picture. Gradio passes ``None`` when the image
+            input is left empty, which is rejected up front.
+        instruction: Free-form text telling the model what to extract.
+
+    Returns:
+        The decoded generation, re-wrapped by ``print_response``.
+
+    Raises:
+        gr.Error: If no image was supplied.
+    """
+    if image is None:
+        # Surface a readable message in the UI instead of passing None on
+        # to the processor's chat template.
+        raise gr.Error("Please upload an image before submitting.")
+    messages = [
+        {
+            "role": "system",
+            "content": [{"type": "text", "text": "You are a helpful assistant that extracts fields from documents."}],
+        },
+        {
+            "role": "user",
+            "content": [
+                {"type": "image", "image": image},
+                {"type": "text", "text": instruction}
+            ],
+        },
+    ]
+    inputs = processor.apply_chat_template(
+        messages,
+        add_generation_prompt=True,
+        tokenize=True,
+        return_dict=True,
+        return_tensors="pt"
+    ).to(model.device)
+    # Prompt length in tokens; used below to slice the prompt off the output.
+    input_len = inputs["input_ids"].shape[-1]
+    with torch.inference_mode():
+        output = model.generate(
+            **inputs,
+            max_new_tokens=500,
+            do_sample=False,
+            use_cache=False  # 🔥 Fixes CPU bug
+        )
+    # Keep only what follows the prompt in the first (only) sequence;
+    # assumes generate() returns prompt tokens followed by new tokens.
+    gen = output[0][input_len:]
+    decoded = processor.decode(gen, skip_special_tokens=True)
+    return print_response(decoded)
+# 🎛️ Gradio Interface
+# Components are created in the same order as before: image input,
+# instruction box, then the output box.
+_image_input = gr.Image(type="pil", label="Upload Image")
+_instruction_input = gr.Textbox(
+    lines=2,
+    label="Instruction",
+    value="List visible fields as JSON array {field, value}.",
+)
+_text_output = gr.Textbox(label="Output")
+demo = gr.Interface(
+    fn=predict,
+    inputs=[_image_input, _instruction_input],
+    outputs=_text_output,
+    title="Gemma 3n Vision + Text",
+    description="Upload an image (e.g., a passport or form) and ask for structured output.",
+)
+# Start the app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+transformers>=4.53.0  # Gemma3nForConditionalGeneration is not available in earlier releases
+torch
+gradio