| import traceback | |
| import gradio as gr | |
| from utils.get_RGB_image import get_RGB_image, is_online_file, steam_online_file | |
| from pdf2image import convert_from_path, convert_from_bytes | |
def similarity_fn(document_image_1, document_image_2):
    """Build the HTML message shown when the "Similarity" button is pressed.

    No similarity score is computed here: the message only reports whether
    both, exactly one, or none of the two document images were supplied.

    Args:
        document_image_1: Value of the first document image component, or
            ``None`` when nothing is loaded.
        document_image_2: Value of the second document image component, or
            ``None`` when nothing is loaded.

    Returns:
        A visible ``gr.HTML`` component carrying the status message.
    """
    # Compare against None explicitly so image values with ambiguous or
    # unusual truthiness are still counted as "present".
    loaded_count = sum(
        image is not None for image in (document_image_1, document_image_2)
    )
    status = ('NONE', 'ONE', 'BOTH')[loaded_count]
    # The closing tag must be </pre>; the original emitted a second <pre>.
    return gr.HTML(
        f'<pre>Similarity between the two documents: {status}</pre>',
        visible=True,
    )
def load_image(filename, page=0):
    """Load one page of a PDF, or a plain image, for display in the UI.

    The input is first treated as a PDF (fetched through
    ``steam_online_file`` when ``is_online_file`` says it is remote,
    otherwise read from disk) and page ``page`` is converted to an image.
    If that fails for any reason, the input is handed directly to
    ``get_RGB_image`` as a fallback.

    Args:
        filename: Local path or URL of the PDF / image.
        page: Zero-based index of the PDF page to render.

    Returns:
        A two-element list ``[image_component, error_component]``:
        ``[gr.Image, None]`` on success, ``[None, gr.HTML]`` holding the
        formatted traceback on failure.
    """
    # Local import keeps this block self-contained; only needed for errors.
    import html

    try:
        try:
            if is_online_file(filename):
                pages = convert_from_bytes(steam_online_file(filename))
            else:
                pages = convert_from_path(filename)
            image = get_RGB_image(pages[page])
        except Exception:
            # Best-effort fallback: PDF conversion failed (or the requested
            # page does not exist), so try the input as an image instead.
            image = get_RGB_image(filename)
        return [gr.Image(value=image, visible=True), None]
    except Exception:
        # Escape the traceback so fragments such as "<module>" are not
        # parsed as HTML tags, and wrap it in <pre> to keep line breaks.
        error = html.escape(traceback.format_exc())
        return [None, gr.HTML(value=f'<pre>{error}</pre>', visible=True)]
def preview_url(url, page=0):
    """Load the document behind ``url`` and choose which tab to display.

    Args:
        url: Link to a PDF or document image.
        page: Zero-based PDF page index, forwarded to ``load_image``.

    Returns:
        A three-element list ``[tabs_update, image, error]``. The tabs
        update selects tab ``0`` when an image was produced and tab ``1``
        otherwise; ``image`` and ``error`` are passed through unchanged
        from ``load_image``.
    """
    image, error = load_image(url, page=page)
    # Show the image tab on success; stay on the URL tab when loading failed.
    selected_tab = 0 if image else 1
    return [gr.Tabs(selected=selected_tab), image, error]
def document_view(document_number: int):
    """Build the loader widgets for one document and wire their events.

    Creates a heading and a two-tab panel: "From Image" (an initially hidden
    image preview, an error area and an upload button) and "From URL" (a URL
    textbox, an error area and a preview button). Must be called inside an
    active ``gr.Blocks`` context.

    Args:
        document_number: ``1`` for the first document; any other value is
            labelled as the second document in the heading.

    Returns:
        The ``gr.Image`` component that holds the loaded document image.
    """
    ordinal = "first" if document_number == 1 else "second"
    # The heading must be closed with </h4>; the original emitted <h4> twice.
    gr.HTML(
        value=f'<h4>Load the {ordinal} PDF or Document Image</h4>',
        elem_classes=['center'],
    )

    with gr.Tabs() as document_tabs:
        with gr.Tab("From Image", id=0):
            # Hidden until load_image returns a visible image update.
            document = gr.Image(
                type="pil",
                label=f"Document {document_number}",
                visible=False,
            )
            document_error_message = gr.HTML(
                label="Error Message", visible=False)
            document_preview = gr.UploadButton(
                "Click to Upload PDF or Document Image",
                file_types=["image", ".pdf"],
                file_count="single",
            )
        with gr.Tab("From URL", id=1):
            document_url = gr.Textbox(
                label=f"Document {document_number} URL",
                info="Paste a Link/URL to PDF or Document Image",
                placeholder="https://datasets-server.huggingface.co/.../image.jpg",
            )
            document_url_error_message = gr.HTML(
                label="Error Message", visible=False)
            document_url_preview = gr.Button(
                value="Preview", variant="primary")

    document_preview.upload(
        # NOTE(review): the upload payload is assumed to be either an object
        # exposing `.name` or a plain path string depending on the Gradio
        # version; both are accepted here.
        fn=lambda file: load_image(getattr(file, "name", file)),
        inputs=[document_preview],
        outputs=[document, document_error_message],
    )
    document_url_preview.click(
        fn=preview_url,
        inputs=[document_url],
        outputs=[document_tabs, document, document_url_error_message],
    )
    return document
def app():
    """Assemble the document-similarity Gradio interface and launch it.

    Layout: a title row, two side-by-side document loaders (built by
    ``document_view``), a horizontal rule, and a row with the "Similarity"
    and "Reset" buttons next to the output area. Clicking "Similarity"
    calls ``similarity_fn`` with both document images.

    Returns:
        Whatever ``Blocks.launch(debug=True)`` returns.
    """
    title = 'Document Similarity Search Using Visual Layout Features'
    # The heading must be closed with </h2>; the original emitted <h2> twice.
    description = f"<h2>{title}</h2>"
    # The first rule was `image { max-height="86vh" ... }`, which is not a
    # valid CSS declaration and targets no <img> element; it is written
    # here as a real rule capping image height.
    css = '''
    img { max-height: 86vh !important; }
    .center { display: flex; flex: 1 1 auto; align-items: center; align-content: center; justify-content: center; justify-items: center; }
    .hr { width: 100%; display: block; padding: 0; margin: 0; background: gray; height: 4px; border: none; }
    '''

    # Named `demo` so the local does not shadow this function's own name.
    with gr.Blocks(title=title, css=css) as demo:
        with gr.Row():
            gr.HTML(value=description, elem_classes=['center'])

        with gr.Row(equal_height=False):
            with gr.Column():
                document_1_image = document_view(1)
            with gr.Column():
                document_2_image = document_view(2)

        gr.HTML('<hr/>', elem_classes=['hr'])

        with gr.Row(elem_classes=['center']):
            with gr.Column():
                submit = gr.Button(value="Similarity", variant="primary")
                # NOTE(review): no click handler is attached to this button
                # in this function, so pressing it currently does nothing.
                reset = gr.Button(value="Reset", variant="secondary")
            with gr.Column():
                similarity_output = gr.HTML(visible=False)

        submit.click(
            fn=similarity_fn,
            inputs=[document_1_image, document_2_image],
            outputs=[similarity_output],
        )

    return demo.launch(debug=True)