Bug fixes
Files changed:
- .gitignore +1 -0
- app.py +19 -15
- fromage/models.py +3 -2
.gitignore CHANGED
@@ -1 +1,2 @@
 .DS_Store
+venv/
app.py CHANGED
@@ -19,13 +19,15 @@ model = models.load_fromage('./', args_path, ckpt_path)
 
 
 def upload_image(state, image_input):
-
+    conversation = state[0]
+    chat_history = state[1]
+    conversation += [(f"", "")]
     input_image = Image.open(image_input.name).resize((224, 224)).convert('RGB')
-    return [
+    return [conversation, chat_history, input_image], conversation
 
 
 def reset():
-    return [[], None], []
+    return [[], [], None], []
 
 
 def save_image_to_local(image: Image.Image):
@@ -37,16 +39,19 @@ def save_image_to_local(image: Image.Image):
 
 def generate_for_prompt(input_text, state, ret_scale_factor, max_nm_rets, num_words, temperature):
     input_prompt = 'Q: ' + input_text + '\nA:'
-
-    chat_history
+    conversation = state[0]
+    chat_history = state[1]
+    input_image = state[2]
     print('Generating for', chat_history, flush=True)
 
     # If an image was uploaded, prepend it to the model.
     model_inputs = None
     if input_image is not None:
-        model_inputs = [input_image
+        model_inputs = chat_history + [input_image]
     else:
-        model_inputs =
+        model_inputs = chat_history
+
+    model_inputs.append(input_prompt)
 
     top_p = 1.0
     if temperature != 0.0:
@@ -74,15 +79,13 @@ def generate_for_prompt(input_text, state, ret_scale_factor, max_nm_rets, num_words, temperature):
         response += f'<img src="/file={filename}">'
 
     # TODO(jykoh): Persist image inputs.
-    chat_history
-
-    chat_history += '\n'
-
-    state.append((input_text, response))
+    chat_history = model_inputs + model_outputs
+    conversation.append((input_text, response))
 
     # Set input image to None.
     print('state', state, flush=True)
-
+    print('updated state', [conversation, chat_history, None], flush=True)
+    return [conversation, chat_history, None], conversation
 
 
 with gr.Blocks() as demo:
@@ -91,7 +94,7 @@ with gr.Blocks() as demo:
     )
 
     chatbot = gr.Chatbot()
-    gr_state = gr.State([[], None])  # chat_history, input_image
+    gr_state = gr.State([[], [], None])  # conversation, chat_history, input_image
 
     with gr.Row():
         with gr.Column(scale=0.3, min_width=0):
@@ -106,7 +109,8 @@ with gr.Blocks() as demo:
         clear_btn = gr.Button("Clear History")
 
     text_input.submit(generate_for_prompt, [text_input, gr_state, ret_scale_factor, max_ret_images, gr_max_len, gr_temperature], [gr_state, chatbot])
+    text_input.submit(lambda: "", None, text_input)  # Reset chatbox.
     image_btn.upload(upload_image, [gr_state, image_btn], [gr_state, chatbot])
     clear_btn.click(reset, [], [gr_state, chatbot])
 
-demo.launch(share=False, debug=True, server_name="
+demo.launch(share=False, debug=True, server_name="127.0.0.1")
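For reference, the new app.py threads a three-slot gr.State through every event handler. Below is a minimal, self-contained sketch of that pattern, not the Space's actual code: the FROMAGe model call is replaced by an echo stub, and the component layout is an assumption.

# Sketch of the [conversation, chat_history, input_image] state pattern
# used above. Hypothetical stand-alone demo; the model call is stubbed.
import gradio as gr

def upload_image(state, image_file):
    conversation, chat_history, _ = state
    conversation = conversation + [("(image uploaded)", "")]
    # Stash the file in the third slot for the next generate call.
    return [conversation, chat_history, image_file], conversation

def generate(input_text, state):
    conversation, chat_history, input_image = state
    # A real model would consume chat_history (and the image) here.
    response = 'echo: ' + input_text
    chat_history = chat_history + [input_text, response]
    conversation = conversation + [(input_text, response)]
    # Clear the image slot after one use, as the commit does.
    return [conversation, chat_history, None], conversation

def reset():
    return [[], [], None], []

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    gr_state = gr.State([[], [], None])  # conversation, chat_history, input_image
    text_input = gr.Textbox()
    image_btn = gr.UploadButton('Upload image')
    clear_btn = gr.Button('Clear History')

    text_input.submit(generate, [text_input, gr_state], [gr_state, chatbot])
    text_input.submit(lambda: '', None, text_input)  # reset the textbox
    image_btn.upload(upload_image, [gr_state, image_btn], [gr_state, chatbot])
    clear_btn.click(reset, [], [gr_state, chatbot])

demo.launch(server_name='127.0.0.1')

Keeping the Chatbot-facing conversation (a list of (user, bot) string pairs) separate from the model-facing chat_history (raw prompts, outputs, and the optional image) is what lets reset() and the post-generation cleanup clear one without corrupting the other.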
fromage/models.py CHANGED
@@ -628,13 +628,14 @@ def load_fromage(embeddings_dir: str, model_args_path: str, model_ckpt_path: str
 
   # Initialize tokenizer.
   tokenizer = GPT2Tokenizer.from_pretrained(model_kwargs['opt_version'])
-  tokenizer.pad_token = tokenizer.eos_token
   # Add special tokens to the model to enable [RET].
   tokenizer.add_special_tokens({"cls_token": "<|image|>"})
   tokenizer.add_tokens('[RET]')
   ret_token_idx = tokenizer('[RET]', add_special_tokens=False).input_ids
   assert len(ret_token_idx) == 1, ret_token_idx
   model_kwargs['retrieval_token_idx'] = ret_token_idx[0]
+  # model_kwargs['opt_version'] = 'facebook/opt-125m'
+  # model_kwargs['visual_encoder'] = 'openai/clip-vit-base-patch32'
   args = namedtuple('args', model_kwargs)(**model_kwargs)
 
   # Initialize model for inference.
@@ -643,7 +644,7 @@ def load_fromage(embeddings_dir: str, model_args_path: str, model_ckpt_path: str
   model = model.bfloat16()
   model = model.cuda()
 
-
+  # Load pretrained linear mappings and [RET] embeddings.
   checkpoint = torch.load(model_ckpt_path)
   model.load_state_dict(checkpoint['state_dict'], strict=False)
   with torch.no_grad():
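A note on the strict=False above: the demo checkpoint stores only the trained parameters (per the new comment, the linear mappings and [RET] embeddings), while the frozen OPT and CLIP weights keep the values loaded by from_pretrained. A minimal sketch of that partial-loading behavior, with hypothetical names:

import torch

def load_partial_checkpoint(model: torch.nn.Module, ckpt_path: str) -> torch.nn.Module:
    # The checkpoint contains only a subset of the model's parameters.
    checkpoint = torch.load(ckpt_path, map_location='cpu')
    # strict=False skips state-dict keys missing from the checkpoint instead
    # of raising, so the frozen pretrained weights keep their current values.
    missing, unexpected = model.load_state_dict(checkpoint['state_dict'], strict=False)
    print(f'left {len(missing)} pretrained tensors untouched; '
          f'ignored {len(unexpected)} unexpected keys')
    return model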