import threading

import gradio as gr
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TextIteratorStreamer,
)
# -----------------------------------------------------------------------------
# 1. MODEL LOADING
# -----------------------------------------------------------------------------
# In this advanced example, we instantiate the model directly (instead of using
# a pipeline) so that we can stream outputs via TextIteratorStreamer.
MODEL_NAME = "microsoft/phi-4"  # Replace with a smaller HF model if phi-4 doesn't fit your hardware
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME, torch_dtype=torch.float16, device_map="auto"
    )
except Exception:
    # Fallback if the model can't be loaded (not found, gated, or too large).
    # Default to a smaller model so the demo still runs.
    MODEL_NAME = "gpt2"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE)
model.eval()
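
# Optional sanity check (a minimal sketch, not part of the app flow): call
# _smoke_test() manually to confirm the loaded model generates at all before
# debugging the UI layer. Using `model.device` covers both the
# device_map="auto" path and the .to(DEVICE) fallback path.
def _smoke_test(prompt="Hello, world"):
    ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
    with torch.no_grad():
        out = model.generate(ids, max_new_tokens=8, do_sample=False, pad_token_id=tokenizer.eos_token_id)
    return tokenizer.decode(out[0], skip_special_tokens=True)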
# -----------------------------------------------------------------------------
# 2. CONVERSATION / PROMPTS
# -----------------------------------------------------------------------------
# We keep track of the conversation as a list of dictionaries:
# [
#     {"role": "system", "content": "..."},
#     {"role": "developer", "content": "..."},
#     {"role": "user", "content": "User message"},
#     {"role": "assistant", "content": "Assistant answer"},
#     ...
# ]
#
# We also build in a mock retrieval system that merges knowledge snippets
# into the final prompt if the user chooses to do so.
DEFAULT_SYSTEM_PROMPT = (
    "You are Philos, an advanced AI system created by ACC (Algorithmic Computer-generated Consciousness). "
    "Answer user queries accurately, thoroughly, and helpfully. Keep your responses relevant and correct."
)
DEFAULT_DEVELOPER_PROMPT = (
    "Ensure that you respond in a style that is professional, clear, and approachable. "
    "Include reasoning steps if needed, but keep them concise."
)
# A small dictionary to emulate knowledge retrieval.
# In a real scenario, you might use an actual vector DB + retrieval method.
MOCK_KB = {
    "python": "Python is a high-level, interpreted programming language famous for its readability and flexibility.",
    "accelerate library": "The accelerate library by HF helps in distributed training and inference.",
    "phi-4 architecture": "phi-4 is a 14B-parameter, decoder-only Transformer with a 16K context window.",
}
def retrieve_knowledge(user_query):
    """Naive keyword lookup: return any KB snippets whose key appears in the query."""
    matches = []
    for keyword, snippet in MOCK_KB.items():
        if keyword.lower() in user_query.lower():
            matches.append(snippet)
    return matches
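
# Illustrative behavior: retrieve_knowledge("What is Python good for?") returns
# [MOCK_KB["python"]], while a query mentioning no KB keyword returns [].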
# -----------------------------------------------------------------------------
# 3. HELPER: Build the prompt from conversation
# -----------------------------------------------------------------------------
def build_prompt(conversation):
    """
    Convert the conversation (a list of role/content dicts) into a single text
    prompt the model can continue. We adopt a simple format of labeled blocks:
    [System], [Developer], [User], [Assistant], ...
    """
    prompt = ""
    for msg in conversation:
        role = msg["role"].capitalize()
        prompt += f"[{role}]\n{msg['content']}\n"
    prompt += "[Assistant]\n"  # End with an assistant header so the model continues from here
    return prompt
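
# For example (illustrative), a system message followed by one user turn yields:
#
#   [System]
#   You are Philos, ...
#   [User]
#   Hi!
#   [Assistant]
#
# The trailing [Assistant] header is the model's cue to produce the reply.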
# -----------------------------------------------------------------------------
# 4. STREAMING GENERATION
# -----------------------------------------------------------------------------
def generate_tokens_stream(prompt, temperature=0.7, top_p=0.9, max_new_tokens=128):
    """
    Uses TextIteratorStreamer to yield the growing response text chunk by chunk.
    """
    # skip_prompt=True keeps the echoed input prompt out of the streamed reply.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    generation_kwargs = dict(
        **inputs,  # input_ids plus attention_mask
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Run generation in a background thread; the streamer yields text as it arrives.
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    partial_text = ""
    for new_token in streamer:
        partial_text += new_token
        yield partial_text
    thread.join()
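
# Illustrative consumption of the stream: each yielded value is the full
# response so far (not a delta), so a consumer only needs the latest item.
#
#   for text in generate_tokens_stream(build_prompt(conversation)):
#       print(text)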
# -----------------------------------------------------------------------------
# 5. MAIN CHAT FUNCTION
# -----------------------------------------------------------------------------
def advanced_chat(user_msg, conversation, system_prompt, dev_prompt, retrieve_flg, temperature, top_p):
    """
    - Update the conversation with the user's message
    - Optionally retrieve knowledge and fold it into the developer prompt
    - Build the final prompt
    - Stream the assistant's reply as growing partial text
    """
    # Reject empty input early
    if not user_msg.strip():
        yield "Please enter a message."
        return
    # 1) Construct the system/developer messages from the current textbox values
    system_message = {"role": "system", "content": system_prompt}
    developer_message = {"role": "developer", "content": dev_prompt}
    # 2) Replace any existing system/developer messages at the head of the
    #    conversation. Mutating in place (conversation[:] = ...) ensures the
    #    caller's gr.State list sees the update; a plain reassignment would
    #    silently drop the history between turns.
    filtered = [msg for msg in conversation if msg["role"] not in ("system", "developer")]
    conversation[:] = [system_message, developer_message] + filtered
    # 3) Append the user's message
    conversation.append({"role": "user", "content": user_msg})
    # 4) Retrieve knowledge if the user toggled "Include Knowledge Retrieval"
    if retrieve_flg:
        knowledge_snippets = retrieve_knowledge(user_msg)
        if knowledge_snippets:
            # Append snippets to the developer message (index 1) for simplicity
            knowledge_text = "\n".join("[Knowledge] " + s for s in knowledge_snippets)
            conversation[1]["content"] += f"\n\n[Additional Knowledge]\n{knowledge_text}"
    # 5) Build the final prompt
    prompt = build_prompt(conversation)
    # 6) Stream the assistant's response
    partial_response = ""
    for partial_text in generate_tokens_stream(prompt, temperature, top_p):
        partial_response = partial_text
        yield partial_text  # Send the growing text to the caller for display
    # 7) Generation complete: record the final assistant message
    conversation.append({"role": "assistant", "content": partial_response})
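
# Illustrative state evolution: starting from a state of [user, assistant]
# messages, one call leaves the conversation as
# [system, developer, old user, old assistant, new user, new assistant],
# so multi-turn context survives across Gradio events.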
# -----------------------------------------------------------------------------
# 6. BUILD GRADIO INTERFACE
# -----------------------------------------------------------------------------
def build_ui():
    with gr.Blocks(
        title="PhilosBeta-Advanced",
        css="#chatbot{height:550px} .overflow-y-auto{max-height:550px}",
    ) as demo:
        gr.Markdown("# **PhilosBeta: Advanced Demo**")
        gr.Markdown(
            "An example of multi-turn conversation with streaming responses, "
            "optional retrieval, and custom system/developer prompts."
        )
        # State to store the conversation as a list of role/content dicts
        conversation_state = gr.State([])
        # TEXT ELEMENTS
        with gr.Row():
            with gr.Column():
                system_prompt_box = gr.Textbox(
                    label="System Prompt",
                    value=DEFAULT_SYSTEM_PROMPT,
                    lines=3,
                )
                developer_prompt_box = gr.Textbox(
                    label="Developer Prompt",
                    value=DEFAULT_DEVELOPER_PROMPT,
                    lines=3,
                )
            with gr.Column():
                retrieve_flag = gr.Checkbox(label="Include Knowledge Retrieval", value=False)
                # Minimum of 0.1: generation with do_sample=True errors on temperature == 0
                temperature_slider = gr.Slider(0.1, 2.0, 0.7, step=0.1, label="Temperature")
                top_p_slider = gr.Slider(0.0, 1.0, 0.9, step=0.05, label="Top-p")
                gr.Markdown("Max new tokens = 128 (fixed in code).")
        # MAIN CHAT UI
        # Note: Chatbot.style() was removed in Gradio 4.x; pass height directly.
        chatbox = gr.Chatbot(label="Philos Conversation", elem_id="chatbot", height=500)
        user_input = gr.Textbox(
            label="Your Message",
            placeholder="Type here...",
            lines=3,
        )
        send_btn = gr.Button("Send", variant="primary")
        # ---------------------------------------------------------------------
        # ACTION: Handle user input
        # ---------------------------------------------------------------------
        def to_chat_pairs(conversation, partial=None):
            """Convert role/content dicts into the (user, bot) pairs gr.Chatbot expects."""
            pairs = []
            for m in conversation:
                if m["role"] == "user":
                    pairs.append([m["content"], None])
                elif m["role"] == "assistant" and pairs:
                    pairs[-1][1] = m["content"]
            if partial is not None:
                if pairs:
                    pairs[-1][1] = partial  # Show the in-flight reply on the last turn
                else:
                    pairs.append([None, partial])  # e.g. the empty-message warning
            return pairs

        def user_send(
            user_text, conversation, sys_prompt, dev_prompt, retrieve_flg, temperature, top_p
        ):
            """
            Generator: consume advanced_chat() and yield updated (history, state)
            pairs so Gradio re-renders the Chatbot after each streamed chunk.
            (Returning the generator object, as a plain function would, does not
            stream; the handler itself must yield. Streaming handlers also
            require the queue; see demo.queue() below.)
            """
            for partial_text in advanced_chat(
                user_msg=user_text,
                conversation=conversation,
                system_prompt=sys_prompt,
                dev_prompt=dev_prompt,
                retrieve_flg=retrieve_flg,
                temperature=temperature,
                top_p=top_p,
            ):
                yield to_chat_pairs(conversation, partial_text), conversation
            # Final yield with the completed assistant message recorded in state
            yield to_chat_pairs(conversation), conversation

        # Stream directly into the Chatbot component. (gr.Chatbot.update() is a
        # value constructor, not a component, and cannot be used as an output.)
        send_btn.click(
            fn=user_send,
            inputs=[
                user_input,
                conversation_state,
                system_prompt_box,
                developer_prompt_box,
                retrieve_flag,
                temperature_slider,
                top_p_slider,
            ],
            outputs=[chatbox, conversation_state],
        )
        # Also let the user press Enter to send messages
        user_input.submit(
            fn=user_send,
            inputs=[
                user_input,
                conversation_state,
                system_prompt_box,
                developer_prompt_box,
                retrieve_flag,
                temperature_slider,
                top_p_slider,
            ],
            outputs=[chatbox, conversation_state],
        )
    return demo
# -----------------------------------------------------------------------------
# 7. LAUNCH
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    ui = build_ui()
    ui.queue()  # Required for generator (streaming) event handlers
    ui.launch()