import threading

import gradio as gr
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TextIteratorStreamer,
)
# -----------------------------------------------------------------------------
# 1. MODEL LOADING
# -----------------------------------------------------------------------------
# In this advanced example, we instantiate the model directly (instead of using
# a pipeline) so that we can stream outputs via TextIteratorStreamer.
MODEL_NAME = "microsoft/phi-4"  # Replace with a smaller HF model if phi-4 doesn't fit your hardware
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME, torch_dtype=torch.float16, device_map="auto"
    )
except Exception:
    # Fallback if the model can't be loaded (not found, gated, or too large).
    # Default to a smaller model so the demo still runs.
    MODEL_NAME = "gpt2"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE)
model.eval()
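
# Optional sanity check (a minimal sketch, not part of the app flow): call
# _smoke_test() manually to confirm the loaded model generates at all before
# debugging the UI layer. Using `model.device` covers both the
# device_map="auto" path and the .to(DEVICE) fallback path.
def _smoke_test(prompt="Hello, world"):
    ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
    with torch.no_grad():
        out = model.generate(ids, max_new_tokens=8, do_sample=False, pad_token_id=tokenizer.eos_token_id)
    return tokenizer.decode(out[0], skip_special_tokens=True)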
# -----------------------------------------------------------------------------
# 2. CONVERSATION / PROMPTS
# -----------------------------------------------------------------------------
# We keep track of the conversation as a list of dictionaries:
# [
#     {"role": "system", "content": "..."},
#     {"role": "developer", "content": "..."},
#     {"role": "user", "content": "User message"},
#     {"role": "assistant", "content": "Assistant answer"},
#     ...
# ]
#
# We also build in a mock retrieval system that merges knowledge snippets
# into the final prompt if the user chooses to do so.
DEFAULT_SYSTEM_PROMPT = (
    "You are Philos, an advanced AI system created by ACC (Algorithmic Computer-generated Consciousness). "
    "Answer user queries accurately, thoroughly, and helpfully. Keep your responses relevant and correct."
)
DEFAULT_DEVELOPER_PROMPT = (
    "Ensure that you respond in a style that is professional, clear, and approachable. "
    "Include reasoning steps if needed, but keep them concise."
)
# A small dictionary to emulate knowledge retrieval.
# In a real scenario, you might use an actual vector DB + retrieval method.
MOCK_KB = {
    "python": "Python is a high-level, interpreted programming language famous for its readability and flexibility.",
    "accelerate library": "The accelerate library by HF helps in distributed training and inference.",
    "phi-4 architecture": "phi-4 is a 14B-parameter, decoder-only Transformer with a 16K context window.",
}
def retrieve_knowledge(user_query):
    """Naive keyword lookup: return any KB snippets whose key appears in the query."""
    matches = []
    for keyword, snippet in MOCK_KB.items():
        if keyword.lower() in user_query.lower():
            matches.append(snippet)
    return matches
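
# Illustrative behavior: retrieve_knowledge("What is Python good for?") returns
# [MOCK_KB["python"]], while a query mentioning no KB keyword returns [].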
# -----------------------------------------------------------------------------
# 3. HELPER: Build the prompt from conversation
# -----------------------------------------------------------------------------
def build_prompt(conversation):
    """
    Convert the conversation (a list of role/content dicts) into a single text
    prompt the model can continue. We adopt a simple format of labeled blocks:
    [System], [Developer], [User], [Assistant], ...
    """
    prompt = ""
    for msg in conversation:
        role = msg["role"].capitalize()
        prompt += f"[{role}]\n{msg['content']}\n"
    prompt += "[Assistant]\n"  # End with an assistant header so the model continues from here
    return prompt
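
# For example (illustrative), a system message followed by one user turn yields:
#
#   [System]
#   You are Philos, ...
#   [User]
#   Hi!
#   [Assistant]
#
# The trailing [Assistant] header is the model's cue to produce the reply.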
# -----------------------------------------------------------------------------
# 4. STREAMING GENERATION
# -----------------------------------------------------------------------------
def generate_tokens_stream(prompt, temperature=0.7, top_p=0.9, max_new_tokens=128):
    """
    Uses TextIteratorStreamer to yield the growing response text chunk by chunk.
    """
    # skip_prompt=True keeps the echoed input prompt out of the streamed reply.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    generation_kwargs = dict(
        **inputs,  # input_ids plus attention_mask
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Run generation in a background thread; the streamer yields text as it arrives.
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    partial_text = ""
    for new_token in streamer:
        partial_text += new_token
        yield partial_text
    thread.join()
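
# Illustrative consumption of the stream: each yielded value is the full
# response so far (not a delta), so a consumer only needs the latest item.
#
#   for text in generate_tokens_stream(build_prompt(conversation)):
#       print(text)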
# -----------------------------------------------------------------------------
# 5. MAIN CHAT FUNCTION
# -----------------------------------------------------------------------------
def advanced_chat(user_msg, conversation, system_prompt, dev_prompt, retrieve_flg, temperature, top_p):
    """
    - Update the conversation with the user's message
    - Optionally retrieve knowledge and fold it into the developer prompt
    - Build the final prompt
    - Stream the assistant's reply as growing partial text
    """
    # Reject empty input early
    if not user_msg.strip():
        yield "Please enter a message."
        return
    # 1) Construct the system/developer messages from the current textbox values
    system_message = {"role": "system", "content": system_prompt}
    developer_message = {"role": "developer", "content": dev_prompt}
    # 2) Replace any existing system/developer messages at the head of the
    #    conversation. Mutating in place (conversation[:] = ...) ensures the
    #    caller's gr.State list sees the update; a plain reassignment would
    #    silently drop the history between turns.
    filtered = [msg for msg in conversation if msg["role"] not in ("system", "developer")]
    conversation[:] = [system_message, developer_message] + filtered
    # 3) Append the user's message
    conversation.append({"role": "user", "content": user_msg})
    # 4) Retrieve knowledge if the user toggled "Include Knowledge Retrieval"
    if retrieve_flg:
        knowledge_snippets = retrieve_knowledge(user_msg)
        if knowledge_snippets:
            # Append snippets to the developer message (index 1) for simplicity
            knowledge_text = "\n".join("[Knowledge] " + s for s in knowledge_snippets)
            conversation[1]["content"] += f"\n\n[Additional Knowledge]\n{knowledge_text}"
    # 5) Build the final prompt
    prompt = build_prompt(conversation)
    # 6) Stream the assistant's response
    partial_response = ""
    for partial_text in generate_tokens_stream(prompt, temperature, top_p):
        partial_response = partial_text
        yield partial_text  # Send the growing text to the caller for display
    # 7) Generation complete: record the final assistant message
    conversation.append({"role": "assistant", "content": partial_response})
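
# Illustrative state evolution: starting from a state of [user, assistant]
# messages, one call leaves the conversation as
# [system, developer, old user, old assistant, new user, new assistant],
# so multi-turn context survives across Gradio events.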
# -----------------------------------------------------------------------------
# 6. BUILD GRADIO INTERFACE
# -----------------------------------------------------------------------------
def build_ui():
    with gr.Blocks(
        title="PhilosBeta-Advanced",
        css="#chatbot{height:550px} .overflow-y-auto{max-height:550px}",
    ) as demo:
        gr.Markdown("# **PhilosBeta: Advanced Demo**")
        gr.Markdown(
            "An example of multi-turn conversation with streaming responses, "
            "optional retrieval, and custom system/developer prompts."
        )
        # State to store the conversation as a list of role/content dicts
        conversation_state = gr.State([])
        # TEXT ELEMENTS
        with gr.Row():
            with gr.Column():
                system_prompt_box = gr.Textbox(
                    label="System Prompt",
                    value=DEFAULT_SYSTEM_PROMPT,
                    lines=3,
                )
                developer_prompt_box = gr.Textbox(
                    label="Developer Prompt",
                    value=DEFAULT_DEVELOPER_PROMPT,
                    lines=3,
                )
            with gr.Column():
                retrieve_flag = gr.Checkbox(label="Include Knowledge Retrieval", value=False)
                # Minimum of 0.1: generation with do_sample=True errors on temperature == 0
                temperature_slider = gr.Slider(0.1, 2.0, 0.7, step=0.1, label="Temperature")
                top_p_slider = gr.Slider(0.0, 1.0, 0.9, step=0.05, label="Top-p")
                gr.Markdown("Max new tokens = 128 (fixed in code).")
        # MAIN CHAT UI
        # Note: Chatbot.style() was removed in Gradio 4.x; pass height directly.
        chatbox = gr.Chatbot(label="Philos Conversation", elem_id="chatbot", height=500)
        user_input = gr.Textbox(
            label="Your Message",
            placeholder="Type here...",
            lines=3,
        )
        send_btn = gr.Button("Send", variant="primary")
        # ---------------------------------------------------------------------
        # ACTION: Handle user input
        # ---------------------------------------------------------------------
        def to_chat_pairs(conversation, partial=None):
            """Convert role/content dicts into the (user, bot) pairs gr.Chatbot expects."""
            pairs = []
            for m in conversation:
                if m["role"] == "user":
                    pairs.append([m["content"], None])
                elif m["role"] == "assistant" and pairs:
                    pairs[-1][1] = m["content"]
            if partial is not None:
                if pairs:
                    pairs[-1][1] = partial  # Show the in-flight reply on the last turn
                else:
                    pairs.append([None, partial])  # e.g. the empty-message warning
            return pairs

        def user_send(
            user_text, conversation, sys_prompt, dev_prompt, retrieve_flg, temperature, top_p
        ):
            """
            Generator: consume advanced_chat() and yield updated (history, state)
            pairs so Gradio re-renders the Chatbot after each streamed chunk.
            (Returning the generator object, as a plain function would, does not
            stream; the handler itself must yield. Streaming handlers also
            require the queue; see demo.queue() below.)
            """
            for partial_text in advanced_chat(
                user_msg=user_text,
                conversation=conversation,
                system_prompt=sys_prompt,
                dev_prompt=dev_prompt,
                retrieve_flg=retrieve_flg,
                temperature=temperature,
                top_p=top_p,
            ):
                yield to_chat_pairs(conversation, partial_text), conversation
            # Final yield with the completed assistant message recorded in state
            yield to_chat_pairs(conversation), conversation

        # Stream directly into the Chatbot component. (gr.Chatbot.update() is a
        # value constructor, not a component, and cannot be used as an output.)
        send_btn.click(
            fn=user_send,
            inputs=[
                user_input,
                conversation_state,
                system_prompt_box,
                developer_prompt_box,
                retrieve_flag,
                temperature_slider,
                top_p_slider,
            ],
            outputs=[chatbox, conversation_state],
        )
        # Also let the user press Enter to send messages
        user_input.submit(
            fn=user_send,
            inputs=[
                user_input,
                conversation_state,
                system_prompt_box,
                developer_prompt_box,
                retrieve_flag,
                temperature_slider,
                top_p_slider,
            ],
            outputs=[chatbox, conversation_state],
        )
    return demo
# -----------------------------------------------------------------------------
# 7. LAUNCH
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    ui = build_ui()
    ui.queue()  # Required for generator (streaming) event handlers
    ui.launch()