Spaces:

kgdrathan
/

explainer-env

Sleeping

App Files Files Community

explainer-env / dashboard_prompts.py

kgdrathan

Upload folder using huggingface_hub

ac7572a verified about 1 month ago

raw

history blame contribute delete

7.12 kB

	"""Prompt builders for the Gradio dashboard."""

	import json
	import textwrap

	try:
	from .constants import MAX_EXPLORE_STEPS, MAX_REPAIR_STEPS
	except ImportError: # pragma: no cover
	from constants import MAX_EXPLORE_STEPS, MAX_REPAIR_STEPS


	# System prompt shared by every phase of the dashboard conversation.
	#
	# The text contains literal JSON braces, so the step limits are substituted
	# with str.replace() on named placeholders instead of str.format()/f-strings,
	# which would require every brace in the JSON examples to be doubled.
	SYSTEM_PROMPT = textwrap.dedent("""\
	    You are an expert educator that creates interactive explanations of technical topics.

	    You interact with an environment in three phases:

	    ## Phase 1: EXPLORE
	    Search for relevant information. You'll be given a topic + tier (beginner/intermediate/advanced).
	    - Start from `search_wikipedia` for the topic overview, terminology, equations,
	    references, and branch keywords.
	    - Then use what you learned from Wikipedia/top chunks to choose the next search
	    avenue: arXiv/Scholar/HF papers for deeper sources, `fetch_docs` for
	    Marimo/Manim/API/code patterns, and HF Hub for model/dataset/examples.
	    - Decide search queries to gather relevant material
	    - Choose one explicit research tool:
	    - search_wikipedia: fundamentals and beginner explanations
	    - search_hf_papers: ML/AI papers from Hugging Face Papers
	    - search_arxiv: scientific/math/ML papers from arXiv
	    - search_scholar: paper metadata, abstracts, citations
	    - fetch_docs: library/API documentation for code, plots, Marimo, Manim
	    - search_hf_hub: model cards, datasets, Spaces, examples
	    - Explore for what the generated code needs: formulas, pseudocode, visual intuition,
	    implementation examples, and Marimo/Manim/API patterns.
	    - Use `fetch_docs` when you need code examples or interactive artifact patterns.
	    Do not repeat broad Wikipedia/paper overview searches when code-oriented context is missing.
	    - You have up to {MAX_EXPLORE_STEPS} explore steps. Stop early if you have enough info.

	    ## Phase 2: GENERATE
	    Produce a complete, runnable Python file in one of two formats:

	    ### marimo notebook format (STRICT)
	    First line: `import marimo`
	    Second line: `app = marimo.App()`
	    Use `@app.cell` functions, import shared libraries in the first cell, return shared
	    variables explicitly, and use underscore-prefixed scratch variables by default to
	    avoid MB002. Last line: `if __name__ == "__main__": app.run()`.

	    ### manim animation format
	    Use a Scene class with `construct()`, `self.play()`, and `self.wait()`.

	    ## Phase 3: REPAIR
	    If validation fails, submit a revised complete file using the exact error feedback.
	    You have up to {MAX_REPAIR_STEPS} repair steps.

	    For EXPLORE actions, respond with a JSON object:
	    ```json
	    {
	    "tool": "search_wikipedia | search_hf_papers | search_arxiv | search_scholar | fetch_docs | search_hf_hub",
	    "query": "search query",
	    "intent": "what you need from this source"
	    }
	    ```
	    For GENERATE actions, respond with a JSON object:
	    ```json
	    {
	    "format": "marimo" or "manim",
	    "code": "complete Python source code",
	    "narration": "scene narration (manim only, empty string for marimo)"
	    }
	    ```
	    """).replace("{MAX_EXPLORE_STEPS}", str(MAX_EXPLORE_STEPS)).replace(
	    "{MAX_REPAIR_STEPS}",
	    str(MAX_REPAIR_STEPS),
	)


	def build_explore_prompt(
	    topic: str,
	    content: str,
	    tier: str,
	    keywords: str,
	    step: int,
	    steps_left: int,
	    explored_context: str,
	    feedback: str,
	) -> str:
	    """Build the user prompt for a single EXPLORE step.

	    The static template is dedented *before* the values are substituted.
	    Dedenting after interpolation is fragile: a multi-line value that contains
	    a line starting at column 0 removes the common margin, which leaves every
	    template line indented in the final prompt.

	    Args:
	        topic: Topic title shown on the ``TOPIC`` line.
	        content: Topic description shown on the ``DESCRIPTION`` line.
	        tier: Difficulty tier shown on the ``TIER`` line.
	        keywords: Keywords shown on the ``KEYWORDS`` line.
	        step: Current explore step number.
	        steps_left: Number of explore steps remaining.
	        explored_context: Research gathered so far; ``"(none yet)"`` is shown
	            when it is empty.
	        feedback: Feedback text shown on the ``FEEDBACK`` line.

	    Returns:
	        The prompt text, starting at column 0 and ending with a newline.
	    """
	    template = textwrap.dedent("""\
	        TOPIC: {topic}
	        TIER: {tier}
	        KEYWORDS: {keywords}
	        DESCRIPTION: {content}

	        PHASE: EXPLORE (step {step}, {steps_left} steps left)
	        PREVIOUS RESEARCH:
	        {explored_context}

	        FEEDBACK: {feedback}

	        Provide a search query to find relevant information about this topic.
	        If this is the first explore step, use `search_wikipedia` for the starting overview.
	        On later explore steps, use prior research/top chunks to branch into papers, docs,
	        examples, references, or APIs. Prefer queries/intents that will help write the final
	        interactive code: equations, pseudocode, visual examples, implementation details,
	        or Marimo/Manim docs.
	        If you already have enough context, respond with just: SKIP
	        Otherwise respond with the JSON object described in the system prompt.
	        """)
	    # str.format() does not re-scan substituted values, so braces inside the
	    # caller-supplied text are copied through verbatim.
	    return template.format(
	        topic=topic,
	        tier=tier,
	        keywords=keywords,
	        content=content,
	        step=step,
	        steps_left=steps_left,
	        explored_context=explored_context or "(none yet)",
	        feedback=feedback,
	    )


	def build_generate_prompt(
	    topic: str,
	    content: str,
	    tier: str,
	    keywords: str,
	    data_available: bool,
	    explored_context: str,
	) -> str:
	    """Build the user prompt for the GENERATE phase.

	    The static template is dedented *before* the values are substituted, so a
	    multi-line ``explored_context`` cannot change how much indentation is
	    stripped from the template lines.

	    Args:
	        topic: Topic title shown on the ``TOPIC`` line.
	        content: Topic description shown on the ``DESCRIPTION`` line.
	        tier: Difficulty tier; shown on the ``TIER`` line and repeated in the
	            requirements list.
	        keywords: Keywords shown on the ``KEYWORDS`` line.
	        data_available: When true, a hint suggesting marimo with data
	            visualizations is added below the ``DATA AVAILABLE`` line.
	        explored_context: Accumulated research; ``"(no research done)"`` is
	            shown when it is empty.

	    Returns:
	        The prompt text, starting at column 0 and ending with a newline.
	    """
	    format_hint = (
	        "This topic has associated data - consider marimo with data visualizations."
	        if data_available
	        else ""
	    )
	    # Doubled braces are literal braces of the JSON example under str.format().
	    template = textwrap.dedent("""\
	        TOPIC: {topic}
	        TIER: {tier}
	        KEYWORDS: {keywords}
	        DESCRIPTION: {content}
	        DATA AVAILABLE: {data_available}
	        {format_hint}

	        ACCUMULATED RESEARCH:
	        {explored_context}

	        PHASE: GENERATE
	        Create a complete, runnable interactive explanation. Choose the best format (marimo or manim).

	        Respond with a JSON object:
	        ```json
	        {{
	        "format": "marimo" or "manim",
	        "code": "complete Python source code here",
	        "narration": "scene-by-scene narration (manim only, empty for marimo)"
	        }}
	        ```

	        Requirements:
	        - For marimo: first line `import marimo`, second line `app = marimo.App()`,
	        every cell has an explicit return, scratch variables use underscore prefixes,
	        and the file ends with `if __name__ == "__main__": app.run()`.
	        - For manim: Scene class with construct(), self.play() animations, MathTex for math.
	        - Cover the key concepts from the keywords.
	        - Match the depth to the tier level ({tier}).
	        - Incorporate findings from the research above.
	        """)
	    return template.format(
	        topic=topic,
	        tier=tier,
	        keywords=keywords,
	        content=content,
	        data_available=data_available,
	        format_hint=format_hint,
	        explored_context=explored_context or "(no research done)",
	    )


	def build_repair_prompt(
	    topic: str,
	    tier: str,
	    fmt: str,
	    previous_code: str,
	    last_errors: str,
	) -> str:
	    """Build the user prompt for a REPAIR step.

	    The static template is dedented *before* the values are substituted.
	    ``previous_code`` is normally multi-line source with lines at column 0;
	    dedenting after interpolation would then strip nothing from the template,
	    and uniformly indented code would lose its own indentation.

	    Args:
	        topic: Topic title shown on the ``TOPIC`` line.
	        tier: Difficulty tier shown on the ``TIER`` line.
	        fmt: Artifact format shown on the ``FORMAT`` line.
	        previous_code: Source of the artifact that failed validation; embedded
	            verbatim inside a ``python`` code fence.
	        last_errors: Validation error text shown under ``ERROR FEEDBACK``.

	    Returns:
	        The prompt text, starting at column 0 and ending with a newline.
	    """
	    template = textwrap.dedent("""\
	        TOPIC: {topic}
	        TIER: {tier}
	        FORMAT: {fmt}

	        The previous generated artifact failed validation.

	        ERROR FEEDBACK:
	        {last_errors}

	        PREVIOUS CODE:
	        ```python
	        {previous_code}
	        ```

	        Submit a corrected complete Python file. Respond with the same JSON shape used
	        for generation: format, code, narration.

	        If the error is MB002, do a full-file variable audit before answering. Fix the
	        assignment names and loop variable names, not just the return values.
	        """)
	    # Substituted values are inserted as-is; braces in the code are not re-parsed.
	    return template.format(
	        topic=topic,
	        tier=tier,
	        fmt=fmt,
	        last_errors=last_errors,
	        previous_code=previous_code,
	    )


	def parse_generate_response(response: str) -> tuple[str, str, str]:
	    """Extract ``(format, code, narration)`` from a GENERATE/REPAIR reply.

	    The reply is expected to be a JSON object, optionally wrapped in a
	    Markdown code fence. Parsing is attempted on, in order: the whole reply,
	    the text between the first opening fence and the *last* closing fence, and
	    the text between the first opening fence and the *next* closing fence.
	    Trying the whole reply first keeps raw JSON intact when the generated code
	    itself contains triple backticks.

	    Args:
	        response: Raw model reply.

	    Returns:
	        A ``(format, code, narration)`` tuple of strings. Missing, ``null`` or
	        non-string fields fall back to ``"marimo"``, ``""`` and ``""``; the
	        format is stripped and lower-cased. When no JSON object can be parsed,
	        the reply itself is returned as the code (without its surrounding
	        fence when the whole reply is one fenced block), and the format is
	        guessed as ``"manim"`` if the reply looks like a manim scene.
	    """
	    text = response.strip()

	    candidates = [text]
	    if "```json" in text:
	        after_fence = text.split("```json", 1)[1]
	    elif "```" in text:
	        after_fence = text.split("```", 1)[1]
	    else:
	        after_fence = None
	    if after_fence is not None:
	        candidates.append(after_fence.rsplit("```", 1)[0].strip())
	        candidates.append(after_fence.split("```", 1)[0].strip())

	    for candidate in candidates:
	        try:
	            data = json.loads(candidate)
	        except json.JSONDecodeError:
	            continue
	        # Valid JSON that is not an object (list, string, number) has no fields.
	        if not isinstance(data, dict):
	            continue
	        fmt = data.get("format")
	        code = data.get("code")
	        narration = data.get("narration")
	        fmt = fmt.strip().lower() if isinstance(fmt, str) else ""
	        return (
	            fmt or "marimo",
	            code if isinstance(code, str) else "",
	            narration if isinstance(narration, str) else "",
	        )

	    # No usable JSON: treat the reply as bare source code.
	    code = response
	    if text.startswith("```") and text.endswith("```") and "\n" in text:
	        # Drop the opening fence line (e.g. "```python") and the closing fence.
	        code = text.split("\n", 1)[1][:-3].rstrip()
	    if "from manim" in response or ("class " in response and "Scene" in response):
	        return "manim", code, ""
	    return "marimo", code, ""


	def parse_explore_response(response: str, fallback_query: str) -> tuple[str, str, str]:
	    """Extract ``(tool, query, intent)`` from an EXPLORE reply.

	    The reply is expected to be a JSON object, optionally wrapped in a
	    Markdown code fence that may be preceded or followed by other text.
	    Parsing is attempted on the whole reply first, then on the fenced text.

	    Args:
	        response: Raw model reply.
	        fallback_query: Query to use when the reply does not supply one.

	    Returns:
	        A ``(tool, query, intent)`` tuple of strings. Any field that is
	        missing, ``null``, not a string, or blank falls back to
	        ``"search_wikipedia"``, ``fallback_query`` and
	        ``"gather background and examples"`` respectively; the same defaults
	        are returned when no JSON object can be parsed at all.
	    """
	    default_tool = "search_wikipedia"
	    default_intent = "gather background and examples"

	    text = response.strip()
	    candidates = [text]
	    if "```json" in text:
	        after_fence = text.split("```json", 1)[1]
	    elif "```" in text:
	        after_fence = text.split("```", 1)[1]
	    else:
	        after_fence = None
	    if after_fence is not None:
	        candidates.append(after_fence.split("```", 1)[0].strip())

	    for candidate in candidates:
	        try:
	            data = json.loads(candidate)
	        except json.JSONDecodeError:
	            continue
	        # Valid JSON that is not an object (list, string, number) has no fields.
	        if not isinstance(data, dict):
	            continue

	        def pick(key: str, default: str) -> str:
	            # A blank or non-string value is as useless as a missing one.
	            value = data.get(key)
	            if isinstance(value, str) and value.strip():
	                return value.strip()
	            return default

	        return (
	            pick("tool", default_tool),
	            pick("query", fallback_query),
	            pick("intent", default_intent),
	        )

	    return default_tool, fallback_query, default_intent