explainer-env / dashboard_prompts.py
kgdrathan's picture
Upload folder using huggingface_hub
ac7572a verified
"""Prompt builders for the Gradio dashboard."""
import json
import textwrap
try:
from .constants import MAX_EXPLORE_STEPS, MAX_REPAIR_STEPS
except ImportError: # pragma: no cover
from constants import MAX_EXPLORE_STEPS, MAX_REPAIR_STEPS
# System prompt sent with every dashboard request. It describes the
# EXPLORE -> GENERATE -> REPAIR workflow and the JSON shapes the model must
# answer with.
#
# The step budgets are injected with str.replace() rather than str.format()
# or an f-string because the prompt contains literal JSON braces that would
# otherwise have to be escaped.
SYSTEM_PROMPT = textwrap.dedent(
    """\
    You are an expert educator that creates interactive explanations of technical topics.
    You interact with an environment in three phases:
    ## Phase 1: EXPLORE
    Search for relevant information. You'll be given a topic + tier (beginner/intermediate/advanced).
    - Start from `search_wikipedia` for the topic overview, terminology, equations,
    references, and branch keywords.
    - Then use what you learned from Wikipedia/top chunks to choose the next search
    avenue: arXiv/Scholar/HF papers for deeper sources, `fetch_docs` for
    Marimo/Manim/API/code patterns, and HF Hub for model/dataset/examples.
    - Decide search queries to gather relevant material
    - Choose one explicit research tool:
    - search_wikipedia: fundamentals and beginner explanations
    - search_hf_papers: ML/AI papers from Hugging Face Papers
    - search_arxiv: scientific/math/ML papers from arXiv
    - search_scholar: paper metadata, abstracts, citations
    - fetch_docs: library/API documentation for code, plots, Marimo, Manim
    - search_hf_hub: model cards, datasets, Spaces, examples
    - Explore for what the generated code needs: formulas, pseudocode, visual intuition,
    implementation examples, and Marimo/Manim/API patterns.
    - Use `fetch_docs` when you need code examples or interactive artifact patterns.
    Do not repeat broad Wikipedia/paper overview searches when code-oriented context is missing.
    - You have up to {MAX_EXPLORE_STEPS} explore steps. Stop early if you have enough info.
    ## Phase 2: GENERATE
    Produce a complete, runnable Python file in one of two formats:
    ### marimo notebook format (STRICT)
    First line: `import marimo`
    Second line: `app = marimo.App()`
    Use `@app.cell` functions, import shared libraries in the first cell, return shared
    variables explicitly, and use underscore-prefixed scratch variables by default to
    avoid MB002. Last line: `if __name__ == "__main__": app.run()`.
    ### manim animation format
    Use a Scene class with `construct()`, `self.play()`, and `self.wait()`.
    ## Phase 3: REPAIR
    If validation fails, submit a revised complete file using the exact error feedback.
    You have up to {MAX_REPAIR_STEPS} repair steps.
    For EXPLORE actions, respond with a JSON object:
    ```json
    {
    "tool": "search_wikipedia | search_hf_papers | search_arxiv | search_scholar | fetch_docs | search_hf_hub",
    "query": "search query",
    "intent": "what you need from this source"
    }
    ```
    For GENERATE actions, respond with a JSON object:
    ```json
    {
    "format": "marimo" or "manim",
    "code": "complete Python source code",
    "narration": "scene narration (manim only, empty string for marimo)"
    }
    ```
    """
).replace("{MAX_EXPLORE_STEPS}", str(MAX_EXPLORE_STEPS)).replace(
    "{MAX_REPAIR_STEPS}",
    str(MAX_REPAIR_STEPS),
)
def build_explore_prompt(
    topic: str,
    content: str,
    tier: str,
    keywords: str,
    step: int,
    steps_left: int,
    explored_context: str,
    feedback: str,
) -> str:
    """Build the user prompt for a single EXPLORE step.

    Args:
        topic: Topic title shown on the ``TOPIC:`` line.
        content: Topic description shown on the ``DESCRIPTION:`` line.
        tier: Difficulty tier shown on the ``TIER:`` line.
        keywords: Keyword text shown on the ``KEYWORDS:`` line.
        step: Current explore step number.
        steps_left: Remaining explore steps.
        explored_context: Research gathered so far; ``"(none yet)"`` is shown
            when this is empty.
        feedback: Feedback text shown on the ``FEEDBACK:`` line.

    Returns:
        The prompt text, ending with a newline.
    """
    # Dedent the static template *before* substituting values. Dedenting the
    # already-interpolated text lets caller data influence the result:
    # whitespace-only lines inside ``explored_context`` get rewritten, and
    # multi-line values starting at column 0 prevent the template's own
    # indentation from being stripped.
    template = textwrap.dedent(
        """\
        TOPIC: {topic}
        TIER: {tier}
        KEYWORDS: {keywords}
        DESCRIPTION: {content}
        PHASE: EXPLORE (step {step}, {steps_left} steps left)
        PREVIOUS RESEARCH:
        {explored_context}
        FEEDBACK: {feedback}
        Provide a search query to find relevant information about this topic.
        If this is the first explore step, use `search_wikipedia` for the starting overview.
        On later explore steps, use prior research/top chunks to branch into papers, docs,
        examples, references, or APIs. Prefer queries/intents that will help write the final
        interactive code: equations, pseudocode, visual examples, implementation details,
        or Marimo/Manim docs.
        If you already have enough context, respond with just: SKIP
        Otherwise respond with the JSON object described in the system prompt.
        """
    )
    # str.format does not re-scan substituted values, so braces inside the
    # arguments are emitted verbatim.
    return template.format(
        topic=topic,
        tier=tier,
        keywords=keywords,
        content=content,
        step=step,
        steps_left=steps_left,
        explored_context=explored_context or "(none yet)",
        feedback=feedback,
    )
def build_generate_prompt(
    topic: str,
    content: str,
    tier: str,
    keywords: str,
    data_available: bool,
    explored_context: str,
) -> str:
    """Build the user prompt for the GENERATE phase.

    Args:
        topic: Topic title shown on the ``TOPIC:`` line.
        content: Topic description shown on the ``DESCRIPTION:`` line.
        tier: Difficulty tier; shown on the ``TIER:`` line and repeated in
            the requirements list.
        keywords: Keyword text shown on the ``KEYWORDS:`` line.
        data_available: When true, a hint suggesting marimo with data
            visualizations is added; otherwise that line is left empty.
        explored_context: Accumulated research; ``"(no research done)"`` is
            shown when this is empty.

    Returns:
        The prompt text, ending with a newline.
    """
    format_hint = (
        "This topic has associated data - consider marimo with data visualizations."
        if data_available
        else ""
    )
    # Dedent the static template first and substitute afterwards, so that
    # multi-line or whitespace-only content in the arguments is passed
    # through untouched. Literal JSON braces are escaped as {{ and }}.
    template = textwrap.dedent(
        """\
        TOPIC: {topic}
        TIER: {tier}
        KEYWORDS: {keywords}
        DESCRIPTION: {content}
        DATA AVAILABLE: {data_available}
        {format_hint}
        ACCUMULATED RESEARCH:
        {explored_context}
        PHASE: GENERATE
        Create a complete, runnable interactive explanation. Choose the best format (marimo or manim).
        Respond with a JSON object:
        ```json
        {{
        "format": "marimo" or "manim",
        "code": "complete Python source code here",
        "narration": "scene-by-scene narration (manim only, empty for marimo)"
        }}
        ```
        Requirements:
        - For marimo: first line `import marimo`, second line `app = marimo.App()`,
        every cell has an explicit return, scratch variables use underscore prefixes,
        and the file ends with `if __name__ == "__main__": app.run()`.
        - For manim: Scene class with construct(), self.play() animations, MathTex for math.
        - Cover the key concepts from the keywords.
        - Match the depth to the tier level ({tier}).
        - Incorporate findings from the research above.
        """
    )
    return template.format(
        topic=topic,
        tier=tier,
        keywords=keywords,
        content=content,
        data_available=data_available,
        format_hint=format_hint,
        explored_context=explored_context or "(no research done)",
    )
def build_repair_prompt(
    topic: str,
    tier: str,
    fmt: str,
    previous_code: str,
    last_errors: str,
) -> str:
    """Build the user prompt for a REPAIR attempt.

    Args:
        topic: Topic title shown on the ``TOPIC:`` line.
        tier: Difficulty tier shown on the ``TIER:`` line.
        fmt: Artifact format shown on the ``FORMAT:`` line.
        previous_code: Source that failed validation; embedded verbatim in a
            ``python`` fenced block.
        last_errors: Validation feedback shown under ``ERROR FEEDBACK:``.

    Returns:
        The prompt text, ending with a newline.
    """
    # Dedent only the static template, then substitute. Running dedent over
    # the interpolated text would rewrite ``previous_code`` itself (e.g.
    # whitespace-only lines are collapsed), so the model would be shown
    # source that differs from what actually failed validation.
    template = textwrap.dedent(
        """\
        TOPIC: {topic}
        TIER: {tier}
        FORMAT: {fmt}
        The previous generated artifact failed validation.
        ERROR FEEDBACK:
        {last_errors}
        PREVIOUS CODE:
        ```python
        {previous_code}
        ```
        Submit a corrected complete Python file. Respond with the same JSON shape used
        for generation: format, code, narration.
        If the error is MB002, do a full-file variable audit before answering. Fix the
        assignment names and loop variable names, not just the return values.
        """
    )
    return template.format(
        topic=topic,
        tier=tier,
        fmt=fmt,
        last_errors=last_errors,
        previous_code=previous_code,
    )
def parse_generate_response(response: str) -> tuple[str, str, str]:
    """Extract ``(format, code, narration)`` from a GENERATE/REPAIR reply.

    The reply is expected to be a JSON object, optionally wrapped in a
    Markdown code fence. When no JSON object can be recovered, the whole
    reply is treated as source code and the format is guessed from its text.

    Args:
        response: Raw model reply.

    Returns:
        A ``(format, code, narration)`` tuple of strings. Missing, ``null``
        or non-string fields fall back to ``"marimo"``, ``""`` and ``""``.
    """
    text = response.strip()

    # Candidate JSON payloads, most specific first. The unfenced text is
    # tried first because generated code may itself contain triple backticks
    # (e.g. Markdown inside a string), which must not be mistaken for a fence.
    candidates = [text]
    marker = "```json" if "```json" in text else "```"
    if marker in text:
        after_open = text.split(marker, 1)[1]
        # Closing fence = first fence after the opener ...
        candidates.append(after_open.split("```", 1)[0].strip())
        # ... or the last one, when the payload itself contains backticks.
        candidates.append(after_open.rsplit("```", 1)[0].strip())

    for candidate in candidates:
        try:
            data = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # Valid JSON that is not an object (list, string, number) has no
        # fields to read; keep looking instead of failing on ``.get``.
        if not isinstance(data, dict):
            continue

        def field(key: str, default: str) -> str:
            value = data.get(key)
            return value if isinstance(value, str) else default

        return field("format", "marimo"), field("code", ""), field("narration", "")

    # No JSON object found: treat the raw reply as code and guess the format.
    if "from manim" in response or ("class " in response and "Scene" in response):
        return "manim", response, ""
    return "marimo", response, ""
def parse_explore_response(response: str, fallback_query: str) -> tuple[str, str, str]:
    """Extract ``(tool, query, intent)`` from an EXPLORE reply.

    The reply is expected to be a JSON object, optionally wrapped in a
    Markdown code fence. Anything else (including a plain-text reply) yields
    the defaults; this function does not give any reply special meaning, so
    callers that need to react to specific plain-text answers must check for
    them separately.

    Args:
        response: Raw model reply.
        fallback_query: Query to use when the reply provides none.

    Returns:
        A ``(tool, query, intent)`` tuple of strings. Missing, ``null``,
        blank or non-string fields fall back to ``"search_wikipedia"``,
        ``fallback_query`` and ``"gather background and examples"``.
    """
    default_tool = "search_wikipedia"
    default_intent = "gather background and examples"

    text = response.strip()

    # Candidate JSON payloads: the unfenced text first, then the contents of
    # a Markdown fence found anywhere in the reply.
    candidates = [text]
    marker = "```json" if "```json" in text else "```"
    if marker in text:
        after_open = text.split(marker, 1)[1]
        candidates.append(after_open.split("```", 1)[0].strip())
        # Fall back to the last fence in case the payload contains backticks.
        candidates.append(after_open.rsplit("```", 1)[0].strip())

    for candidate in candidates:
        try:
            data = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # Valid JSON that is not an object has no fields to read.
        if not isinstance(data, dict):
            continue

        def field(key: str, default: str) -> str:
            value = data.get(key)
            # A blank tool/query/intent is as unusable as a missing one.
            if isinstance(value, str) and value.strip():
                return value
            return default

        return (
            field("tool", default_tool),
            field("query", fallback_query),
            field("intent", default_intent),
        )

    return default_tool, fallback_query, default_intent