Alikestocode committed on
Commit
f5a609d
·
1 Parent(s): 4f65341

Improve streaming with incremental JSON parsing and plan end token

Browse files

- Add PLAN_END_TOKEN for explicit plan termination
- Enhance system prompt with detailed JSON structure rules
- Implement incremental JSON parsing during streaming
- Update default max_new_tokens to 16000
- Add API names for endpoints
- Improve validation feedback during streaming

Files changed (1) hide show
  1. app.py +48 -16
app.py CHANGED
@@ -14,7 +14,9 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
14
  if not HF_TOKEN:
15
  raise RuntimeError("HF_TOKEN environment variable must be set for private router checkpoints.")
16
 
17
- ROUTER_SYSTEM_PROMPT = """You are the Router Agent coordinating Math, Code, and General-Search specialists.\nEmit ONLY strict JSON with keys route_plan, route_rationale, expected_artifacts,\nthinking_outline, handoff_plan, todo_list, difficulty, tags, acceptance_criteria, metrics.\nEach route_plan entry must be a tool call (e.g., /math(...), /code(...), /general-search(...)).\nBe concise but precise. Do not include prose outside of the JSON object."""
 
 
18
 
19
  MODELS = {
20
  "Router-Qwen3-32B-8bit": {
@@ -237,23 +239,48 @@ def generate_router_plan_streaming(
237
 
238
  # Stream tokens
239
  completion = ""
 
 
 
240
  for new_text in streamer:
241
  completion += new_text
242
- yield completion, {}, "πŸ”„ Generating...", prompt
243
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  # Final processing after streaming completes
245
  thread.join()
246
-
247
- try:
248
- json_block = extract_json_from_text(completion)
249
- plan = json.loads(json_block)
250
- ok, issues = validate_router_plan(plan)
251
- validation_msg = format_validation_message(ok, issues)
252
- except Exception as exc:
253
- plan = {}
254
- validation_msg = f"❌ JSON parsing failed: {exc}"
255
-
256
- yield completion, plan, validation_msg, prompt
 
 
257
 
258
  except Exception as exc:
259
  error_msg = f"❌ Generation failed: {str(exc)}"
@@ -316,7 +343,7 @@ def build_ui():
316
  placeholder="Comma-separated e.g. calculus, optimization, python",
317
  value="calculus, optimization, python",
318
  )
319
- max_new_tokens = gr.Slider(256, 20000, value=640, step=32, label="Max New Tokens")
320
  temperature = gr.Slider(0.0, 1.5, value=0.2, step=0.05, label="Temperature")
321
  top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
322
 
@@ -345,9 +372,14 @@ def build_ui():
345
  ],
346
  outputs=[raw_output, plan_json, validation_msg, prompt_view],
347
  show_progress="full",
 
348
  )
349
 
350
- clear_btn.click(fn=clear_outputs, outputs=[raw_output, plan_json, validation_msg, prompt_view])
 
 
 
 
351
 
352
  return demo
353
 
 
14
  if not HF_TOKEN:
15
  raise RuntimeError("HF_TOKEN environment variable must be set for private router checkpoints.")
16
 
17
+ PLAN_END_TOKEN = "<|end_of_plan|>"
18
+
19
+ ROUTER_SYSTEM_PROMPT = """You are the Router Agent coordinating Math, Code, and General-Search specialists.\nEmit EXACTLY ONE strict JSON object with keys route_plan, route_rationale, expected_artifacts,\nthinking_outline, handoff_plan, todo_list, difficulty, tags, acceptance_criteria, metrics.\nRules:\n- No markdown/code fences, no natural-language prologues or epilogues.\n- route_plan must be an ordered list of tool invocations such as /math(...), /code(...), /general-search(...).\n- todo_list must map each checklist item to the responsible tool.\n- metrics must include primary and secondary arrays (add optional *_guidance fields when they exist).\n- After the closing brace of the JSON object, immediately append the sentinel <|end_of_plan|>.\nExample output:\n{\n "route_plan": ["/general-search(...)"],\n "route_rationale": "...",\n ...\n}<|end_of_plan|>\nReturn nothing else."""
20
 
21
  MODELS = {
22
  "Router-Qwen3-32B-8bit": {
 
239
 
240
  # Stream tokens
241
  completion = ""
242
+ parsed_plan: Dict[str, Any] | None = None
243
+ validation_msg = "πŸ”„ Generating..."
244
+
245
  for new_text in streamer:
246
  completion += new_text
247
+ chunk = completion
248
+ finished = False
249
+ if PLAN_END_TOKEN in chunk:
250
+ chunk = chunk.split(PLAN_END_TOKEN, 1)[0]
251
+ finished = True
252
+
253
+ try:
254
+ json_block = extract_json_from_text(chunk)
255
+ candidate_plan = json.loads(json_block)
256
+ ok, issues = validate_router_plan(candidate_plan)
257
+ validation_msg = format_validation_message(ok, issues)
258
+ parsed_plan = candidate_plan if ok else candidate_plan
259
+ except Exception:
260
+ # Ignore until JSON is complete
261
+ pass
262
+
263
+ yield chunk, parsed_plan or {}, validation_msg, prompt
264
+
265
+ if finished:
266
+ completion = chunk
267
+ break
268
+
269
  # Final processing after streaming completes
270
  thread.join()
271
+
272
+ completion = completion.strip()
273
+ if parsed_plan is None:
274
+ try:
275
+ json_block = extract_json_from_text(completion)
276
+ parsed_plan = json.loads(json_block)
277
+ ok, issues = validate_router_plan(parsed_plan)
278
+ validation_msg = format_validation_message(ok, issues)
279
+ except Exception as exc:
280
+ parsed_plan = {}
281
+ validation_msg = f"❌ JSON parsing failed: {exc}"
282
+
283
+ yield completion, parsed_plan, validation_msg, prompt
284
 
285
  except Exception as exc:
286
  error_msg = f"❌ Generation failed: {str(exc)}"
 
343
  placeholder="Comma-separated e.g. calculus, optimization, python",
344
  value="calculus, optimization, python",
345
  )
346
+ max_new_tokens = gr.Slider(256, 20000, value=16000, step=32, label="Max New Tokens")
347
  temperature = gr.Slider(0.0, 1.5, value=0.2, step=0.05, label="Temperature")
348
  top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
349
 
 
372
  ],
373
  outputs=[raw_output, plan_json, validation_msg, prompt_view],
374
  show_progress="full",
375
+ api_name="/generate_router_plan_streaming",
376
  )
377
 
378
+ clear_btn.click(
379
+ fn=clear_outputs,
380
+ outputs=[raw_output, plan_json, validation_msg, prompt_view],
381
+ api_name="/clear_outputs",
382
+ )
383
 
384
  return demo
385