Alikestocode committed
Commit c454e43 · 1 Parent(s): a217627

Fix indentation errors in _generate_router_plan_streaming_internal

Files changed (1)
  1. app.py +86 -86
app.py CHANGED
@@ -292,93 +292,93 @@ def _generate_router_plan_streaming_internal(
         return
 
     try:
- prompt = build_router_prompt(
- user_task=user_task,
- context=context,
- acceptance=acceptance,
- extra_guidance=extra_guidance,
- difficulty=difficulty,
- tags=tags,
- )
-
- generator = load_pipeline(model_choice)
-
- # Get the underlying model and tokenizer
- model = generator.model
- tokenizer = generator.tokenizer
-
- # Set up streaming
- streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-
- # Prepare inputs
- inputs = tokenizer(prompt, return_tensors="pt")
- if hasattr(model, 'device'):
- inputs = {k: v.to(model.device) for k, v in inputs.items()}
- elif torch.cuda.is_available():
- inputs = {k: v.cuda() for k, v in inputs.items()}
-
- # Start generation in a separate thread
- generation_kwargs = {
- **inputs,
- "max_new_tokens": max_new_tokens,
- "temperature": temperature,
- "top_p": top_p,
- "do_sample": True,
- "streamer": streamer,
- "eos_token_id": tokenizer.eos_token_id,
- "pad_token_id": tokenizer.pad_token_id or tokenizer.eos_token_id,
- }
-
- def _generate():
- with torch.inference_mode():
- model.generate(**generation_kwargs)
-
- thread = Thread(target=_generate)
- thread.start()
-
- # Stream tokens
- completion = ""
- parsed_plan: Dict[str, Any] | None = None
- validation_msg = "🔄 Generating..."
-
- for new_text in streamer:
- completion += new_text
- chunk = completion
- finished = False
- display_plan = parsed_plan or {}
-
- chunk, finished = trim_at_stop_sequences(chunk)
-
- try:
- json_block = extract_json_from_text(chunk)
- candidate_plan = json.loads(json_block)
- ok, issues = validate_router_plan(candidate_plan)
- validation_msg = format_validation_message(ok, issues)
- parsed_plan = candidate_plan if ok else parsed_plan
- display_plan = candidate_plan
- except Exception:
- # Ignore until JSON is complete
- pass
-
- yield chunk, display_plan, validation_msg, prompt
-
- if finished:
- completion = chunk
- break
-
- # Final processing after streaming completes
- thread.join()
-
- completion = trim_at_stop_sequences(completion.strip())[0]
- if parsed_plan is None:
- try:
- json_block = extract_json_from_text(completion)
- parsed_plan = json.loads(json_block)
- ok, issues = validate_router_plan(parsed_plan)
- validation_msg = format_validation_message(ok, issues)
- except Exception as exc:
- parsed_plan = {}
- validation_msg = f"❌ JSON parsing failed: {exc}"
+        prompt = build_router_prompt(
+            user_task=user_task,
+            context=context,
+            acceptance=acceptance,
+            extra_guidance=extra_guidance,
+            difficulty=difficulty,
+            tags=tags,
+        )
+
+        generator = load_pipeline(model_choice)
+
+        # Get the underlying model and tokenizer
+        model = generator.model
+        tokenizer = generator.tokenizer
+
+        # Set up streaming
+        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+
+        # Prepare inputs
+        inputs = tokenizer(prompt, return_tensors="pt")
+        if hasattr(model, 'device'):
+            inputs = {k: v.to(model.device) for k, v in inputs.items()}
+        elif torch.cuda.is_available():
+            inputs = {k: v.cuda() for k, v in inputs.items()}
+
+        # Start generation in a separate thread
+        generation_kwargs = {
+            **inputs,
+            "max_new_tokens": max_new_tokens,
+            "temperature": temperature,
+            "top_p": top_p,
+            "do_sample": True,
+            "streamer": streamer,
+            "eos_token_id": tokenizer.eos_token_id,
+            "pad_token_id": tokenizer.pad_token_id or tokenizer.eos_token_id,
+        }
+
+        def _generate():
+            with torch.inference_mode():
+                model.generate(**generation_kwargs)
+
+        thread = Thread(target=_generate)
+        thread.start()
+
+        # Stream tokens
+        completion = ""
+        parsed_plan: Dict[str, Any] | None = None
+        validation_msg = "🔄 Generating..."
+
+        for new_text in streamer:
+            completion += new_text
+            chunk = completion
+            finished = False
+            display_plan = parsed_plan or {}
+
+            chunk, finished = trim_at_stop_sequences(chunk)
+
+            try:
+                json_block = extract_json_from_text(chunk)
+                candidate_plan = json.loads(json_block)
+                ok, issues = validate_router_plan(candidate_plan)
+                validation_msg = format_validation_message(ok, issues)
+                parsed_plan = candidate_plan if ok else parsed_plan
+                display_plan = candidate_plan
+            except Exception:
+                # Ignore until JSON is complete
+                pass
+
+            yield chunk, display_plan, validation_msg, prompt
+
+            if finished:
+                completion = chunk
+                break
+
+        # Final processing after streaming completes
+        thread.join()
+
+        completion = trim_at_stop_sequences(completion.strip())[0]
+        if parsed_plan is None:
+            try:
+                json_block = extract_json_from_text(completion)
+                parsed_plan = json.loads(json_block)
+                ok, issues = validate_router_plan(parsed_plan)
+                validation_msg = format_validation_message(ok, issues)
+            except Exception as exc:
+                parsed_plan = {}
+                validation_msg = f"❌ JSON parsing failed: {exc}"
 
         yield completion, parsed_plan, validation_msg, prompt
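
For reference, the block re-indented above follows the standard transformers streaming recipe: run model.generate() in a worker thread with a TextIteratorStreamer attached, and consume the partial text on the main thread as it is produced. Below is a minimal standalone sketch of that pattern; the checkpoint name, prompt, and sampling settings are illustrative placeholders rather than values from app.py, and the repo-specific helpers (load_pipeline, trim_at_stop_sequences, the JSON plan validation) are omitted.

# Minimal sketch of the TextIteratorStreamer pattern used in app.py.
# Checkpoint name, prompt, and sampling settings are placeholders.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_name = "Qwen/Qwen2.5-0.5B-Instruct"  # any small causal LM works here
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

prompt = "Return a JSON plan for: add a /health endpoint to the API."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# skip_prompt=True makes the streamer yield only newly generated text.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

generation_kwargs = dict(
    **inputs,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    streamer=streamer,
)

# generate() blocks until it finishes, so it runs in a worker thread while
# the main thread iterates over the streamer and sees text as it arrives.
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

completion = ""
for new_text in streamer:
    completion += new_text
    print(new_text, end="", flush=True)

thread.join()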