Spaces:

Alovestocode
/

ZeroGPU-LLM-Inference

Sleeping

Alikestocode committed on Nov 9

Commit

54880b1

1 Parent(s): d6f9002

Add debug logging for model loading and generation issues

Files changed (1) hide show

app.py CHANGED Viewed

@@ -581,7 +581,9 @@ def _generate_router_plan_streaming_internal(
             tags=tags,
         )
         generator = load_pipeline(model_choice)
         # Check if using vLLM or Transformers
         is_vllm = VLLM_AVAILABLE and isinstance(generator, LLM)
@@ -714,7 +716,12 @@ def _generate_router_plan_streaming_internal(
             thread.join()
         completion = trim_at_stop_sequences(completion.strip())[0]
-        if parsed_plan is None:
             try:
                 json_block = extract_json_from_text(completion)
                 parsed_plan = json.loads(json_block)
@@ -723,10 +730,14 @@ def _generate_router_plan_streaming_internal(
             except Exception as exc:
                 parsed_plan = {}
                 validation_msg = f"❌ JSON parsing failed: {exc}"
         yield completion, parsed_plan, validation_msg, prompt
     except Exception as exc:
         error_msg = f"❌ Generation failed: {str(exc)}"
         yield "", {}, error_msg, ""

             tags=tags,
         )
+        print(f"[DEBUG] Loading model: {model_choice}")
         generator = load_pipeline(model_choice)
+        print(f"[DEBUG] Model loaded successfully: {type(generator)}")
         # Check if using vLLM or Transformers
         is_vllm = VLLM_AVAILABLE and isinstance(generator, LLM)
             thread.join()
         completion = trim_at_stop_sequences(completion.strip())[0]
+        print(f"[DEBUG] Final completion length: {len(completion)}")
+        if not completion:
+            print("[DEBUG] WARNING: Completion is empty - model may not have generated output")
+            validation_msg = "⚠️ Model generated empty output. Check GPU allocation and model loading."
+        elif parsed_plan is None:
             try:
                 json_block = extract_json_from_text(completion)
                 parsed_plan = json.loads(json_block)
             except Exception as exc:
                 parsed_plan = {}
                 validation_msg = f"❌ JSON parsing failed: {exc}"
+                print(f"[DEBUG] JSON parsing error: {exc}")
         yield completion, parsed_plan, validation_msg, prompt
     except Exception as exc:
+        import traceback
+        print(f"[DEBUG] Exception in generation: {exc}")
+        print(f"[DEBUG] Traceback: {traceback.format_exc()}")
         error_msg = f"❌ Generation failed: {str(exc)}"
         yield "", {}, error_msg, ""