#!/usr/bin/env python3 """ Test script for ZeroGPU LLM Inference API using Gradio Client Usage: python test_api_gradio_client.py """ import sys try: from gradio_client import Client except ImportError: print("❌ gradio_client not installed!") print(" Install it with: pip install gradio_client") sys.exit(1) API_URL = "https://Alovestocode-ZeroGPU-LLM-Inference.hf.space" def test_api(): """Test the API endpoint using Gradio Client""" print("=" * 60) print("Testing ZeroGPU LLM Inference API (Gradio Client)") print("=" * 60) # Connect to the space print("\n1. Connecting to space...") try: client = Client(API_URL, verbose=False) print(f" ✅ Connected to {API_URL}") except Exception as e: print(f" ❌ Failed to connect: {e}") return False # List available endpoints print(f"\n2. Available endpoints: {len(client.endpoints) if hasattr(client.endpoints, '__len__') else 'N/A'}") try: if hasattr(client, 'endpoints') and client.endpoints: if isinstance(client.endpoints, list): for i, endpoint in enumerate(client.endpoints): if isinstance(endpoint, dict): api_name = endpoint.get('api_name', f'endpoint_{i}') fn_name = endpoint.get('fn_name', 'unknown') print(f" {i+1}. API Name: {api_name}") print(f" Function: {fn_name}") else: print(f" {i+1}. Endpoint index: {endpoint}") except Exception as e: print(f" (Could not list endpoints: {e})") # Test the generate_router_plan_streaming endpoint print("\n" + "=" * 60) print("3. Testing generate_router_plan_streaming endpoint...") print("=" * 60) test_params = { 'user_task': 'Solve a quadratic equation using Python', 'context': '', 'acceptance': '- Provide step-by-step solution\n- Include code example', 'extra_guidance': '', 'difficulty': 'intermediate', 'tags': 'math, python', 'model_choice': 'Router-Qwen3-32B-AWQ', 'max_new_tokens': 512, # Smaller for quick test 'temperature': 0.2, 'top_p': 0.9 } print("\nTest Parameters:") for key, value in test_params.items(): print(f" {key}: {value}") try: print("\n Sending request... (this may take a moment)") result = client.predict( test_params['user_task'], test_params['context'], test_params['acceptance'], test_params['extra_guidance'], test_params['difficulty'], test_params['tags'], test_params['model_choice'], test_params['max_new_tokens'], test_params['temperature'], test_params['top_p'], api_name='//generate_router_plan_streaming' ) print("\n ✅ API call successful!") print(f"\n Result type: {type(result)}") if isinstance(result, (list, tuple)): print(f" Number of outputs: {len(result)}") if len(result) >= 1: raw_output = result[0] print(f"\n Output 1 (Raw Model Output):") print(f" {'-' * 56}") if isinstance(raw_output, str): preview = raw_output[:500] if len(raw_output) > 500 else raw_output print(f" {preview}") if len(raw_output) > 500: print(f" ... (truncated, total length: {len(raw_output)} chars)") else: print(f" {raw_output}") if len(result) >= 2: plan_json = result[1] print(f"\n Output 2 (Parsed Router Plan):") print(f" {'-' * 56}") if isinstance(plan_json, dict): print(f" Keys: {list(plan_json.keys())}") if plan_json: import json print(f" Content:\n{json.dumps(plan_json, indent=2)[:500]}") else: print(" (empty)") else: print(f" {plan_json}") if len(result) >= 3: validation_msg = result[2] print(f"\n Output 3 (Validation Message):") print(f" {'-' * 56}") print(f" {validation_msg}") if len(result) >= 4: prompt_view = result[3] print(f"\n Output 4 (Full Prompt):") print(f" {'-' * 56}") if isinstance(prompt_view, str): preview = prompt_view[:300] if len(prompt_view) > 300 else prompt_view print(f" {preview}") if len(prompt_view) > 300: print(f" ... (truncated)") else: print(f" {prompt_view}") else: print(f" Result: {result}") return True except Exception as e: error_msg = str(e) if "GPU quota" in error_msg or "exceeded" in error_msg.lower(): print(f"\n ⚠️ API call reached GPU quota limit (this means the API is working!)") print(f" Error: {error_msg}") print(f" ✅ API endpoint is correctly configured and accessible") return True # Consider this a success since API is working else: print(f"\n ❌ API call failed: {e}") import traceback traceback.print_exc() return False def test_clear_endpoint(): """Test the clear_outputs endpoint""" print("\n" + "=" * 60) print("4. Testing clear_outputs endpoint...") print("=" * 60) try: client = Client(API_URL, verbose=False) result = client.predict(api_name='//clear_outputs') print(" ✅ Clear endpoint working!") print(f" Result: {result}") return True except Exception as e: print(f" ⚠️ Clear endpoint test failed: {e}") return False if __name__ == "__main__": print("\n") success = test_api() # Test clear endpoint test_clear_endpoint() print("\n" + "=" * 60) if success: print("✅ API test completed successfully!") print("\nThe API is working correctly. You can use it in your code with:") print("\n from gradio_client import Client") print(f" client = Client('{API_URL}')") print(" result = client.predict(") print(" 'user_task', '', 'acceptance', '', 'intermediate', 'tags',") print(" 'Router-Qwen3-32B-AWQ', 512, 0.2, 0.9,") print(" api_name='//generate_router_plan_streaming'") print(" )") else: print("❌ API test failed.") print(" Check the error messages above for details.") print("=" * 60) print() sys.exit(0 if success else 1)