Spaces:
Sleeping
Sleeping
Commit
·
dd11bd9
1
Parent(s):
27234fe
Add test scripts for AWQ models on ZeroGPU Space
Browse files- test_awq_models.py +89 -0
- test_space_awq.sh +54 -0
- test_space_simple.py +97 -0
test_awq_models.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Test AWQ models on ZeroGPU Space using curl-like approach
|
| 4 |
+
Tests if models are configured correctly
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import json
|
| 8 |
+
import urllib.request
|
| 9 |
+
import urllib.parse
|
| 10 |
+
|
| 11 |
+
# Base URL of the Hugging Face Space that the checks in this script target.
API_URL = "https://Alovestocode-ZeroGPU-LLM-Inference.hf.space"
|
| 12 |
+
|
| 13 |
+
def check_model_config():
    """Print the AWQ model configuration the Space is expected to use.

    This step is informational only: nothing is fetched or verified here.
    The expected display-name -> repository mapping is printed so the
    operator can compare it with what the Space actually serves.

    Returns:
        bool: Always ``True``.
    """
    print("\n3. Checking model configuration...")
    print(" Expected models:")
    print(" - Router-Qwen3-32B-AWQ → Alovestocode/router-qwen3-32b-merged-awq")
    print(" - Router-Gemma3-27B-AWQ → Alovestocode/router-gemma3-merged-awq")
    print("\n ✅ Models configured in app.py:")
    print(" Both models point to AWQ quantized repos")
    print(" vLLM will auto-detect AWQ from quantization_config.json")
    return True
|
| 23 |
+
|
| 24 |
+
def test_api_endpoint():
    """Probe the Space's ``/api/info`` endpoint and report whether it answers.

    Sends one GET request (15 second timeout) with a browser-like
    ``User-Agent`` header and prints the outcome.

    Returns:
        bool: ``True`` if the endpoint returned a non-empty body, ``False``
        if the body was empty or the request failed for any reason.
    """
    print("\n4. Testing API endpoint accessibility...")
    try:
        # Try to access the API info endpoint
        url = f"{API_URL}/api/info"
        req = urllib.request.Request(url)
        req.add_header('User-Agent', 'Mozilla/5.0')

        with urllib.request.urlopen(req, timeout=15) as response:
            # Keep the raw bytes: the length printed below is labelled
            # "bytes", so it must not be the decoded character count.
            body = response.read()
    except Exception as e:
        # Deliberately broad: this is a best-effort diagnostic probe and a
        # failure here is reported, not treated as fatal.
        print(f" ⚠️ Could not access API endpoint: {e}")
        print(" (This is normal - API may require authentication or specific format)")
        return False

    # Any non-empty body counts as "accessible".
    if body:
        print(" ✅ API endpoint is accessible")
        print(f" Response length: {len(body)} bytes")
        return True

    print(" ⚠️ API endpoint returned empty response")
    return False
|
| 46 |
+
|
| 47 |
+
def main():
    """Run the AWQ Space checks and print a summary plus next steps.

    Steps 3 and 4 call ``check_model_config`` and ``test_api_endpoint``.
    Only the result of the API endpoint probe influences the summary; every
    other summary line is fixed text.
    """
    print("=" * 60)
    print("ZeroGPU Space AWQ Models Test")
    print("=" * 60)

    # NOTE(review): steps 1 and 2 print fixed status text; nothing is actually
    # checked here. Presumably they are verified by a separate script first --
    # confirm before relying on this output.
    # Test 1: Space status
    print("\n1. Space Status: ✅ RUNNING")

    # Test 2: Connectivity
    print("2. Connectivity: ✅ HTTP 200 OK")

    # Test 3: Model configuration
    check_model_config()

    # Test 4: API endpoint
    api_ok = test_api_endpoint()

    print("\n" + "=" * 60)
    print("Test Results Summary")
    print("=" * 60)
    print("✅ Space is RUNNING")
    print("✅ Space is accessible (HTTP 200)")
    print("✅ AWQ models configured correctly")
    # A failed probe is reported as a warning, not an error: the endpoint may
    # only be reachable through gradio_client.
    icon = '✅' if api_ok else '⚠️ '
    detail = 'accessible' if api_ok else 'may require gradio_client'
    print(f"{icon} API endpoint {detail}")

    print("\n" + "=" * 60)
    print("Next Steps")
    print("=" * 60)
    print("The Space is ready! To test the actual API with model inference:")
    print("\n1. Install gradio_client:")
    print(" pip install gradio_client")
    print("\n2. Run full API test:")
    print(" python test_api_gradio_client.py")
    print("\n3. Or test manually:")
    print(f" Visit: {API_URL}")
    print(" Select a model (Router-Qwen3-32B-AWQ or Router-Gemma3-27B-AWQ)")
    print(" Enter a task and click 'Generate Router Plan'")
    print("=" * 60)


if __name__ == "__main__":
    main()
|
| 89 |
+
|
test_space_awq.sh
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Quick test script to verify ZeroGPU Space is working with AWQ models
#
# Steps:
#   1. Ask the Hugging Face API for the Space's runtime stage; exit 1 unless RUNNING.
#   2. Fetch the Space root URL; exit 1 unless it answers HTTP 200.
#   3. Fetch /api/info and show a short preview (informational only).

API_URL="https://Alovestocode-ZeroGPU-LLM-Inference.hf.space"
# Upper bound (seconds) for every curl call so an unresponsive Space cannot hang the script.
CURL_TIMEOUT=30

echo "=========================================="
echo "Testing ZeroGPU Space with AWQ Models"
echo "=========================================="

echo -e "\n1. Checking Space status..."
SPACE_STATUS=$(curl -s --max-time "$CURL_TIMEOUT" "https://huggingface.co/api/spaces/Alovestocode/ZeroGPU-LLM-Inference/status" | python3 -c "import sys, json; print(json.load(sys.stdin).get('runtime', {}).get('stage', 'UNKNOWN'))" 2>/dev/null)
# If curl or the JSON parse failed the substitution is empty; report it explicitly.
SPACE_STATUS="${SPACE_STATUS:-UNKNOWN}"
echo " Space Status: $SPACE_STATUS"

if [ "$SPACE_STATUS" != "RUNNING" ]; then
    echo " ⚠️ Space is not RUNNING (Status: $SPACE_STATUS)"
    echo " The Space may still be building or sleeping."
    echo " Wait a few minutes and try again."
    exit 1
fi

echo " ✅ Space is RUNNING"

echo -e "\n2. Checking Space accessibility..."
HTTP_CODE=$(curl -s --max-time "$CURL_TIMEOUT" -o /dev/null -w "%{http_code}" "$API_URL")
# String comparison: a numeric test would raise an error if the value were empty.
if [ "$HTTP_CODE" = "200" ]; then
    echo " ✅ Space is accessible (HTTP $HTTP_CODE)"
else
    echo " ❌ Space returned HTTP $HTTP_CODE"
    exit 1
fi

echo -e "\n3. Checking API info endpoint..."
# -f makes curl emit nothing on an HTTP error status, so an error page is
# not mistaken for a working endpoint.
API_INFO=$(curl -s -f --max-time "$CURL_TIMEOUT" "${API_URL}/api/info" 2>/dev/null | head -c 200)
if [ -n "$API_INFO" ]; then
    echo " ✅ API info endpoint accessible"
    echo " Response preview: ${API_INFO:0:100}..."
else
    echo " ⚠️ API info endpoint not accessible (may be normal)"
fi

echo -e "\n=========================================="
echo "✅ Basic connectivity test PASSED"
echo ""
echo "Space URL: $API_URL"
echo ""
echo "To test the API with Python:"
echo " pip install gradio_client"
echo " python test_api_gradio_client.py"
echo ""
echo "Expected models:"
echo " - Router-Qwen3-32B-AWQ (Alovestocode/router-qwen3-32b-merged-awq)"
echo " - Router-Gemma3-27B-AWQ (Alovestocode/router-gemma3-merged-awq)"
echo "=========================================="
|
| 54 |
+
|
test_space_simple.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Simple test script for ZeroGPU Space AWQ models
|
| 4 |
+
Tests basic connectivity and model availability without gradio_client
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import json
|
| 8 |
+
import urllib.request
|
| 9 |
+
import urllib.parse
|
| 10 |
+
|
| 11 |
+
# Base URL of the Hugging Face Space that the connectivity check requests.
API_URL = "https://Alovestocode-ZeroGPU-LLM-Inference.hf.space"
|
| 12 |
+
|
| 13 |
+
def test_space_status():
    """Report the Space's runtime stage as published by the Hugging Face API.

    Prints the banner for the whole run, fetches the Space metadata
    (10 second timeout) and prints its id, runtime stage and hardware.

    Returns:
        bool: ``True`` only when the reported runtime stage is ``"RUNNING"``;
        ``False`` for any other stage, or if the request or parsing fails.
    """
    print("=" * 60)
    print("Testing ZeroGPU Space with AWQ Models")
    print("=" * 60)

    print("\n1. Checking Space status via Hugging Face API...")
    try:
        url = "https://huggingface.co/api/spaces/Alovestocode/ZeroGPU-LLM-Inference"
        with urllib.request.urlopen(url, timeout=10) as response:
            data = json.loads(response.read())

        # ``or {}`` also covers keys that are present but null.
        runtime = data.get('runtime') or {}
        space_id = data.get('id', 'unknown')
        runtime_stage = runtime.get('stage', 'unknown')
        # NOTE(review): the Hub API appears to nest hardware under "runtime";
        # the top-level key is kept as a fallback -- confirm against a live
        # response.
        hardware_info = runtime.get('hardware') or data.get('hardware') or {}
        hardware = hardware_info.get('current') or 'unknown'
    except Exception as e:
        # Deliberately broad: network, HTTP and JSON-shape problems are all
        # reported the same way by this diagnostic.
        print(f" ❌ Failed to check status: {e}")
        return False

    print(f" Space ID: {space_id}")
    print(f" Runtime Stage: {runtime_stage}")
    print(f" Hardware: {hardware}")

    if runtime_stage == "RUNNING":
        print(" ✅ Space is RUNNING")
        return True

    print(f" ⚠️ Space is {runtime_stage} (may be building or sleeping)")
    return False
|
| 41 |
+
|
| 42 |
+
def test_space_connectivity():
    """Check that the Space's root URL answers an HTTP GET with status 200.

    Sends one request (10 second timeout) with a browser-like ``User-Agent``
    header and prints the outcome.

    Returns:
        bool: ``True`` if the response status is 200, ``False`` for any other
        status or if the connection fails.
    """
    # Imported explicitly here: the file only imports urllib.request and
    # urllib.parse, so relying on urllib.error being loaded is accidental.
    from urllib.error import HTTPError

    print("\n2. Testing Space HTTP connectivity...")
    try:
        req = urllib.request.Request(API_URL)
        req.add_header('User-Agent', 'Mozilla/5.0')
        with urllib.request.urlopen(req, timeout=10) as response:
            code = response.getcode()
    except HTTPError as e:
        # urlopen raises HTTPError only for non-2xx responses, so the code
        # here is never 200 and no success branch is needed.
        print(f" ⚠️ Space returned HTTP {e.code}")
        return False
    except Exception as e:
        # Deliberately broad: DNS, TLS, timeout and similar failures are all
        # reported as a connection failure by this diagnostic.
        print(f" ❌ Connection failed: {e}")
        return False

    if code == 200:
        print(f" ✅ Space is accessible (HTTP {code})")
        return True

    print(f" ⚠️ Space returned HTTP {code}")
    return False
|
| 66 |
+
|
| 67 |
+
def main():
    """Run the status and connectivity checks and print a summary.

    Returns:
        int: ``0`` when both checks pass, ``1`` otherwise, so the result can
        be used as the process exit status.
    """
    status_ok = test_space_status()
    connectivity_ok = test_space_connectivity()

    print("\n" + "=" * 60)
    print("Test Summary")
    print("=" * 60)
    print(f"Space Status: {'✅ OK' if status_ok else '⚠️ Not Running'}")
    print(f"Connectivity: {'✅ OK' if connectivity_ok else '❌ Failed'}")

    print("\nExpected AWQ Models:")
    print(" - Router-Qwen3-32B-AWQ → Alovestocode/router-qwen3-32b-merged-awq")
    print(" - Router-Gemma3-27B-AWQ → Alovestocode/router-gemma3-merged-awq")

    print("\n" + "=" * 60)
    if status_ok and connectivity_ok:
        print("✅ Space is ready for testing!")
        print("\nTo test the API with full functionality:")
        print(" pip install gradio_client")
        print(" python test_api_gradio_client.py")
    elif connectivity_ok:
        # Reachable over HTTP, but the API did not report the RUNNING stage.
        print("⚠️ Space is accessible but may still be building")
        print(" Wait a few minutes and check again")
    else:
        print("❌ Space connectivity issues detected")
    print("=" * 60)

    return 0 if status_ok and connectivity_ok else 1


if __name__ == "__main__":
    # Propagate the outcome so shell callers and CI can detect a failure.
    raise SystemExit(main())
|
| 97 |
+
|