Spaces:

Alovestocode
/

ZeroGPU-LLM-Inference

Sleeping

Alikestocode committed about 1 month ago

Commit

27234fe

1 Parent(s): d02a9d8

Update Qwen model repo to AWQ quantized version

Files changed (1) hide show

app.py CHANGED Viewed

@@ -105,7 +105,7 @@ ROUTER_SYSTEM_PROMPT = """You are the Router Agent coordinating Math, Code, and
 MODELS = {
     "Router-Qwen3-32B-AWQ": {
-        "repo_id": "Alovestocode/router-qwen3-32b-merged",
         "description": "Router checkpoint on Qwen3 32B merged, optimized with AWQ quantization via vLLM.",
         "params_b": 32.0,
         "quantization": "awq",  # vLLM will auto-detect AWQ

 MODELS = {
     "Router-Qwen3-32B-AWQ": {
+        "repo_id": "Alovestocode/router-qwen3-32b-merged-awq",  # AWQ quantized model
         "description": "Router checkpoint on Qwen3 32B merged, optimized with AWQ quantization via vLLM.",
         "params_b": 32.0,
         "quantization": "awq",  # vLLM will auto-detect AWQ