Spaces:
Sleeping
Sleeping
Commit
·
98c0d4d
1
Parent(s):
4b47dea
Use model-specific AWQ configs (Gemma group_size=64)
Browse files
- quantize_to_awq_colab.ipynb +40 -2
quantize_to_awq_colab.ipynb
CHANGED
|
@@ -104,6 +104,44 @@
|
|
| 104 |
"## 3. Configuration\n"
|
| 105 |
]
|
| 106 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
{
|
| 108 |
"cell_type": "code",
|
| 109 |
"execution_count": null,
|
|
@@ -524,7 +562,7 @@
|
|
| 524 |
" repo_id=MODELS_TO_QUANTIZE[\"router-gemma3-merged\"][\"repo_id\"],\n",
|
| 525 |
" output_repo=MODELS_TO_QUANTIZE[\"router-gemma3-merged\"][\"output_repo\"],\n",
|
| 526 |
" model_type=MODELS_TO_QUANTIZE[\"router-gemma3-merged\"][\"model_type\"],\n",
|
| 527 |
-
" awq_config=AWQ_CONFIG,\n",
|
| 528 |
" calibration_dataset_size=128\n",
|
| 529 |
")\n"
|
| 530 |
]
|
|
@@ -547,7 +585,7 @@
|
|
| 547 |
" repo_id=MODELS_TO_QUANTIZE[\"router-qwen3-32b-merged\"][\"repo_id\"],\n",
|
| 548 |
" output_repo=MODELS_TO_QUANTIZE[\"router-qwen3-32b-merged\"][\"output_repo\"],\n",
|
| 549 |
" model_type=MODELS_TO_QUANTIZE[\"router-qwen3-32b-merged\"][\"model_type\"],\n",
|
| 550 |
-
" awq_config=AWQ_CONFIG,\n",
|
| 551 |
" calibration_dataset_size=128\n",
|
| 552 |
")\n"
|
| 553 |
]
|
|
|
|
| 104 |
"## 3. Configuration\n"
|
| 105 |
]
|
| 106 |
},
|
| 107 |
+
{
|
| 108 |
+
"cell_type": "code",
|
| 109 |
+
"execution_count": null,
|
| 110 |
+
"metadata": {},
|
| 111 |
+
"outputs": [],
|
| 112 |
+
"source": [
|
| 113 |
+
"# Model-specific AWQ overrides. Keys match MODELS_TO_QUANTIZE entries.\n",
|
| 114 |
+
"MODEL_AWQ_OVERRIDES = {\n",
|
| 115 |
+
" \"router-gemma3-merged\": {\"group_size\": 64},\n",
|
| 116 |
+
"}\n",
|
| 117 |
+
"\n",
|
| 118 |
+
"# Derived AWQ configs per model (defaults + overrides)\n",
|
| 119 |
+
"MODEL_AWQ_CONFIGS = {\n",
|
| 120 |
+
" model_key: {**AWQ_CONFIG, **MODEL_AWQ_OVERRIDES.get(model_key, {})}\n",
|
| 121 |
+
" for model_key in MODELS_TO_QUANTIZE.keys()\n",
|
| 122 |
+
"}\n",
|
| 123 |
+
"\n"
|
| 124 |
+
]
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"cell_type": "code",
|
| 128 |
+
"execution_count": null,
|
| 129 |
+
"metadata": {},
|
| 130 |
+
"outputs": [],
|
| 131 |
+
"source": [
|
| 132 |
+
"# Model-specific overrides for AWQ quantization\n",
|
| 133 |
+
"# These are merged into the base AWQ config inside quantize_model_to_awq\n",
|
| 134 |
+
"MODEL_AWQ_OVERRIDES = {\n",
|
| 135 |
+
" # Gemma 3 uses linear layers whose column widths are not divisible by 128.\n",
|
| 136 |
+
" # Using group_size=64 avoids quantization failures while retaining accuracy.\n",
|
| 137 |
+
" \"gemma\": {\n",
|
| 138 |
+
" \"group_size\": 64,\n",
|
| 139 |
+
" },\n",
|
| 140 |
+
" # Add additional overrides keyed by model_type as needed\n",
|
| 141 |
+
"}\n",
|
| 142 |
+
"\n"
|
| 143 |
+
]
|
| 144 |
+
},
|
| 145 |
{
|
| 146 |
"cell_type": "code",
|
| 147 |
"execution_count": null,
|
|
|
|
| 562 |
" repo_id=MODELS_TO_QUANTIZE[\"router-gemma3-merged\"][\"repo_id\"],\n",
|
| 563 |
" output_repo=MODELS_TO_QUANTIZE[\"router-gemma3-merged\"][\"output_repo\"],\n",
|
| 564 |
" model_type=MODELS_TO_QUANTIZE[\"router-gemma3-merged\"][\"model_type\"],\n",
|
| 565 |
+
" awq_config=MODEL_AWQ_CONFIGS[\"router-gemma3-merged\"],\n",
|
| 566 |
" calibration_dataset_size=128\n",
|
| 567 |
")\n"
|
| 568 |
]
|
|
|
|
| 585 |
" repo_id=MODELS_TO_QUANTIZE[\"router-qwen3-32b-merged\"][\"repo_id\"],\n",
|
| 586 |
" output_repo=MODELS_TO_QUANTIZE[\"router-qwen3-32b-merged\"][\"output_repo\"],\n",
|
| 587 |
" model_type=MODELS_TO_QUANTIZE[\"router-qwen3-32b-merged\"][\"model_type\"],\n",
|
| 588 |
+
" awq_config=MODEL_AWQ_CONFIGS[\"router-qwen3-32b-merged\"],\n",
|
| 589 |
" calibration_dataset_size=128\n",
|
| 590 |
")\n"
|
| 591 |
]
|