Spaces:
Sleeping
Sleeping
Commit · f8c20fd
1 Parent(s): 98c0d4d
Lower Gemma AWQ group size to 16
Browse files
- quantize_to_awq_colab.ipynb +1 -11
quantize_to_awq_colab.ipynb
CHANGED
|
@@ -112,7 +112,7 @@
|
|
| 112 |
"source": [
|
| 113 |
"# Model-specific AWQ overrides. Keys match MODELS_TO_QUANTIZE entries.\n",
|
| 114 |
"MODEL_AWQ_OVERRIDES = {\n",
|
| 115 |
-
" \"router-gemma3-merged\": {\"group_size\":
|
| 116 |
"}\n",
|
| 117 |
"\n",
|
| 118 |
"# Derived AWQ configs per model (defaults + overrides)\n",
|
|
@@ -129,16 +129,6 @@
|
|
| 129 |
"metadata": {},
|
| 130 |
"outputs": [],
|
| 131 |
"source": [
|
| 132 |
-
"# Model-specific overrides for AWQ quantization\n",
|
| 133 |
-
"# These are merged into the base AWQ config inside quantize_model_to_awq\n",
|
| 134 |
-
"MODEL_AWQ_OVERRIDES = {\n",
|
| 135 |
-
" # Gemma 3 uses linear layers whose column widths are not divisible by 128.\n",
|
| 136 |
-
" # Using group_size=64 avoids quantization failures while retaining accuracy.\n",
|
| 137 |
-
" \"gemma\": {\n",
|
| 138 |
-
" \"group_size\": 64,\n",
|
| 139 |
-
" },\n",
|
| 140 |
-
" # Add additional overrides keyed by model_type as needed\n",
|
| 141 |
-
"}\n",
|
| 142 |
"\n"
|
| 143 |
]
|
| 144 |
},
|
|
|
|
| 112 |
"source": [
|
| 113 |
"# Model-specific AWQ overrides. Keys match MODELS_TO_QUANTIZE entries.\n",
|
| 114 |
"MODEL_AWQ_OVERRIDES = {\n",
|
| 115 |
+
" \"router-gemma3-merged\": {\"group_size\": 16},\n",
|
| 116 |
"}\n",
|
| 117 |
"\n",
|
| 118 |
"# Derived AWQ configs per model (defaults + overrides)\n",
|
|
|
|
| 129 |
"metadata": {},
|
| 130 |
"outputs": [],
|
| 131 |
"source": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
"\n"
|
| 133 |
]
|
| 134 |
},
|