Alikestocode committed on
Commit
f8c20fd
·
1 Parent(s): 98c0d4d

Lower Gemma AWQ group size to 16

Browse files
Files changed (1) hide show
  1. quantize_to_awq_colab.ipynb +1 -11
quantize_to_awq_colab.ipynb CHANGED
@@ -112,7 +112,7 @@
112
  "source": [
113
  "# Model-specific AWQ overrides. Keys match MODELS_TO_QUANTIZE entries.\n",
114
  "MODEL_AWQ_OVERRIDES = {\n",
115
- " \"router-gemma3-merged\": {\"group_size\": 64},\n",
116
  "}\n",
117
  "\n",
118
  "# Derived AWQ configs per model (defaults + overrides)\n",
@@ -129,16 +129,6 @@
129
  "metadata": {},
130
  "outputs": [],
131
  "source": [
132
- "# Model-specific overrides for AWQ quantization\n",
133
- "# These are merged into the base AWQ config inside quantize_model_to_awq\n",
134
- "MODEL_AWQ_OVERRIDES = {\n",
135
- " # Gemma 3 uses linear layers whose column widths are not divisible by 128.\n",
136
- " # Using group_size=64 avoids quantization failures while retaining accuracy.\n",
137
- " \"gemma\": {\n",
138
- " \"group_size\": 64,\n",
139
- " },\n",
140
- " # Add additional overrides keyed by model_type as needed\n",
141
- "}\n",
142
  "\n"
143
  ]
144
  },
 
112
  "source": [
113
  "# Model-specific AWQ overrides. Keys match MODELS_TO_QUANTIZE entries.\n",
114
  "MODEL_AWQ_OVERRIDES = {\n",
115
+ " \"router-gemma3-merged\": {\"group_size\": 16},\n",
116
  "}\n",
117
  "\n",
118
  "# Derived AWQ configs per model (defaults + overrides)\n",
 
129
  "metadata": {},
130
  "outputs": [],
131
  "source": [
 
 
 
 
 
 
 
 
 
 
132
  "\n"
133
  ]
134
  },