Alikestocode committed on
Commit
98c0d4d
·
1 Parent(s): 4b47dea

Use model-specific AWQ configs (Gemma group_size=64)

Browse files
Files changed (1) hide show
  1. quantize_to_awq_colab.ipynb +40 -2
quantize_to_awq_colab.ipynb CHANGED
@@ -104,6 +104,44 @@
104
  "## 3. Configuration\n"
105
  ]
106
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  {
108
  "cell_type": "code",
109
  "execution_count": null,
@@ -524,7 +562,7 @@
524
  " repo_id=MODELS_TO_QUANTIZE[\"router-gemma3-merged\"][\"repo_id\"],\n",
525
  " output_repo=MODELS_TO_QUANTIZE[\"router-gemma3-merged\"][\"output_repo\"],\n",
526
  " model_type=MODELS_TO_QUANTIZE[\"router-gemma3-merged\"][\"model_type\"],\n",
527
- " awq_config=AWQ_CONFIG,\n",
528
  " calibration_dataset_size=128\n",
529
  ")\n"
530
  ]
@@ -547,7 +585,7 @@
547
  " repo_id=MODELS_TO_QUANTIZE[\"router-qwen3-32b-merged\"][\"repo_id\"],\n",
548
  " output_repo=MODELS_TO_QUANTIZE[\"router-qwen3-32b-merged\"][\"output_repo\"],\n",
549
  " model_type=MODELS_TO_QUANTIZE[\"router-qwen3-32b-merged\"][\"model_type\"],\n",
550
- " awq_config=AWQ_CONFIG,\n",
551
  " calibration_dataset_size=128\n",
552
  ")\n"
553
  ]
 
104
  "## 3. Configuration\n"
105
  ]
106
  },
107
+ {
108
+ "cell_type": "code",
109
+ "execution_count": null,
110
+ "metadata": {},
111
+ "outputs": [],
112
+ "source": [
113
+ "# Model-specific AWQ overrides. Keys match MODELS_TO_QUANTIZE entries.\n",
114
+ "MODEL_AWQ_OVERRIDES = {\n",
115
+ " \"router-gemma3-merged\": {\"group_size\": 64},\n",
116
+ "}\n",
117
+ "\n",
118
+ "# Derived AWQ configs per model (defaults + overrides)\n",
119
+ "MODEL_AWQ_CONFIGS = {\n",
120
+ " model_key: {**AWQ_CONFIG, **MODEL_AWQ_OVERRIDES.get(model_key, {})}\n",
121
+ " for model_key in MODELS_TO_QUANTIZE.keys()\n",
122
+ "}\n",
123
+ "\n"
124
+ ]
125
+ },
126
+ {
127
+ "cell_type": "code",
128
+ "execution_count": null,
129
+ "metadata": {},
130
+ "outputs": [],
131
+ "source": [
132
+ "# Model-specific overrides for AWQ quantization\n",
133
+ "# These are merged into the base AWQ config inside quantize_model_to_awq\n",
134
+ "MODEL_AWQ_OVERRIDES = {\n",
135
+ " # Gemma 3 uses linear layers whose column widths are not divisible by 128.\n",
136
+ " # Using group_size=64 avoids quantization failures while retaining accuracy.\n",
137
+ " \"gemma\": {\n",
138
+ " \"group_size\": 64,\n",
139
+ " },\n",
140
+ " # Add additional overrides keyed by model_type as needed\n",
141
+ "}\n",
142
+ "\n"
143
+ ]
144
+ },
145
  {
146
  "cell_type": "code",
147
  "execution_count": null,
 
562
  " repo_id=MODELS_TO_QUANTIZE[\"router-gemma3-merged\"][\"repo_id\"],\n",
563
  " output_repo=MODELS_TO_QUANTIZE[\"router-gemma3-merged\"][\"output_repo\"],\n",
564
  " model_type=MODELS_TO_QUANTIZE[\"router-gemma3-merged\"][\"model_type\"],\n",
565
+ " awq_config=MODEL_AWQ_CONFIGS[\"router-gemma3-merged\"],\n",
566
  " calibration_dataset_size=128\n",
567
  ")\n"
568
  ]
 
585
  " repo_id=MODELS_TO_QUANTIZE[\"router-qwen3-32b-merged\"][\"repo_id\"],\n",
586
  " output_repo=MODELS_TO_QUANTIZE[\"router-qwen3-32b-merged\"][\"output_repo\"],\n",
587
  " model_type=MODELS_TO_QUANTIZE[\"router-qwen3-32b-merged\"][\"model_type\"],\n",
588
+ " awq_config=MODEL_AWQ_CONFIGS[\"router-qwen3-32b-merged\"],\n",
589
  " calibration_dataset_size=128\n",
590
  ")\n"
591
  ]