Spaces:
Sleeping
Sleeping
Commit
·
2dff966
1
Parent(s):
a79bc8f
Fix linter error: use %pip instead of !pip in Colab notebook
Browse files
- quantize_to_awq_colab.ipynb +27 -2
quantize_to_awq_colab.ipynb
CHANGED
|
@@ -29,8 +29,8 @@
|
|
| 29 |
"outputs": [],
|
| 30 |
"source": [
|
| 31 |
"# Install required packages\n",
|
| 32 |
-
"!pip install -q autoawq transformers accelerate huggingface_hub\n",
|
| 33 |
-
"!pip install -q torch --index-url https://download.pytorch.org/whl/cu118\n"
|
| 34 |
]
|
| 35 |
},
|
| 36 |
{
|
|
@@ -354,6 +354,31 @@
|
|
| 354 |
"for model_key, model_info in MODELS_TO_QUANTIZE.items():\n",
|
| 355 |
" verify_awq_model(model_info[\"output_repo\"])\n"
|
| 356 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
}
|
| 358 |
],
|
| 359 |
"metadata": {
|
|
|
|
| 29 |
"outputs": [],
|
| 30 |
"source": [
|
| 31 |
"# Install required packages\n",
|
| 32 |
+
"%pip install -q autoawq transformers accelerate huggingface_hub\n",
|
| 33 |
+
"%pip install -q torch --index-url https://download.pytorch.org/whl/cu118\n"
|
| 34 |
]
|
| 35 |
},
|
| 36 |
{
|
|
|
|
| 354 |
"for model_key, model_info in MODELS_TO_QUANTIZE.items():\n",
|
| 355 |
" verify_awq_model(model_info[\"output_repo\"])\n"
|
| 356 |
]
|
| 357 |
+
},
|
| 358 |
+
{
|
| 359 |
+
"cell_type": "markdown",
|
| 360 |
+
"metadata": {},
|
| 361 |
+
"source": [
|
| 362 |
+
"## Notes\n",
|
| 363 |
+
"\n",
|
| 364 |
+
"- **GPU Required**: This quantization requires a GPU with at least 40GB VRAM (A100/H100 recommended)\n",
|
| 365 |
+
"- **Time**: Each model takes approximately 30-60 minutes to quantize\n",
|
| 366 |
+
"- **Disk Space**: Ensure you have enough disk space (models are ~20-30GB each)\n",
|
| 367 |
+
"- **Output Repos**: You can either create new repos (with `-awq` suffix) or upload to existing repos\n",
|
| 368 |
+
"- **Usage**: After quantization, update your `app.py` to use the AWQ repos:\n",
|
| 369 |
+
" ```python\n",
|
| 370 |
+
" MODELS = {\n",
|
| 371 |
+
" \"Router-Gemma3-27B-AWQ\": {\n",
|
| 372 |
+
" \"repo_id\": \"Alovestocode/router-gemma3-merged-awq\",\n",
|
| 373 |
+
" \"quantization\": \"awq\"\n",
|
| 374 |
+
" },\n",
|
| 375 |
+
" \"Router-Qwen3-32B-AWQ\": {\n",
|
| 376 |
+
" \"repo_id\": \"Alovestocode/router-qwen3-32b-merged-awq\",\n",
|
| 377 |
+
" \"quantization\": \"awq\"\n",
|
| 378 |
+
" }\n",
|
| 379 |
+
" }\n",
|
| 380 |
+
" ```\n"
|
| 381 |
+
]
|
| 382 |
}
|
| 383 |
],
|
| 384 |
"metadata": {
|