Spaces:
Sleeping
Sleeping
Commit
·
e08f8c4
1
Parent(s):
cef8ecd
Simplify AWQModifier usage - remove try/except wrapper
Browse files
- AWQModifier() works without parameters
- Remove unnecessary try/except that might cause confusion
- Use default 4-bit AWQ settings
- quantize_to_awq_colab.ipynb +9 -12
quantize_to_awq_colab.ipynb
CHANGED
|
@@ -252,24 +252,21 @@
|
|
| 252 |
" print(f\" β Starting quantization with LLM Compressor...\")\n",
|
| 253 |
" print(f\" β This may take 30-60 minutes depending on model size...\")\n",
|
| 254 |
" \n",
|
| 255 |
-
" # AWQModifier
|
| 256 |
-
" #
|
| 257 |
-
"
|
| 258 |
-
"
|
| 259 |
-
"
|
| 260 |
-
" print(f\" β Using AWQModifier with default settings (4-bit AWQ)\")\n",
|
| 261 |
-
" except Exception as e:\n",
|
| 262 |
-
" print(f\" β AWQModifier() failed: {e}, trying alternative...\")\n",
|
| 263 |
-
" # Alternative: Try creating modifier differently or pass config to oneshot\n",
|
| 264 |
-
" modifiers = [AWQModifier()]\n",
|
| 265 |
" \n",
|
|
|
|
|
|
|
| 266 |
" oneshot(\n",
|
| 267 |
" model=repo_id,\n",
|
| 268 |
" output_dir=temp_output_dir,\n",
|
| 269 |
" modifiers=modifiers,\n",
|
| 270 |
" token=os.environ.get(\"HF_TOKEN\"),\n",
|
| 271 |
-
" # Calibration data
|
| 272 |
-
" calibration_data=calibration_texts[:min(calibration_dataset_size, 128)]
|
| 273 |
" )\n",
|
| 274 |
" \n",
|
| 275 |
" print(f\"✅ Model quantized to AWQ successfully\")\n",
|
|
|
|
| 252 |
" print(f\" β Starting quantization with LLM Compressor...\")\n",
|
| 253 |
" print(f\" β This may take 30-60 minutes depending on model size...\")\n",
|
| 254 |
" \n",
|
| 255 |
+
" # AWQModifier API: Use AWQModifier() without parameters\n",
|
| 256 |
+
" # The modifier uses default 4-bit AWQ settings\n",
|
| 257 |
+
" print(f\" β Creating AWQModifier with default settings...\")\n",
|
| 258 |
+
" modifiers = [AWQModifier()]\n",
|
| 259 |
+
" print(f\" β AWQModifier created successfully\")\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
" \n",
|
| 261 |
+
" # Call oneshot with the modifier\n",
|
| 262 |
+
" print(f\" β Starting quantization process...\")\n",
|
| 263 |
" oneshot(\n",
|
| 264 |
" model=repo_id,\n",
|
| 265 |
" output_dir=temp_output_dir,\n",
|
| 266 |
" modifiers=modifiers,\n",
|
| 267 |
" token=os.environ.get(\"HF_TOKEN\"),\n",
|
| 268 |
+
" # Calibration data: list of strings\n",
|
| 269 |
+
" calibration_data=calibration_texts[:min(calibration_dataset_size, 128)]\n",
|
| 270 |
" )\n",
|
| 271 |
" \n",
|
| 272 |
" print(f\"✅ Model quantized to AWQ successfully\")\n",
|