Spaces:
Sleeping
Sleeping
Commit
·
2326498
1
Parent(s):
d4bc333
Fix LLM Compressor package name: llmcompressor (no hyphen)
Browse files
- Change from llm-compressor to llmcompressor (correct package name)
- Add fallback installation from GitHub if PyPI fails
- Add import error handling with helpful error messages
- Add troubleshooting tips in error messages
- quantize_to_awq_colab.ipynb +47 -4
quantize_to_awq_colab.ipynb
CHANGED
|
@@ -36,9 +36,29 @@
|
|
| 36 |
"source": [
|
| 37 |
"# Install required packages\n",
|
| 38 |
"# LLM Compressor is vLLM's native quantization tool\n",
|
| 39 |
-
"
|
|
|
|
| 40 |
"%pip install -q torch --index-url https://download.pytorch.org/whl/cu118\n",
|
| 41 |
"\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
"# Utility function to check disk space\n",
|
| 43 |
"import shutil\n",
|
| 44 |
"def check_disk_space():\n",
|
|
@@ -125,8 +145,21 @@
|
|
| 125 |
"outputs": [],
|
| 126 |
"source": [
|
| 127 |
"# LLM Compressor (vLLM native quantization tool)\n",
|
| 128 |
-
"
|
| 129 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
"from transformers import AutoTokenizer\n",
|
| 131 |
"from huggingface_hub import HfApi, scan_cache_dir, delete_revisions, upload_folder\n",
|
| 132 |
"import torch\n",
|
|
@@ -196,11 +229,17 @@
|
|
| 196 |
" print(f\"Config: {awq_config}\")\n",
|
| 197 |
" print(\"⚠️ LLM Compressor will load the model, quantize it, and save to local directory\")\n",
|
| 198 |
" \n",
|
|
|
|
|
|
|
|
|
|
| 199 |
" try:\n",
|
| 200 |
" # LLM Compressor's oneshot function handles everything:\n",
|
| 201 |
" # - Loading the model\n",
|
| 202 |
" # - Quantization with calibration data\n",
|
| 203 |
" # - Saving quantized model\n",
|
|
|
|
|
|
|
|
|
|
| 204 |
" oneshot(\n",
|
| 205 |
" model=repo_id,\n",
|
| 206 |
" output_dir=temp_output_dir,\n",
|
|
@@ -217,9 +256,13 @@
|
|
| 217 |
" calibration_data=calibration_texts[:min(calibration_dataset_size, 128)] # Limit for efficiency\n",
|
| 218 |
" )\n",
|
| 219 |
" \n",
|
| 220 |
-
" print(f\"✅ Model quantized to AWQ\")\n",
|
| 221 |
" except Exception as e:\n",
|
| 222 |
" print(f\"❌ Quantization failed: {e}\")\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
" import traceback\n",
|
| 224 |
" traceback.print_exc()\n",
|
| 225 |
" raise\n",
|
|
|
|
| 36 |
"source": [
|
| 37 |
"# Install required packages\n",
|
| 38 |
"# LLM Compressor is vLLM's native quantization tool\n",
|
| 39 |
+
"# Note: Package name is 'llmcompressor' (no hyphen), may need to install from GitHub\n",
|
| 40 |
+
"%pip install -q transformers accelerate huggingface_hub\n",
|
| 41 |
"%pip install -q torch --index-url https://download.pytorch.org/whl/cu118\n",
|
| 42 |
"\n",
|
| 43 |
+
"# Try installing llmcompressor from PyPI first, fallback to GitHub if not available\n",
|
| 44 |
+
"try:\n",
|
| 45 |
+
" import llmcompressor\n",
|
| 46 |
+
" print(\"✅ llmcompressor already installed\")\n",
|
| 47 |
+
"except ImportError:\n",
|
| 48 |
+
" print(\"Installing llmcompressor...\")\n",
|
| 49 |
+
" # Try PyPI first\n",
|
| 50 |
+
" import subprocess\n",
|
| 51 |
+
" import sys\n",
|
| 52 |
+
" result = subprocess.run([sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"llmcompressor\"], \n",
|
| 53 |
+
" capture_output=True, text=True)\n",
|
| 54 |
+
" if result.returncode != 0:\n",
|
| 55 |
+
" # Fallback to GitHub installation\n",
|
| 56 |
+
" print(\"PyPI installation failed, trying GitHub...\")\n",
|
| 57 |
+
" subprocess.run([sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \n",
|
| 58 |
+
" \"git+https://github.com/vllm-project/llm-compressor.git\"], \n",
|
| 59 |
+
" check=False)\n",
|
| 60 |
+
" print(\"✅ llmcompressor installed\")\n",
|
| 61 |
+
"\n",
|
| 62 |
"# Utility function to check disk space\n",
|
| 63 |
"import shutil\n",
|
| 64 |
"def check_disk_space():\n",
|
|
|
|
| 145 |
"outputs": [],
|
| 146 |
"source": [
|
| 147 |
"# LLM Compressor (vLLM native quantization tool)\n",
|
| 148 |
+
"# Import with error handling in case installation failed\n",
|
| 149 |
+
"try:\n",
|
| 150 |
+
" from llmcompressor import oneshot\n",
|
| 151 |
+
" from llmcompressor.modifiers.quantization import AWQModifier\n",
|
| 152 |
+
" LLM_COMPRESSOR_AVAILABLE = True\n",
|
| 153 |
+
" print(\"✅ LLM Compressor imported successfully\")\n",
|
| 154 |
+
"except ImportError as e:\n",
|
| 155 |
+
" print(f\"❌ Failed to import llmcompressor: {e}\")\n",
|
| 156 |
+
" print(\"Please ensure llmcompressor is installed:\")\n",
|
| 157 |
+
" print(\" %pip install llmcompressor\")\n",
|
| 158 |
+
" print(\" OR\")\n",
|
| 159 |
+
" print(\" %pip install git+https://github.com/vllm-project/llm-compressor.git\")\n",
|
| 160 |
+
" LLM_COMPRESSOR_AVAILABLE = False\n",
|
| 161 |
+
" raise\n",
|
| 162 |
+
"\n",
|
| 163 |
"from transformers import AutoTokenizer\n",
|
| 164 |
"from huggingface_hub import HfApi, scan_cache_dir, delete_revisions, upload_folder\n",
|
| 165 |
"import torch\n",
|
|
|
|
| 229 |
" print(f\"Config: {awq_config}\")\n",
|
| 230 |
" print(\"⚠️ LLM Compressor will load the model, quantize it, and save to local directory\")\n",
|
| 231 |
" \n",
|
| 232 |
+
" if not LLM_COMPRESSOR_AVAILABLE:\n",
|
| 233 |
+
" raise ImportError(\"LLM Compressor is not available. Please install it first.\")\n",
|
| 234 |
+
" \n",
|
| 235 |
" try:\n",
|
| 236 |
" # LLM Compressor's oneshot function handles everything:\n",
|
| 237 |
" # - Loading the model\n",
|
| 238 |
" # - Quantization with calibration data\n",
|
| 239 |
" # - Saving quantized model\n",
|
| 240 |
+
" print(f\" → Starting quantization with LLM Compressor...\")\n",
|
| 241 |
+
" print(f\" → This may take 30-60 minutes depending on model size...\")\n",
|
| 242 |
+
" \n",
|
| 243 |
" oneshot(\n",
|
| 244 |
" model=repo_id,\n",
|
| 245 |
" output_dir=temp_output_dir,\n",
|
|
|
|
| 256 |
" calibration_data=calibration_texts[:min(calibration_dataset_size, 128)] # Limit for efficiency\n",
|
| 257 |
" )\n",
|
| 258 |
" \n",
|
| 259 |
+
" print(f\"✅ Model quantized to AWQ successfully\")\n",
|
| 260 |
" except Exception as e:\n",
|
| 261 |
" print(f\"❌ Quantization failed: {e}\")\n",
|
| 262 |
+
" print(f\"\\nTroubleshooting:\")\n",
|
| 263 |
+
" print(f\"1. Ensure llmcompressor is installed: %pip install llmcompressor\")\n",
|
| 264 |
+
" print(f\"2. Or install from GitHub: %pip install git+https://github.com/vllm-project/llm-compressor.git\")\n",
|
| 265 |
+
" print(f\"3. Check that you have sufficient GPU memory (40GB+ recommended)\")\n",
|
| 266 |
" import traceback\n",
|
| 267 |
" traceback.print_exc()\n",
|
| 268 |
" raise\n",
|