Alikestocode committed · Commit 2326498 · 1 parent: d4bc333

Fix LLM Compressor package name: llmcompressor (no hyphen)


- Change from llm-compressor to llmcompressor (correct package name)
- Add fallback installation from GitHub if PyPI fails (see the sketch after this list)
- Add import error handling with helpful error messages
- Add troubleshooting tips in error messages
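
The PyPI-then-GitHub fallback described above reduces to the pattern sketched below. This is a minimal, standalone rendering of the cell added in this commit, using only the standard library; the package name and repository URL are taken from the diff, and the helper name ensure_llmcompressor is hypothetical.

    import subprocess
    import sys

    def ensure_llmcompressor() -> None:
        """Install llmcompressor from PyPI, falling back to the GitHub repo."""
        try:
            import llmcompressor  # correct distribution name: no hyphen
            return  # already installed, nothing to do
        except ImportError:
            pass
        # First attempt: the PyPI distribution.
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", "-q", "llmcompressor"],
            capture_output=True, text=True,
        )
        if result.returncode != 0:
            # Fallback: install straight from the project's GitHub repository.
            subprocess.run(
                [sys.executable, "-m", "pip", "install", "-q",
                 "git+https://github.com/vllm-project/llm-compressor.git"],
                check=True,  # surface an error if the fallback also fails
            )

Using subprocess with sys.executable rather than a second %pip magic is what makes the fallback possible: %pip does not hand its exit status back to the cell, while subprocess.run exposes returncode so the PyPI attempt can be checked before trying GitHub.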

Files changed (1)
  1. quantize_to_awq_colab.ipynb (+47, −4)
quantize_to_awq_colab.ipynb CHANGED
@@ -36,9 +36,29 @@
     "source": [
     "# Install required packages\n",
     "# LLM Compressor is vLLM's native quantization tool\n",
-    "%pip install -q llm-compressor transformers accelerate huggingface_hub\n",
+    "# Note: Package name is 'llmcompressor' (no hyphen), may need to install from GitHub\n",
+    "%pip install -q transformers accelerate huggingface_hub\n",
     "%pip install -q torch --index-url https://download.pytorch.org/whl/cu118\n",
     "\n",
+    "# Try installing llmcompressor from PyPI first, fallback to GitHub if not available\n",
+    "try:\n",
+    "    import llmcompressor\n",
+    "    print(\"✅ llmcompressor already installed\")\n",
+    "except ImportError:\n",
+    "    print(\"Installing llmcompressor...\")\n",
+    "    # Try PyPI first\n",
+    "    import subprocess\n",
+    "    import sys\n",
+    "    result = subprocess.run([sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"llmcompressor\"],\n",
+    "                            capture_output=True, text=True)\n",
+    "    if result.returncode != 0:\n",
+    "        # Fallback to GitHub installation\n",
+    "        print(\"PyPI installation failed, trying GitHub...\")\n",
+    "        subprocess.run([sys.executable, \"-m\", \"pip\", \"install\", \"-q\",\n",
+    "                        \"git+https://github.com/vllm-project/llm-compressor.git\"],\n",
+    "                       check=False)\n",
+    "    print(\"✅ llmcompressor installed\")\n",
+    "\n",
     "# Utility function to check disk space\n",
     "import shutil\n",
     "def check_disk_space():\n",
@@ -125,8 +145,21 @@
     "outputs": [],
     "source": [
     "# LLM Compressor (vLLM native quantization tool)\n",
-    "from llmcompressor import oneshot\n",
-    "from llmcompressor.modifiers.quantization import AWQModifier\n",
+    "# Import with error handling in case installation failed\n",
+    "try:\n",
+    "    from llmcompressor import oneshot\n",
+    "    from llmcompressor.modifiers.quantization import AWQModifier\n",
+    "    LLM_COMPRESSOR_AVAILABLE = True\n",
+    "    print(\"✅ LLM Compressor imported successfully\")\n",
+    "except ImportError as e:\n",
+    "    print(f\"❌ Failed to import llmcompressor: {e}\")\n",
+    "    print(\"Please ensure llmcompressor is installed:\")\n",
+    "    print(\"  %pip install llmcompressor\")\n",
+    "    print(\"  OR\")\n",
+    "    print(\"  %pip install git+https://github.com/vllm-project/llm-compressor.git\")\n",
+    "    LLM_COMPRESSOR_AVAILABLE = False\n",
+    "    raise\n",
+    "\n",
     "from transformers import AutoTokenizer\n",
     "from huggingface_hub import HfApi, scan_cache_dir, delete_revisions, upload_folder\n",
     "import torch\n",
@@ -196,11 +229,17 @@
     "    print(f\"Config: {awq_config}\")\n",
     "    print(\"⚠️ LLM Compressor will load the model, quantize it, and save to local directory\")\n",
     "    \n",
+    "    if not LLM_COMPRESSOR_AVAILABLE:\n",
+    "        raise ImportError(\"LLM Compressor is not available. Please install it first.\")\n",
+    "    \n",
     "    try:\n",
     "        # LLM Compressor's oneshot function handles everything:\n",
     "        # - Loading the model\n",
     "        # - Quantization with calibration data\n",
     "        # - Saving quantized model\n",
+    "        print(f\"  → Starting quantization with LLM Compressor...\")\n",
+    "        print(f\"  → This may take 30-60 minutes depending on model size...\")\n",
+    "        \n",
     "        oneshot(\n",
     "            model=repo_id,\n",
     "            output_dir=temp_output_dir,\n",
@@ -217,9 +256,13 @@
     "            calibration_data=calibration_texts[:min(calibration_dataset_size, 128)] # Limit for efficiency\n",
     "        )\n",
     "        \n",
-    "        print(f\"✅ Model quantized to AWQ\")\n",
+    "        print(f\"✅ Model quantized to AWQ successfully\")\n",
     "    except Exception as e:\n",
     "        print(f\"❌ Quantization failed: {e}\")\n",
+    "        print(f\"\\nTroubleshooting:\")\n",
+    "        print(f\"1. Ensure llmcompressor is installed: %pip install llmcompressor\")\n",
+    "        print(f\"2. Or install from GitHub: %pip install git+https://github.com/vllm-project/llm-compressor.git\")\n",
+    "        print(f\"3. Check that you have sufficient GPU memory (40GB+ recommended)\")\n",
     "        import traceback\n",
     "        traceback.print_exc()\n",
     "        raise\n",