Spaces:

Alovestocode
/

ZeroGPU-LLM-Inference

Sleeping

Alikestocode committed on Nov 10

Commit

2326498

1 Parent(s): d4bc333

Fix LLM Compressor package name: llmcompressor (no hyphen)

- Change from llm-compressor to llmcompressor (correct package name)
- Add fallback installation from GitHub if PyPI fails
- Add import error handling with helpful error messages
- Add troubleshooting tips in error messages

Files changed (1) hide show

quantize_to_awq_colab.ipynb +47 -4

quantize_to_awq_colab.ipynb CHANGED Viewed

@@ -36,9 +36,29 @@
       "source": [
         "# Install required packages\n",
         "# LLM Compressor is vLLM's native quantization tool\n",
-        "%pip install -q llm-compressor transformers accelerate huggingface_hub\n",
         "%pip install -q torch --index-url https://download.pytorch.org/whl/cu118\n",
         "\n",
         "# Utility function to check disk space\n",
         "import shutil\n",
         "def check_disk_space():\n",
@@ -125,8 +145,21 @@
       "outputs": [],
       "source": [
         "# LLM Compressor (vLLM native quantization tool)\n",
-        "from llmcompressor import oneshot\n",
-        "from llmcompressor.modifiers.quantization import AWQModifier\n",
         "from transformers import AutoTokenizer\n",
         "from huggingface_hub import HfApi, scan_cache_dir, delete_revisions, upload_folder\n",
         "import torch\n",
@@ -196,11 +229,17 @@
         "    print(f\"Config: {awq_config}\")\n",
         "    print(\"⚠️ LLM Compressor will load the model, quantize it, and save to local directory\")\n",
         "    \n",
         "    try:\n",
         "        # LLM Compressor's oneshot function handles everything:\n",
         "        # - Loading the model\n",
         "        # - Quantization with calibration data\n",
         "        # - Saving quantized model\n",
         "        oneshot(\n",
         "            model=repo_id,\n",
         "            output_dir=temp_output_dir,\n",
@@ -217,9 +256,13 @@
         "            calibration_data=calibration_texts[:min(calibration_dataset_size, 128)]  # Limit for efficiency\n",
         "        )\n",
         "        \n",
-        "        print(f\"✅ Model quantized to AWQ\")\n",
         "    except Exception as e:\n",
         "        print(f\"❌ Quantization failed: {e}\")\n",
         "        import traceback\n",
         "        traceback.print_exc()\n",
         "        raise\n",

       "source": [
         "# Install required packages\n",
         "# LLM Compressor is vLLM's native quantization tool\n",
+        "# Note: Package name is 'llmcompressor' (no hyphen), may need to install from GitHub\n",
+        "%pip install -q transformers accelerate huggingface_hub\n",
         "%pip install -q torch --index-url https://download.pytorch.org/whl/cu118\n",
         "\n",
+        "# Try installing llmcompressor from PyPI first, fallback to GitHub if not available\n",
+        "try:\n",
+        "    import llmcompressor\n",
+        "    print(\"✅ llmcompressor already installed\")\n",
+        "except ImportError:\n",
+        "    print(\"Installing llmcompressor...\")\n",
+        "    # Try PyPI first\n",
+        "    import subprocess\n",
+        "    import sys\n",
+        "    result = subprocess.run([sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"llmcompressor\"], \n",
+        "                          capture_output=True, text=True)\n",
+        "    if result.returncode != 0:\n",
+        "        # Fallback to GitHub installation\n",
+        "        print(\"PyPI installation failed, trying GitHub...\")\n",
+        "        subprocess.run([sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \n",
+        "                       \"git+https://github.com/vllm-project/llm-compressor.git\"], \n",
+        "                      check=False)\n",
+        "    print(\"✅ llmcompressor installed\")\n",
+        "\n",
         "# Utility function to check disk space\n",
         "import shutil\n",
         "def check_disk_space():\n",
       "outputs": [],
       "source": [
         "# LLM Compressor (vLLM native quantization tool)\n",
+        "# Import with error handling in case installation failed\n",
+        "try:\n",
+        "    from llmcompressor import oneshot\n",
+        "    from llmcompressor.modifiers.quantization import AWQModifier\n",
+        "    LLM_COMPRESSOR_AVAILABLE = True\n",
+        "    print(\"✅ LLM Compressor imported successfully\")\n",
+        "except ImportError as e:\n",
+        "    print(f\"❌ Failed to import llmcompressor: {e}\")\n",
+        "    print(\"Please ensure llmcompressor is installed:\")\n",
+        "    print(\"  %pip install llmcompressor\")\n",
+        "    print(\"  OR\")\n",
+        "    print(\"  %pip install git+https://github.com/vllm-project/llm-compressor.git\")\n",
+        "    LLM_COMPRESSOR_AVAILABLE = False\n",
+        "    raise\n",
+        "\n",
         "from transformers import AutoTokenizer\n",
         "from huggingface_hub import HfApi, scan_cache_dir, delete_revisions, upload_folder\n",
         "import torch\n",
         "    print(f\"Config: {awq_config}\")\n",
         "    print(\"⚠️ LLM Compressor will load the model, quantize it, and save to local directory\")\n",
         "    \n",
+        "    if not LLM_COMPRESSOR_AVAILABLE:\n",
+        "        raise ImportError(\"LLM Compressor is not available. Please install it first.\")\n",
+        "    \n",
         "    try:\n",
         "        # LLM Compressor's oneshot function handles everything:\n",
         "        # - Loading the model\n",
         "        # - Quantization with calibration data\n",
         "        # - Saving quantized model\n",
+        "        print(f\"  → Starting quantization with LLM Compressor...\")\n",
+        "        print(f\"  → This may take 30-60 minutes depending on model size...\")\n",
+        "        \n",
         "        oneshot(\n",
         "            model=repo_id,\n",
         "            output_dir=temp_output_dir,\n",
         "            calibration_data=calibration_texts[:min(calibration_dataset_size, 128)]  # Limit for efficiency\n",
         "        )\n",
         "        \n",
+        "        print(f\"✅ Model quantized to AWQ successfully\")\n",
         "    except Exception as e:\n",
         "        print(f\"❌ Quantization failed: {e}\")\n",
+        "        print(f\"\\nTroubleshooting:\")\n",
+        "        print(f\"1. Ensure llmcompressor is installed: %pip install llmcompressor\")\n",
+        "        print(f\"2. Or install from GitHub: %pip install git+https://github.com/vllm-project/llm-compressor.git\")\n",
+        "        print(f\"3. Check that you have sufficient GPU memory (40GB+ recommended)\")\n",
         "        import traceback\n",
         "        traceback.print_exc()\n",
         "        raise\n",