Spaces:

Alovestocode
/

ZeroGPU-LLM-Inference

Sleeping

Alikestocode committed on Nov 10

Commit

e08f8c4

1 Parent(s): cef8ecd

Simplify AWQModifier usage - remove try/except wrapper

- AWQModifier() works without parameters
- Remove unnecessary try/except that might cause confusion
- Use default 4-bit AWQ settings

Files changed (1) hide show

quantize_to_awq_colab.ipynb +9 -12

quantize_to_awq_colab.ipynb CHANGED Viewed

@@ -252,24 +252,21 @@
         "        print(f\"  → Starting quantization with LLM Compressor...\")\n",
         "        print(f\"  → This may take 30-60 minutes depending on model size...\")\n",
         "        \n",
-        "        # AWQModifier may not accept parameters directly - try with defaults first\n",
-        "        # If that fails, we'll try passing config via oneshot parameters\n",
-        "        try:\n",
-        "            # Try with AWQModifier() - no parameters (uses defaults)\n",
-        "            modifiers = [AWQModifier()]\n",
-        "            print(f\"  → Using AWQModifier with default settings (4-bit AWQ)\")\n",
-        "        except Exception as e:\n",
-        "            print(f\"  → AWQModifier() failed: {e}, trying alternative...\")\n",
-        "            # Alternative: Try creating modifier differently or pass config to oneshot\n",
-        "            modifiers = [AWQModifier()]\n",
         "        \n",
         "        oneshot(\n",
         "            model=repo_id,\n",
         "            output_dir=temp_output_dir,\n",
         "            modifiers=modifiers,\n",
         "            token=os.environ.get(\"HF_TOKEN\"),\n",
-        "            # Calibration data can be passed as a list of strings\n",
-        "            calibration_data=calibration_texts[:min(calibration_dataset_size, 128)]  # Limit for efficiency\n",
         "        )\n",
         "        \n",
         "        print(f\"✅ Model quantized to AWQ successfully\")\n",

         "        print(f\"  → Starting quantization with LLM Compressor...\")\n",
         "        print(f\"  → This may take 30-60 minutes depending on model size...\")\n",
         "        \n",
+        "        # AWQModifier API: Use AWQModifier() without parameters\n",
+        "        # The modifier uses default 4-bit AWQ settings\n",
+        "        print(f\"  → Creating AWQModifier with default settings...\")\n",
+        "        modifiers = [AWQModifier()]\n",
+        "        print(f\"  → AWQModifier created successfully\")\n",
         "        \n",
+        "        # Call oneshot with the modifier\n",
+        "        print(f\"  → Starting quantization process...\")\n",
         "        oneshot(\n",
         "            model=repo_id,\n",
         "            output_dir=temp_output_dir,\n",
         "            modifiers=modifiers,\n",
         "            token=os.environ.get(\"HF_TOKEN\"),\n",
+        "            # Calibration data: list of strings\n",
+        "            calibration_data=calibration_texts[:min(calibration_dataset_size, 128)]\n",
         "        )\n",
         "        \n",
         "        print(f\"✅ Model quantized to AWQ successfully\")\n",