DB2169 committed · verified
Commit 1d5499e · 1 Parent(s): 5e98324

Update app.py

Files changed (1):
  app.py  +59 -47
app.py CHANGED
@@ -3,7 +3,8 @@ from typing import List, Dict, Any, Optional
  from PIL import Image
  import torch
  import gradio as gr
- from huggingface_hub import snapshot_download  # pulls your repo at startup
+ import spaces  # ZeroGPU: decorate GPU-bound functions
+ from huggingface_hub import snapshot_download
  from diffusers import (
      StableDiffusionXLPipeline,
      StableDiffusionPipeline,
@@ -15,14 +16,15 @@ from diffusers import (
      PNDMScheduler,
  )
  
- # -------- Configuration (set these in Space Secrets for private repos) --------
- MODEL_REPO_ID = os.getenv("MODEL_REPO_ID", "DB2169/CyberPony_Lora")  # e.g., your repo id
- CHECKPOINT_FILENAME = os.getenv("CHECKPOINT_FILENAME", "SAFETENSORS_FILENAME.safetensors")  # exact base ckpt filename
- HF_TOKEN = os.getenv("HF_TOKEN", None)  # optional if repo is public
+ # -------- Configuration (set as Space Secrets if needed) --------
+ MODEL_REPO_ID = os.getenv("MODEL_REPO_ID", "DB2169/CyberPony_Lora")  # your model repo id
+ CHECKPOINT_FILENAME = os.getenv("CHECKPOINT_FILENAME", "SAFETENSORS_FILENAME.safetensors")  # exact .safetensors name
+ HF_TOKEN = os.getenv("HF_TOKEN", None)  # only required for private repos
+ DO_WARMUP = os.getenv("WARMUP", "1") == "1"  # set to "0" to disable warmup
  
  # -------- Runtime defaults --------
- device = "cuda" if torch.cuda.is_available() else "cpu"
- dtype = torch.float16 if device == "cuda" else torch.float32
+ REPO_DIR = "/home/user/model"  # local cache mount for snapshot_download
+ # Defer CUDA detection to GPU-run function for ZeroGPU; do not move to CUDA at import time
  
  SCHEDULERS = {
      "default": None,
@@ -34,48 +36,44 @@ SCHEDULERS = {
      "dpmpp_2m": DPMSolverMultistepScheduler,
  }
  
- # Globals filled on startup
+ # Globals populated on startup
  pipe = None
  IS_SDXL = True
  LORA_MANIFEST: Dict[str, Dict[str, str]] = {}
- REPO_DIR = "/home/user/model"  # cached snapshot location in Spaces
  
- # -------- Model bootstrap --------
+ 
+ # -------- Model bootstrap (CPU) --------
  def bootstrap_model():
      global pipe, IS_SDXL, LORA_MANIFEST
-     # Download/copy all repo files locally (weights + manifest)
      local_dir = snapshot_download(
          repo_id=MODEL_REPO_ID,
          token=HF_TOKEN,
          local_dir=REPO_DIR,
          ignore_patterns=["*.md"],
-     )  # downloads your model repo into the container cache [web:362]
- 
+     )
      ckpt_path = os.path.join(local_dir, CHECKPOINT_FILENAME)
      if not os.path.exists(ckpt_path):
          raise FileNotFoundError(f"Checkpoint not found: {ckpt_path}")
  
-     # Try SDXL single-file, then SD 1.x/2.x single-file
      try:
          _pipe = StableDiffusionXLPipeline.from_single_file(
-             ckpt_path, torch_dtype=dtype, use_safetensors=True, add_watermarker=False
-         )  # SDXL loader [web:104]
+             ckpt_path, torch_dtype=torch.float16, use_safetensors=True, add_watermarker=False
+         )
          sdxl = True
      except Exception:
          _pipe = StableDiffusionPipeline.from_single_file(
-             ckpt_path, torch_dtype=dtype, use_safetensors=True
-         )  # SD 1.x/2.x fallback [web:104]
+             ckpt_path, torch_dtype=torch.float16, use_safetensors=True
+         )
          sdxl = False
  
+     # Keep on CPU until GPU-decorated call (ZeroGPU attaches GPU on demand)
      if hasattr(_pipe, "enable_attention_slicing"):
          _pipe.enable_attention_slicing("max")
      if hasattr(_pipe, "enable_vae_slicing"):
          _pipe.enable_vae_slicing()
      if hasattr(_pipe, "set_progress_bar_config"):
          _pipe.set_progress_bar_config(disable=True)
-     _pipe.to(device)
  
-     # Load LoRA manifest if present
      man_path = os.path.join(local_dir, "loras.json")
      manifest = {}
      if os.path.exists(man_path):
@@ -85,20 +83,21 @@ def bootstrap_model():
          except Exception as e:
              print(f"[WARN] Failed to parse loras.json: {e}")
  
-     # Publish globals
-     return _pipe, sdxl, manifest
+     pipe = _pipe
+     IS_SDXL = sdxl
+     LORA_MANIFEST = manifest
+ 
  
- def apply_loras(selected: List[str], scale: float):
+ def apply_loras(selected: List[str], scale: float, repo_dir: str):
      if not selected or scale <= 0:
          return
-     # Each selected LoRA should exist in manifest; supports repo/weight_name or local 'path'
      for name in selected:
          meta = LORA_MANIFEST.get(name)
          if not meta:
              continue
          try:
              if "path" in meta:
-                 pipe.load_lora_weights(os.path.join(REPO_DIR, meta["path"]), adapter_name=name)
+                 pipe.load_lora_weights(os.path.join(repo_dir, meta["path"]), adapter_name=name)
              else:
                  pipe.load_lora_weights(meta.get("repo", ""), weight_name=meta.get("weight_name"), adapter_name=name)
          except Exception as e:
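
For reference, apply_loras() accepts two entry shapes from loras.json: a local "path" resolved inside the downloaded snapshot, or a Hub "repo" plus "weight_name". A minimal sketch of such a manifest, written from Python with hypothetical adapter and file names:

import json

manifest = {
    "detail_tweaker": {"path": "loras/detail_tweaker.safetensors"},  # resolved relative to REPO_DIR
    "style_lora": {"repo": "some-user/some-lora-repo", "weight_name": "style.safetensors"},
}
with open("loras.json", "w") as f:
    json.dump(manifest, f, indent=2)
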
@@ -108,6 +107,8 @@ def apply_loras(selected: List[str], scale: float):
      except Exception as e:
          print(f"[WARN] set_adapters failed: {e}")
  
+ 
+ @spaces.GPU  # ZeroGPU: allocate/attach GPU for this function call
  def txt2img(
      prompt: str,
      negative: str,
@@ -122,6 +123,11 @@ def txt2img(
      lora_scale: float,
      fuse_lora: bool,
  ):
+     # Resolve device inside GPU context
+     local_device = "cuda" if torch.cuda.is_available() else "cpu"
+     local_dtype = torch.float16 if local_device == "cuda" else torch.float32
+     pipe.to(local_device)
+ 
      # Scheduler swap
      if scheduler in SCHEDULERS and SCHEDULERS[scheduler] is not None:
          try:
@@ -129,8 +135,8 @@ def txt2img(
          except Exception as e:
              print(f"[WARN] Scheduler switch failed: {e}")
  
-     # Apply LoRAs
-     apply_loras(loras, lora_scale)
+     # LoRAs
+     apply_loras(loras, lora_scale, REPO_DIR)
      if fuse_lora and loras:
          try:
              pipe.fuse_lora(lora_scale=float(lora_scale))
@@ -138,7 +144,7 @@ def txt2img(
              print(f"[WARN] fuse_lora failed: {e}")
  
      # Determinism
-     generator = torch.Generator(device=device).manual_seed(int(seed)) if seed not in (None, "") else None
+     generator = torch.Generator(device=local_device).manual_seed(int(seed)) if seed not in (None, "") else None
  
      kwargs: Dict[str, Any] = dict(
          prompt=prompt or "",
@@ -150,60 +156,66 @@ def txt2img(
          num_images_per_prompt=int(images),
          generator=generator,
      )
-     out = pipe(**kwargs)
+     with torch.inference_mode():
+         out = pipe(**kwargs)
      return out.images
  
+ 
  def warmup():
-     # Small, fast call to initialize kernels/graphs so first user is instant
      try:
          _ = txt2img("warmup", "", 512, 512, 4, 4.0, 1, 1234, "default", [], 0.0, False)
      except Exception as e:
          print(f"[WARN] Warmup failed: {e}")
  
- # --------------------------- Build the UI inside Blocks ---------------------------
- with gr.Blocks(title="SDXL Space (single-file, LoRA-ready)") as demo:  # Blocks context required for events [web:371]
-     gr.Markdown("### SDXL text‑to‑image (single‑file checkpoint) with optional LoRAs")  # UI heading [web:147]
+ 
+ # --------------------------- Build UI ---------------------------
+ with gr.Blocks(title="SDXL Space (ZeroGPU, single-file checkpoint, LoRA-ready)") as demo:
+     gr.Markdown("### SDXL text‑to‑image with single‑file checkpoint and optional LoRAs")
+ 
      with gr.Row():
          prompt = gr.Textbox(label="Prompt", lines=3)
          negative = gr.Textbox(label="Negative Prompt", lines=3)
+ 
      with gr.Row():
          width = gr.Slider(256, 1536, 1024, step=64, label="Width")
          height = gr.Slider(256, 1536, 1024, step=64, label="Height")
+ 
      with gr.Row():
          steps = gr.Slider(5, 80, 30, step=1, label="Steps")
          guidance = gr.Slider(0.0, 20.0, 6.5, step=0.1, label="Guidance")
          images = gr.Slider(1, 4, 1, step=1, label="Images")
+ 
      with gr.Row():
          seed = gr.Number(value=None, precision=0, label="Seed (blank=random)")
          scheduler = gr.Dropdown(list(SCHEDULERS.keys()), value="dpmpp_2m", label="Scheduler")
  
-     # LoRA multi-select populated after manifest loads
-     lora_names = gr.CheckboxGroup(choices=[], label="LoRAs (from loras.json)")
+     lora_names = gr.CheckboxGroup(choices=[], label="LoRAs (from loras.json; select any)")
      lora_scale = gr.Slider(0.0, 1.5, 0.7, step=0.05, label="LoRA scale")
      fuse = gr.Checkbox(label="Fuse LoRA (faster after load)")
  
      btn = gr.Button("Generate", variant="primary")
      gallery = gr.Gallery(columns=4, height=420)
  
-     # Startup loader (runs at app load)
+     # Load model + manifest, then populate LoRA choices
      def _startup():
-         global pipe, IS_SDXL, LORA_MANIFEST
-         pipe, IS_SDXL, LORA_MANIFEST = bootstrap_model()
+         bootstrap_model()
          return gr.CheckboxGroup.update(choices=list(LORA_MANIFEST.keys()))
-     demo.load(_startup, outputs=[lora_names])  # fill LoRA list once model is ready [web:147]
  
-     # Warm-up pass after model load for snappy first request
-     demo.load(lambda: warmup(), inputs=None, outputs=None)  # performance warmup [web:356]
+     demo.load(_startup, outputs=[lora_names])
+ 
+     # Optional warmup (costs a tiny GPU run on first boot); set WARMUP=0 to skip
+     if DO_WARMUP:
+         demo.load(lambda: warmup(), inputs=None, outputs=None)
  
-     # Wire the button click inside Blocks, with per-event concurrency control
+     # Event binding inside Blocks; one GPU job at a time for SDXL
      btn.click(
          txt2img,
          inputs=[prompt, negative, width, height, steps, guidance, images, seed, scheduler, lora_names, lora_scale, fuse],
          outputs=[gallery],
          api_name="txt2img",
-         concurrency_limit=1,  # one GPU job at a time for SDXL
-         concurrency_id="gpu_queue",  # shared queue id if you add more GPU events
-     )  # per-event queue parameters in Gradio 4.x [web:388][web:373]
+         concurrency_limit=1,
+         concurrency_id="gpu_queue",
+     )
  
- # Global queue config (no deprecated args)
- demo.queue(max_size=32, default_concurrency_limit=1).launch()  # supported queue pattern in Gradio 4.x [web:373][web:381]
+ # Global queue limits for Gradio 4.x
+ demo.queue(max_size=32, default_concurrency_limit=1).launch()
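
Because the click handler is registered with api_name="txt2img", the endpoint can also be called programmatically once the Space is running. A hedged sketch with gradio_client (the Space id is a placeholder; the positional arguments mirror the inputs list wired to btn.click above):

from gradio_client import Client

client = Client("DB2169/your-space-name")  # hypothetical Space id
result = client.predict(
    "a neon-lit cyberpunk street at night",  # prompt
    "",                                      # negative
    1024, 1024,                              # width, height
    30, 6.5,                                 # steps, guidance
    1, 1234,                                 # images, seed
    "dpmpp_2m",                              # scheduler
    [], 0.7, False,                          # loras, lora_scale, fuse
    api_name="/txt2img",
)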
 