"""Gradio demo: segmentation-based Gaussian background blur and depth-based lens blur."""

import hashlib
import os

import gradio as gr
import numpy as np
import torch
from huggingface_hub import snapshot_download
from PIL import Image, ImageFilter
from torch import nn
from transformers import (
    DepthProForDepthEstimation,
    DepthProImageProcessorFast,
    SegformerForSemanticSegmentation,
    SegformerImageProcessor,
)

# --- Lens blur: the DepthPro model and processor are loaded once at import ---

MODEL_REPO = "apple/DepthPro-hf"
CACHE_DIR = "./cache"  # cache folder for model files
# Expected SHA-256 of model.safetensors.
# NOTE(review): snapshot_download below pins no revision, so this check will
# start failing if the upstream weights file is ever replaced.
EXPECTED_SHA256 = "9c6811e3165485b9a94a204329860cb333a79877e757eb795a179a4ea34bbcf7"


def _sha256_of_file(path, chunk_size=1 << 20):
    """Return the hex SHA-256 digest of the file at *path*.

    The file is hashed in ``chunk_size``-byte pieces so that a multi-gigabyte
    weights file never has to be held in memory all at once.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # iter(callable, sentinel) stops at the first empty read (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


# Download the model repository (if not cached) and verify the weights file.
snapshot_path = snapshot_download(repo_id=MODEL_REPO, cache_dir=CACHE_DIR)
model_file = os.path.join(snapshot_path, "model.safetensors")
file_hash = _sha256_of_file(model_file)
if file_hash != EXPECTED_SHA256:
    raise RuntimeError("Model file hash mismatch! Download may be corrupted.")

# Load model and processor from the verified local snapshot (no re-download).
model = DepthProForDepthEstimation.from_pretrained(snapshot_path)
processor = DepthProImageProcessorFast.from_pretrained(snapshot_path)

# Use the GPU if available for speed.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()


def greet_test(name):
    """Return a greeting for *name*; used as a basic smoke test of the UI."""
    return "Hello " + name + "!!"
# Face-parsing (processor, model) pairs keyed by device string, so the weights
# are loaded once per process instead of on every request.
_FACE_PARSER_REPO = "jonathandinu/face-parsing"
_FACE_PARSER_CACHE = {}

# Lens-blur tuning constants.
_LENS_MAX_DIM = 1536  # longest image side fed to the depth processor
_LENS_BLUR_RADIUS = 15  # Gaussian radius of the fully blurred layer


def _get_face_parser(device_name):
    """Return the (processor, model) pair for *device_name*, loading it on first use."""
    if device_name not in _FACE_PARSER_CACHE:
        image_processor = SegformerImageProcessor.from_pretrained(_FACE_PARSER_REPO)
        seg_model = SegformerForSemanticSegmentation.from_pretrained(_FACE_PARSER_REPO)
        seg_model.to(device_name).eval()
        _FACE_PARSER_CACHE[device_name] = (image_processor, seg_model)
    return _FACE_PARSER_CACHE[device_name]


def gauss_blur(image, sigma):
    """Blur the background of *image* while keeping the segmented foreground sharp.

    Args:
        image: A PIL image, or an array accepted by ``Image.fromarray``.
        sigma: Blur strength; passed to ``ImageFilter.GaussianBlur`` as its
            radius. ``None`` is treated as 0 (no blur).

    Returns:
        A PIL image in which pixels whose predicted label is 0 are taken from
        the blurred copy and all other pixels from the original.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("Please provide an image.")
    if sigma is None:
        # The number field may be left empty; fall back to "no blur".
        sigma = 0

    seg_device = (
        "cuda"
        if torch.cuda.is_available()
        else "mps"
        if torch.backends.mps.is_available()
        else "cpu"
    )

    # Ensure image is a PIL Image.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    image_processor, seg_model = _get_face_parser(seg_device)

    # Inference only: no_grad avoids building an autograd graph.
    inputs = image_processor(images=image, return_tensors="pt").to(seg_device)
    with torch.no_grad():
        logits = seg_model(**inputs).logits
        # Resize logits to the input image dimensions (PIL size is W x H).
        upsampled_logits = nn.functional.interpolate(
            logits,
            size=image.size[::-1],  # H x W
            mode="bilinear",
            align_corners=False,
        )

    # Per-pixel class labels; anything other than label 0 counts as foreground.
    labels = upsampled_logits.argmax(dim=1)[0].cpu().numpy()
    is_foreground = (labels != 0)[:, :, None]

    image_np = np.array(image)
    blurred_np = np.array(image.filter(ImageFilter.GaussianBlur(radius=sigma)))

    # Original pixels on the foreground, blurred pixels elsewhere.
    result_np = np.where(is_foreground, image_np, blurred_np)
    return Image.fromarray(result_np.astype(np.uint8))


def lens_blur(image: Image.Image) -> Image.Image:
    """Apply depth-dependent lens blur to *image* using the DepthPro model.

    The predicted depth map is min-max normalized to [0, 1] and used as a
    per-pixel blend weight between the original image (weight 0) and a
    Gaussian-blurred copy (weight 1).

    Args:
        image: Input PIL image.

    Returns:
        The blended PIL image, at the input image's resolution.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("Please provide an image.")

    # 1. Preprocess: shrink so the longest side is at most _LENS_MAX_DIM,
    #    preserving the aspect ratio.
    orig_w, orig_h = image.size
    max_dim = max(orig_w, orig_h)
    if max_dim > _LENS_MAX_DIM:
        ratio = float(_LENS_MAX_DIM) / max_dim
        new_size = (int(orig_w * ratio), int(orig_h * ratio))
        image_resized = image.resize(new_size, Image.LANCZOS)
    else:
        image_resized = image

    inputs = processor(images=image_resized, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # 2. Inference: predict the depth map.
    with torch.no_grad():
        outputs = model(**inputs)

    # Post-process back to the original image resolution.
    depth_map = processor.post_process_depth_estimation(
        outputs, target_sizes=[(orig_h, orig_w)]
    )[0]["predicted_depth"]
    depth_map = depth_map.squeeze().cpu().float().numpy()  # H x W depth values

    # Normalize depth to [0, 1]; a constant depth map yields all zeros.
    depth_min, depth_max = depth_map.min(), depth_map.max()
    if depth_max > depth_min:
        depth_norm = (depth_map - depth_min) / (depth_max - depth_min)
    else:
        depth_norm = np.zeros_like(depth_map)

    # 3. Blurred copy of the original image.
    blurred_np = np.array(
        image.filter(ImageFilter.GaussianBlur(radius=_LENS_BLUR_RADIUS)),
        dtype=np.float32,
    )
    original_np = np.array(image, dtype=np.float32)
    # Shape (H, W, 1) so the weight broadcasts across color channels.
    depth_mask = depth_norm.astype(np.float32)[..., None]

    # 4. Blend: near (mask ~ 0) -> original, far (mask ~ 1) -> blurred.
    blended_np = original_np * (1 - depth_mask) + blurred_np * depth_mask
    blended_np = blended_np.clip(0, 255).astype(np.uint8)
    return Image.fromarray(blended_np)


# Build the Gradio app with one tab per demo.
with gr.Blocks() as demo:
    gr.Markdown("# Gaussian Blur and Lens Blur Demo")
    with gr.Tab("Greeting (Basic Test)"):
        gr.Interface(fn=greet_test, inputs="text", outputs="text")
    with gr.Tab("Gaussian Blur on Foreground"):
        gr.Interface(
            fn=gauss_blur,
            inputs=["image", "number"],
            outputs="image",
            title="Gaussian Blur",
            description="Apply Gaussian blur to the background of the image while keeping the foreground sharp. Adjust the sigma value to control the blur intensity.",
        )
    with gr.Tab("Lens Blur"):
        gr.Interface(
            fn=lens_blur,
            inputs=gr.Image(type="pil"),
            outputs="image",
            title="Lens Blur",
            description="Apply depth-dependent lens blur to the image using the Apple DepthPro model. The blur intensity varies based on the depth of each pixel.",
        )

# share=True requests a public share link in addition to the local server.
demo.launch(share=True)