Spaces:

jbrownkramer
/

makemeazombie

Running

App Files Files Community

Josh Brown Kramer committed on Jul 12

Commit

d16a1d1

1 Parent(s): 1decf0a

Removed faceparsing reference

Browse files

Files changed (3) hide show

app.py +2 -3
faceparsing.py +57 -33
requirements.txt +10 -7

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ from huggingface_hub import hf_hub_download
 import onnxruntime as ort
 import numpy as np
 from PIL import Image
-from faceparsing import get_face_mask
 # import torch
 # from your_pix2pixhd_code import YourPix2PixHDModel, load_image, tensor2im # Adapt these imports
@@ -71,5 +71,4 @@ demo = gr.Interface(
     article=article,
 )
-demo.launch()
-#demo.launch(debug=True)

 import onnxruntime as ort
 import numpy as np
 from PIL import Image
+#from faceparsing import get_face_mask
 # import torch
 # from your_pix2pixhd_code import YourPix2PixHDModel, load_image, tensor2im # Adapt these imports
     article=article,
 )
+demo.launch()#demo.launch(debug=True)

faceparsing.py CHANGED Viewed

@@ -6,6 +6,10 @@ import numpy as np
 from PIL import Image
 import matplotlib.pyplot as plt
 # convenience expression for automatically determining device
 device = (
     "cuda"
@@ -17,44 +21,64 @@ device = (
     else "cpu"
 )
-# load models
-image_processor = SegformerImageProcessor.from_pretrained("jonathandinu/face-parsing")
-model = SegformerForSemanticSegmentation.from_pretrained("jonathandinu/face-parsing")
-model.to(device)
 def get_face_mask(image):
-    # run inference on image
-    inputs = image_processor(images=image, return_tensors="pt").to(device)
-    outputs = model(**inputs)
-    logits = outputs.logits  # shape (batch_size, num_labels, ~height/4, ~width/4)
-    # resize output to match input image dimensions
-    upsampled_logits = nn.functional.interpolate(logits,
-                    size=image.size[::-1], # H x W
-                    mode='bilinear',
-                    align_corners=False)
-    # get label masks
-    labels = upsampled_logits.argmax(dim=1)[0]
-    # move to CPU to visualize in matplotlib
-    labels_viz = labels.cpu().numpy()
-    # Debug: print label statistics
-    print(f"Labels min: {labels_viz.min()}, max: {labels_viz.max()}, unique: {np.unique(labels_viz)}")
-    #Map to something more colorful.  Use a color map to map the labels to a color.
-    #Create a color map for colors 0 through 18
-    color_map = plt.get_cmap('tab20')
-    #Map the labels to a color - normalize labels to 0-1 range for the colormap
-    # Face parsing models typically have 19 classes (0-18), so normalize by 18
-    normalized_labels = labels_viz.astype(np.float32) / 18.0
-    colors = color_map(normalized_labels)
-    #Convert to PIL Image - take only RGB channels (drop alpha)
-    colors_rgb = colors[:, :, :3]  # Remove alpha channel
-    colors_pil = Image.fromarray((colors_rgb * 255).astype(np.uint8))
-    return colors_pil

 from PIL import Image
 import matplotlib.pyplot as plt
+# Global variables for lazy loading
+image_processor = None
+model = None
 # convenience expression for automatically determining device
 device = (
     "cuda"
     else "cpu"
 )
+def load_face_parsing_models():
+    """Load face parsing models lazily"""
+    global image_processor, model
+    if image_processor is None or model is None:
+        try:
+            print(f"Loading face parsing models on device: {device}")
+            image_processor = SegformerImageProcessor.from_pretrained("jonathandinu/face-parsing")
+            model = SegformerForSemanticSegmentation.from_pretrained("jonathandinu/face-parsing")
+            model.to(device)
+            print("Face parsing models loaded successfully")
+        except Exception as e:
+            print(f"Error loading face parsing models: {e}")
+            return False
+    return True
 def get_face_mask(image):
+    # Load models if not already loaded
+    if not load_face_parsing_models():
+        return "Error: Failed to load face parsing models"
+    try:
+        # run inference on image
+        inputs = image_processor(images=image, return_tensors="pt").to(device)
+        outputs = model(**inputs)
+        logits = outputs.logits  # shape (batch_size, num_labels, ~height/4, ~width/4)
+        # resize output to match input image dimensions
+        upsampled_logits = nn.functional.interpolate(logits,
+                        size=image.size[::-1], # H x W
+                        mode='bilinear',
+                        align_corners=False)
+        # get label masks
+        labels = upsampled_logits.argmax(dim=1)[0]
+        # move to CPU to visualize in matplotlib
+        labels_viz = labels.cpu().numpy()
+        # Debug: print label statistics
+        print(f"Labels min: {labels_viz.min()}, max: {labels_viz.max()}, unique: {np.unique(labels_viz)}")
+        #Map to something more colorful.  Use a color map to map the labels to a color.
+        #Create a color map for colors 0 through 18
+        color_map = plt.get_cmap('tab20')
+        #Map the labels to a color - normalize labels to 0-1 range for the colormap
+        # Face parsing models typically have 19 classes (0-18), so normalize by 18
+        normalized_labels = labels_viz.astype(np.float32) / 18.0
+        colors = color_map(normalized_labels)
+        #Convert to PIL Image - take only RGB channels (drop alpha)
+        colors_rgb = colors[:, :, :3]  # Remove alpha channel
+        colors_pil = Image.fromarray((colors_rgb * 255).astype(np.uint8))
+        return colors_pil
+    except Exception as e:
+        print(f"Error in face parsing: {e}")
+        return f"Error: {str(e)}"

requirements.txt CHANGED Viewed

@@ -1,7 +1,10 @@
-gradio
-onnxruntime-gpu
-opencv-python
-numpy
-mediapipe
-torch
-transformers

+gradio>=4.0.0
+onnxruntime>=1.15.0
+opencv-python>=4.8.0
+numpy>=1.24.0
+mediapipe>=0.10.0
+torch>=2.0.0
+transformers>=4.30.0
+Pillow>=9.5.0
+matplotlib>=3.7.0
+huggingface-hub>=0.16.0