Josh Brown Kramer committed on
Commit
d16a1d1
·
1 Parent(s): 1decf0a

Removed faceparsing reference

Browse files
Files changed (3) hide show
  1. app.py +2 -3
  2. faceparsing.py +57 -33
  3. requirements.txt +10 -7
app.py CHANGED
@@ -4,7 +4,7 @@ from huggingface_hub import hf_hub_download
4
  import onnxruntime as ort
5
  import numpy as np
6
  from PIL import Image
7
- from faceparsing import get_face_mask
8
 
9
  # import torch
10
  # from your_pix2pixhd_code import YourPix2PixHDModel, load_image, tensor2im # Adapt these imports
@@ -71,5 +71,4 @@ demo = gr.Interface(
71
  article=article,
72
  )
73
 
74
- demo.launch()
75
- #demo.launch(debug=True)
 
4
  import onnxruntime as ort
5
  import numpy as np
6
  from PIL import Image
7
+ #from faceparsing import get_face_mask
8
 
9
  # import torch
10
  # from your_pix2pixhd_code import YourPix2PixHDModel, load_image, tensor2im # Adapt these imports
 
71
  article=article,
72
  )
73
 
74
+ demo.launch()#demo.launch(debug=True)
 
faceparsing.py CHANGED
@@ -6,6 +6,10 @@ import numpy as np
6
  from PIL import Image
7
  import matplotlib.pyplot as plt
8
 
 
 
 
 
9
  # convenience expression for automatically determining device
10
  device = (
11
  "cuda"
@@ -17,44 +21,64 @@ device = (
17
  else "cpu"
18
  )
19
 
20
- # load models
21
- image_processor = SegformerImageProcessor.from_pretrained("jonathandinu/face-parsing")
22
- model = SegformerForSemanticSegmentation.from_pretrained("jonathandinu/face-parsing")
23
- model.to(device)
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def get_face_mask(image):
26
- # run inference on image
27
- inputs = image_processor(images=image, return_tensors="pt").to(device)
28
- outputs = model(**inputs)
29
- logits = outputs.logits # shape (batch_size, num_labels, ~height/4, ~width/4)
30
-
31
- # resize output to match input image dimensions
32
- upsampled_logits = nn.functional.interpolate(logits,
33
- size=image.size[::-1], # H x W
34
- mode='bilinear',
35
- align_corners=False)
36
-
37
- # get label masks
38
- labels = upsampled_logits.argmax(dim=1)[0]
39
-
40
- # move to CPU to visualize in matplotlib
41
- labels_viz = labels.cpu().numpy()
42
 
43
- # Debug: print label statistics
44
- print(f"Labels min: {labels_viz.min()}, max: {labels_viz.max()}, unique: {np.unique(labels_viz)}")
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- #Map to something more colorful. Use a color map to map the labels to a color.
47
- #Create a color map for colors 0 through 18
48
- color_map = plt.get_cmap('tab20')
49
- #Map the labels to a color - normalize labels to 0-1 range for the colormap
50
- # Face parsing models typically have 19 classes (0-18), so normalize by 18
51
- normalized_labels = labels_viz.astype(np.float32) / 18.0
52
- colors = color_map(normalized_labels)
53
 
 
 
 
 
 
 
 
54
 
55
- #Convert to PIL Image - take only RGB channels (drop alpha)
56
- colors_rgb = colors[:, :, :3] # Remove alpha channel
57
- colors_pil = Image.fromarray((colors_rgb * 255).astype(np.uint8))
58
 
59
- return colors_pil
 
 
 
 
60
 
 
6
  from PIL import Image
7
  import matplotlib.pyplot as plt
8
 
9
+ # Global variables for lazy loading
10
+ image_processor = None
11
+ model = None
12
+
13
  # convenience expression for automatically determining device
14
  device = (
15
  "cuda"
 
21
  else "cpu"
22
  )
23
 
24
+ def load_face_parsing_models():
25
+ """Load face parsing models lazily"""
26
+ global image_processor, model
27
+
28
+ if image_processor is None or model is None:
29
+ try:
30
+ print(f"Loading face parsing models on device: {device}")
31
+ image_processor = SegformerImageProcessor.from_pretrained("jonathandinu/face-parsing")
32
+ model = SegformerForSemanticSegmentation.from_pretrained("jonathandinu/face-parsing")
33
+ model.to(device)
34
+ print("Face parsing models loaded successfully")
35
+ except Exception as e:
36
+ print(f"Error loading face parsing models: {e}")
37
+ return False
38
+
39
+ return True
40
 
41
  def get_face_mask(image):
42
+ # Load models if not already loaded
43
+ if not load_face_parsing_models():
44
+ return "Error: Failed to load face parsing models"
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
+ try:
47
+ # run inference on image
48
+ inputs = image_processor(images=image, return_tensors="pt").to(device)
49
+ outputs = model(**inputs)
50
+ logits = outputs.logits # shape (batch_size, num_labels, ~height/4, ~width/4)
51
+
52
+ # resize output to match input image dimensions
53
+ upsampled_logits = nn.functional.interpolate(logits,
54
+ size=image.size[::-1], # H x W
55
+ mode='bilinear',
56
+ align_corners=False)
57
+
58
+ # get label masks
59
+ labels = upsampled_logits.argmax(dim=1)[0]
60
 
61
+ # move to CPU to visualize in matplotlib
62
+ labels_viz = labels.cpu().numpy()
63
+
64
+ # Debug: print label statistics
65
+ print(f"Labels min: {labels_viz.min()}, max: {labels_viz.max()}, unique: {np.unique(labels_viz)}")
 
 
66
 
67
+ #Map to something more colorful. Use a color map to map the labels to a color.
68
+ #Create a color map for colors 0 through 18
69
+ color_map = plt.get_cmap('tab20')
70
+ #Map the labels to a color - normalize labels to 0-1 range for the colormap
71
+ # Face parsing models typically have 19 classes (0-18), so normalize by 18
72
+ normalized_labels = labels_viz.astype(np.float32) / 18.0
73
+ colors = color_map(normalized_labels)
74
 
75
+ #Convert to PIL Image - take only RGB channels (drop alpha)
76
+ colors_rgb = colors[:, :, :3] # Remove alpha channel
77
+ colors_pil = Image.fromarray((colors_rgb * 255).astype(np.uint8))
78
 
79
+ return colors_pil
80
+
81
+ except Exception as e:
82
+ print(f"Error in face parsing: {e}")
83
+ return f"Error: {str(e)}"
84
 
requirements.txt CHANGED
@@ -1,7 +1,10 @@
1
- gradio
2
- onnxruntime-gpu
3
- opencv-python
4
- numpy
5
- mediapipe
6
- torch
7
- transformers
 
 
 
 
1
+ gradio>=4.0.0
2
+ onnxruntime>=1.15.0
3
+ opencv-python>=4.8.0
4
+ numpy>=1.24.0
5
+ mediapipe>=0.10.0
6
+ torch>=2.0.0
7
+ transformers>=4.30.0
8
+ Pillow>=9.5.0
9
+ matplotlib>=3.7.0
10
+ huggingface-hub>=0.16.0