Commit e16e634
Parent(s): ee03864

Fix: preload CLIP model during Docker build

Files changed:
- Dockerfile (+3 -0)
- vit_captioning/generate.py (+2 -1)
- vit_captioning/models/encoder.py (+2 -1)
Dockerfile
CHANGED
@@ -10,6 +10,9 @@ RUN apt-get update && apt-get install -y wget
 WORKDIR /app
 COPY . .
 
+RUN mkdir -p /models/clip && \
+    python3 -c "from transformers import CLIPModel; CLIPModel.from_pretrained('openai/clip-vit-base-patch32').save_pretrained('/models/clip')"
+
 RUN mkdir -p vit_captioning/artifacts && \
     wget https://huggingface.co/datasets/ClemSummer/clip-checkpoints/resolve/main/CLIPEncoder_40epochs_unfreeze12.pth \
         -O vit_captioning/artifacts/CLIPEncoder_40epochs_unfreeze12.pth
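The preload step above is an inline python3 -c one-liner; written out as a script it looks like the sketch below. The file name preload_clip.py and the extra processor save are assumptions, not part of the commit: the commit only saves CLIPModel, while generate.py below reads a CLIPProcessor from the same /models/clip path.

# preload_clip.py — hypothetical expansion of the python3 -c one-liner above.
# Downloads CLIP once at build time and bakes it into the image at /models/clip,
# so the Space never reaches the Hugging Face Hub (or a /tmp cache) at runtime.
from transformers import CLIPModel, CLIPProcessor

TARGET = "/models/clip"

# What the Dockerfile one-liner does: save the model weights + config locally.
CLIPModel.from_pretrained("openai/clip-vit-base-patch32").save_pretrained(TARGET)

# Assumption: the processor files (tokenizer + image preprocessing config) are
# saved as well, since generate.py loads CLIPProcessor.from_pretrained("/models/clip").
CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32").save_pretrained(TARGET)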
vit_captioning/generate.py
CHANGED
@@ -40,7 +40,8 @@ class CaptionGenerator:
             self.encoder_dim = 512
             #self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
             #HF needs all model downloads to a special read-write cache dir
-            self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir="/tmp")
+            #self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir="/tmp")
+            self.processor = CLIPProcessor.from_pretrained("/models/clip")
         else:
             raise ValueError("Unknown model type")
 
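A minimal usage sketch of the relocated processor follows, assuming a PIL image loaded from a hypothetical file path; only the /models/clip path comes from the diff.

# Sketch: the processor now resolves from the directory baked into the image,
# so no download and no cache_dir="/tmp" are needed at inference time.
from PIL import Image
from transformers import CLIPProcessor

processor = CLIPProcessor.from_pretrained("/models/clip")   # local files only
image = Image.open("example.jpg")                           # hypothetical input
inputs = processor(images=image, return_tensors="pt")
print(inputs["pixel_values"].shape)                         # e.g. torch.Size([1, 3, 224, 224])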
vit_captioning/models/encoder.py
CHANGED
@@ -35,7 +35,8 @@ class CLIPEncoder(nn.Module):
         super(CLIPEncoder, self).__init__()
         #self.clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
         #HF needs all model downloads to a special read-write cache dir
-        self.clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir="/tmp")
+        #self.clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir="/tmp")
+        self.clip = CLIPModel.from_pretrained("/models/clip")
 
     def forward(self, pixel_values):
         # ✅ Directly get the pooled image features (already the final representation)
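The diff shows only the first line of forward; below is a self-contained sketch of how the pooled image features are typically obtained from CLIPModel. The forward body is an assumption consistent with the comment above, not code from the commit.

import torch.nn as nn
from transformers import CLIPModel

class CLIPEncoder(nn.Module):
    def __init__(self):
        super(CLIPEncoder, self).__init__()
        # Local path baked in by the Dockerfile change above.
        self.clip = CLIPModel.from_pretrained("/models/clip")

    def forward(self, pixel_values):
        # get_image_features returns the pooled, projected image embedding,
        # 512-dim for clip-vit-base-patch32 (matching encoder_dim = 512 in generate.py).
        return self.clip.get_image_features(pixel_values=pixel_values)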