import torch
import numpy as np
from PIL import Image
from typing import Tuple, Optional, Union
import torchvision.transforms as transforms


class ImageProcessorManager:
    """Image validation, preprocessing and format standardization"""

    def __init__(self):
        self.supported_formats = ['JPEG', 'PNG', 'WEBP', 'JPG']
        self.min_resolution = (224, 224)

        # CLIP preprocessing: fixed 336x336 bicubic resize, normalized with CLIP's
        # own mean/std statistics (not the ImageNet values).
        self.clip_transform = transforms.Compose([
            transforms.Resize((336, 336), interpolation=transforms.InterpolationMode.BICUBIC),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.48145466, 0.4578275, 0.40821073],
                std=[0.26862954, 0.26130258, 0.27577711]
            )
        ])

    def load_image(self, file_path: Union[str, Image.Image]) -> Image.Image:
        """Load and validate image"""
        if isinstance(file_path, Image.Image):
            image = file_path
        else:
            try:
                image = Image.open(file_path)
            except Exception as e:
                raise ValueError(f"Failed to load image: {e}") from e

        # Standardize to RGB so every downstream model receives three channels.
        if image.mode != 'RGB':
            image = image.convert('RGB')

        # Reject inputs below the minimum resolution the downstream models expect.
        if image.size[0] < self.min_resolution[0] or image.size[1] < self.min_resolution[1]:
            raise ValueError(f"Image resolution too low, minimum required: {self.min_resolution}")

        return image

    def preprocess_for_yolo(self, image: Image.Image) -> np.ndarray:
        """Preprocess image for YOLO (keep original resolution, return a NumPy array)"""
        return np.array(image)

    def preprocess_for_clip(self, image: Image.Image) -> torch.Tensor:
        """Preprocess image for CLIP (336x336, CLIP normalization)"""
        return self.clip_transform(image)

    def preprocess_for_qwen(self, image: Image.Image) -> Image.Image:
        """Preprocess image for Qwen2.5-VL (the model handles dynamic resolution itself)"""
        return image

    def resize_with_aspect_ratio(self, image: Image.Image, max_size: int = 1024) -> Image.Image:
        """Resize image while maintaining aspect ratio so the longer side is at most max_size"""
        width, height = image.size
        if max(width, height) > max_size:
            if width > height:
                new_width = max_size
                new_height = int(height * (max_size / width))
            else:
                new_height = max_size
                new_width = int(width * (max_size / height))
            image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
        return image


print("✓ ImageProcessorManager defined")
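
# Minimal usage sketch (not part of the original pipeline): exercises each
# preprocessing path on a synthetic in-memory image so the cell stays runnable
# without any files on disk. The 512x512 size, the solid color, and the
# underscore-prefixed names below are illustrative assumptions.
_processor = ImageProcessorManager()
_demo_image = Image.new('RGB', (512, 512), color=(120, 80, 40))   # synthetic RGB test image
_validated = _processor.load_image(_demo_image)                   # passes the RGB and resolution checks
_yolo_input = _processor.preprocess_for_yolo(_validated)          # np.ndarray, shape (512, 512, 3)
_clip_input = _processor.preprocess_for_clip(_validated)          # torch.Tensor, shape (3, 336, 336)
_qwen_input = _processor.preprocess_for_qwen(_validated)          # PIL.Image, resolution unchanged
_thumbnail = _processor.resize_with_aspect_ratio(_validated, max_size=256)  # longer side capped at 256
print("✓ demo:", _yolo_input.shape, tuple(_clip_input.shape), _qwen_input.size, _thumbnail.size)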