ibrahimdaud/raw-food-recognition
Viewer • Updated • 12.5k • 55
This repository contains both single-class and multi-label classification models trained for raw food ingredient recognition.
| Model | Parameters | Validation Accuracy | Architecture |
|---|---|---|---|
| ResNet-50 | ~25.6M | 97.84% | Standard residual network |
| SE-ResNet-50 | ~26.0M | 95.72% | ResNet-50 with SE attention |
Both models were trained on the ibrahimdaud/raw-food-recognition dataset, which contains 90+ raw food categories.
| Model | Training Mode | Parameters | Best mAP | Architecture |
|---|---|---|---|---|
| Multi-Label ResNet-50 | Freeze Encoder | ~24,656,463 | 0.3747 | ResNet-50 encoder (frozen) + classifier |
Multi-label models were trained on the ibrahimdaud/multi-label-food-recognition dataset for recognizing multiple ingredients in a single image.
from huggingface_hub import hf_hub_download
import torch
from models.resnet50 import create_resnet50
from models.se_resnet50 import create_se_resnet50
# Fetch both single-label checkpoints from the Hugging Face Hub and rebuild the
# matching networks, ready for inference.
#
# NOTE(security): torch.load unpickles the downloaded file, and unpickling can
# execute arbitrary code. Only load checkpoints from a repository you trust.

# Download ResNet-50 checkpoint
resnet_path = hf_hub_download(
    repo_id="ibrahimdaud/raw-food-recognition-models",
    filename="resnet50_pytorch_model.bin",
)
resnet_checkpoint = torch.load(resnet_path, map_location="cpu")

# Download SE-ResNet-50 checkpoint
se_resnet_path = hf_hub_download(
    repo_id="ibrahimdaud/raw-food-recognition-models",
    filename="se_resnet50_pytorch_model.bin",
)
se_resnet_checkpoint = torch.load(se_resnet_path, map_location="cpu")

# Create ResNet-50 model.
# The classifier head is sized from the label list stored in the checkpoint
# instead of a hard-coded 90, so it cannot drift from the saved weights.
# The weights themselves are supplied by load_state_dict below.
resnet_model = create_resnet50(
    num_classes=len(resnet_checkpoint["class_names"]),
    pretrained=False,
)
resnet_model.load_state_dict(resnet_checkpoint["model_state_dict"])
resnet_model.eval()  # switch to evaluation mode before running predictions

# Create SE-ResNet-50 model (head sized from its own checkpoint, as above).
se_resnet_model = create_se_resnet50(
    num_classes=len(se_resnet_checkpoint["class_names"]),
    pretrained=False,
    reduction=16,  # presumably the SE block reduction ratio; confirm in models/se_resnet50
)
se_resnet_model.load_state_dict(se_resnet_checkpoint["model_state_dict"])
se_resnet_model.eval()
from huggingface_hub import hf_hub_download
import torch
from models.multilabel_resnet50 import create_multilabel_resnet50
# Download the three multi-label checkpoints, then rebuild one model per
# checkpoint and put each in evaluation mode.
#
# NOTE(review): only the "freeze" checkpoint appears in this card's results
# table and file list; confirm the "full" and "finetune" files exist in the
# repository before relying on them.


def _fetch_checkpoint(filename):
    """Download `filename` from the model repo and load it onto the CPU.

    Returns a `(local_path, checkpoint)` pair.
    """
    local_path = hf_hub_download(
        repo_id="ibrahimdaud/raw-food-recognition-models",
        filename=filename,
    )
    return local_path, torch.load(local_path, map_location='cpu')


def _build_multilabel(checkpoint):
    """Rebuild a multi-label ResNet-50 from `checkpoint`, in eval mode."""
    model = create_multilabel_resnet50(
        # The head size is read from the checkpoint itself.
        num_classes=checkpoint['num_classes'],
        pretrained=False,
    )
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    return model


# Download Freeze Encoder, Full Training and Fine-Tuning checkpoints
freeze_path, freeze_checkpoint = _fetch_checkpoint(
    "multilabel_freeze_pytorch_model.bin"
)
full_path, full_checkpoint = _fetch_checkpoint(
    "multilabel_full_pytorch_model.bin"
)
finetune_path, finetune_checkpoint = _fetch_checkpoint(
    "multilabel_finetune_pytorch_model.bin"
)

# Load the corresponding models
freeze_model = _build_multilabel(freeze_checkpoint)
full_model = _build_multilabel(full_checkpoint)
finetune_model = _build_multilabel(finetune_checkpoint)
import torch
from PIL import Image
import torchvision.transforms as transforms
# Preprocess image: resize to 224x224, convert to a tensor, then normalize
# each channel with the widely used ImageNet mean/std values.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
image = Image.open('path/to/image.jpg').convert('RGB')
image_tensor = transform(image).unsqueeze(0)  # add a leading batch dimension

# Get multi-label predictions
with torch.no_grad():  # inference only, no gradients needed
    logits = freeze_model(image_tensor)  # or full_model, finetune_model
    probs = torch.sigmoid(logits)  # Multi-label probabilities

# Get top-k predictions for the single image in the batch.
top_k = 5
scores = probs[0]
# Clamp k so torch.topk does not fail when there are fewer than top_k classes.
top_probs, top_indices = torch.topk(scores, min(top_k, scores.shape[0]))

# Only the class index is printed here; map it through your own list of class
# names if you have one.
for prob, idx in zip(top_probs, top_indices):
    # Single braces so the values are interpolated. The original doubled
    # braces printed the placeholder text literally.
    print(f"Class {idx.item()}: {prob.item():.4f}")
import torch
from PIL import Image
import torchvision.transforms as transforms
# Run the same image through both single-label models and print each model's
# predicted class and confidence.

# Preprocessing pipeline: fixed 224x224 resize, tensor conversion, per-channel
# normalization.
_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    _normalize,
])
image = Image.open('path/to/image.jpg').convert('RGB')
image_tensor = transform(image).unsqueeze(0)  # add a leading batch dimension


def _classify(model, checkpoint, batch):
    """Predict the single most likely class for the first image in `batch`.

    Returns `(outputs, probs, pred_id, pred_class, confidence)`, where
    `pred_class` is looked up in `checkpoint['class_names']`.
    """
    with torch.no_grad():
        outputs = model(batch)
        # Softmax over the class scores of the first (only) image.
        probs = torch.nn.functional.softmax(outputs[0], dim=0)
    pred_id = torch.argmax(probs).item()
    pred_class = checkpoint['class_names'][pred_id]
    confidence = probs[pred_id].item()
    return outputs, probs, pred_id, pred_class, confidence


# ResNet-50 prediction
(resnet_outputs, resnet_probs, resnet_pred_id,
 resnet_pred_class, resnet_confidence) = _classify(
    resnet_model, resnet_checkpoint, image_tensor)

# SE-ResNet-50 prediction
(se_resnet_outputs, se_resnet_probs, se_resnet_pred_id,
 se_resnet_pred_class, se_resnet_confidence) = _classify(
    se_resnet_model, se_resnet_checkpoint, image_tensor)

# Compare results
_report = (
    ("ResNet-50 Prediction:", resnet_pred_class, resnet_confidence),
    ("\nSE-ResNet-50 Prediction:", se_resnet_pred_class, se_resnet_confidence),
)
for _header, _pred_class, _confidence in _report:
    print(_header)
    print(f" Class: {_pred_class}")
    print(f" Confidence: {_confidence*100:.2f}%")
resnet50_pytorch_model.bin - ResNet-50 model weights
se_resnet50_pytorch_model.bin - SE-ResNet-50 model weights
resnet50_metadata.json - ResNet-50 metadata
se_resnet50_metadata.json - SE-ResNet-50 metadata
multilabel_freeze_pytorch_model.bin - Multi-label ResNet-50 (Freeze Encoder)
multilabel_freeze_metadata.json - Freeze Encoder metadata
README.md - This file
If you use these models, please cite:
@misc{raw_food_recognition_models_2024,
title={Raw Food Recognition Models: Single-Class and Multi-Label Classification},
author={Ibrahim Daud},
year={2024},
publisher={HuggingFace},
url={https://huggingface.co/ibrahimdaud/raw-food-recognition-models}
}
MIT License