Spaces:

iBrokeTheCode
/

Multimodal_Product_Classification

Sleeping

iBrokeTheCode committed on Aug 27

Commit

238f86d

1 Parent(s): 9470ff7

chore: Upload missing project files

Files changed (6) hide show

Dockerfile.train ADDED Viewed

+# Use the official Python 3.9.6 slim image from Docker Hub
+FROM python:3.9.6-slim
+# Set the working directory in the container
+WORKDIR /app
+# Copy the requirements file into the container
+COPY requirements.txt .
+# Install necessary system packages for h5py and TensorFlow
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    pkg-config \
+    libhdf5-dev \
+    zlib1g-dev \
+    libjpeg-dev \
+    liblapack-dev \
+    libblas-dev \
+    gfortran
+# Install pip 21.2.3
+RUN pip install --upgrade pip==21.2.3
+RUN pip install -r requirements.txt
+# Install Jupyter Notebook
+RUN pip install jupyter
+# Copy the entire project into the container
+COPY . .
+# Expose port 8888 for Jupyter Notebook
+EXPOSE 8888
+# Set environment variable to prevent Python from buffering output
+ENV PYTHONUNBUFFERED=1
+# Set the default command to start Jupyter Notebook
+CMD ["jupyter", "notebook", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root"]

data/images/.gitkeep ADDED Viewed

File without changes

embeddings/.gitkeep ADDED Viewed

File without changes

pytest.ini ADDED Viewed

+[pytest]
+filterwarnings =
+    ignore::DeprecationWarning
+    ignore::UserWarning
+    ignore::FutureWarning

src/classifiers_mlp.py CHANGED Viewed

@@ -459,6 +459,7 @@ def train_mlp(
     # Train the model using the training data and validation data
     history = None
     if train_model:
         history = model.fit(
             train_loader,
             validation_data=test_loader,
@@ -469,7 +470,7 @@ def train_mlp(
         )
     if test_mlp_model:
-        # Test the model on the test set
         y_true, y_pred, y_prob = [], [], []
         for batch in test_loader:
             features, labels = batch
@@ -501,7 +502,7 @@ def train_mlp(
         if report:
             test_model(y_true, y_pred, y_prob, encoder=train_loader.encoder)
-        # Store results in a dataframe and save in the results folder
         if text_input_size is not None and image_input_size is not None:
             model_type = "multimodal"
         elif text_input_size is not None:
@@ -516,6 +517,14 @@ def train_mlp(
             # create results folder if it does not exist
             os.makedirs("results", exist_ok=True)
             results.to_csv(f"results/{model_type}_results.csv", index=False)
     else:
         test_accuracy, f1, macro_auc = None, None, None

     # Train the model using the training data and validation data
     history = None
     if train_model:
+        # 📌  Train the model
         history = model.fit(
             train_loader,
             validation_data=test_loader,
         )
     if test_mlp_model:
+        # 📌 Test the model on the test set
         y_true, y_pred, y_prob = [], [], []
         for batch in test_loader:
             features, labels = batch
         if report:
             test_model(y_true, y_pred, y_prob, encoder=train_loader.encoder)
+        # 📌 Store results in a dataframe and save in the results folder
         if text_input_size is not None and image_input_size is not None:
             model_type = "multimodal"
         elif text_input_size is not None:
             # create results folder if it does not exist
             os.makedirs("results", exist_ok=True)
             results.to_csv(f"results/{model_type}_results.csv", index=False)
+        # 📌 Save the model
+        models_dir = "trained_models"
+        os.makedirs(models_dir, exist_ok=True)
+        model_filename = os.path.join(models_dir, f"{model_type}_model")
+        model.save(model_filename)
+        print(f"✅ {model_type} model saved successfully")
     else:
         test_accuracy, f1, macro_auc = None, None, None

src/vision_embeddings_tf.py CHANGED Viewed

@@ -372,7 +372,7 @@ def get_embeddings_df(
     path="data/images",
     dataset_name="",
     backbone="resnet50",
-    directory="Embeddings",
     image_files=None,
 ):
     """
@@ -394,7 +394,7 @@ def get_embeddings_df(
         The name of the backbone model to use for generating embeddings. The default is 'resnet50'.
         Other possible options include models like 'convnext_tiny', 'vit_base', etc.
     directory : str, optional
-        The root directory where the embeddings CSV file will be saved. Default is 'Embeddings'.
     image_files : list, optional
         A pre-defined list of image file names to process. If not provided, the function will automatically detect
         image files in the `path` directory.

     path="data/images",
     dataset_name="",
     backbone="resnet50",
+    directory="embeddings",
     image_files=None,
 ):
     """
         The name of the backbone model to use for generating embeddings. The default is 'resnet50'.
         Other possible options include models like 'convnext_tiny', 'vit_base', etc.
     directory : str, optional
+        The root directory where the embeddings CSV file will be saved. Default is 'embeddings'.
     image_files : list, optional
         A pre-defined list of image file names to process. If not provided, the function will automatically detect
         image files in the `path` directory.