Advanced post-processing

Files changed (4) hide show

Demo.ipynb +2 -2
post_processing/page_elt_pp.py +203 -0
post_processing/text_pp.py +225 -0
post_processing/wbf.py +292 -0

Demo.ipynb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1452423fcb5fbc1cb08085f1169727e61238f763d0994f3d8a98b98621a0fc89
-size 302483

 version https://git-lfs.github.com/spec/v1
+oid sha256:98c4ca2b9c91864ea8f45b0d91900ce9460582ce2a9419a02efe8d5188f60b88
+size 1482484

post_processing/page_elt_pp.py ADDED Viewed

	@@ -0,0 +1,203 @@

+import numpy as np
+def expand_boxes(boxes, r_x=(1, 1), r_y=(1, 1), size_agnostic=True):
+    """
+    Expands bounding boxes by a specified ratio, leaving the input array untouched.
+    Expected box format is normalized [x_min, y_min, x_max, y_max].
+    After expansion and clipping, every pair of boxes whose IoU became > 0.05 while it
+    was < 0.1 before expansion has its facing top/bottom edges pulled back.
+    Args:
+        boxes (numpy.ndarray): Array of bounding boxes with shape (N, 4).
+        r_x (tuple, optional): Left, right expansion ratios. Defaults to (1, 1) (no expansion).
+        r_y (tuple, optional): Up, down expansion ratios. Defaults to (1, 1) (no expansion).
+        size_agnostic (bool, optional): If True, (ratio - 1) is added as an absolute offset;
+            if False, the offset is scaled by each box's width/height. Defaults to True.
+    Returns:
+        numpy.ndarray: New array of adjusted bounding boxes clipped to the [0, 1] range.
+    """
+    old_boxes = boxes.copy()
+    # Work on a private copy: the in-place arithmetic below would otherwise leak the
+    # (unclipped) expansion into the caller's array.
+    boxes = boxes.copy()
+    if not size_agnostic:
+        h = boxes[:, 3] - boxes[:, 1]
+        w = boxes[:, 2] - boxes[:, 0]
+    else:
+        h, w = 1, 1
+    boxes[:, 0] -= w * (r_x[0] - 1)  # left
+    boxes[:, 2] += w * (r_x[1] - 1)  # right
+    boxes[:, 1] -= h * (r_y[0] - 1)  # up
+    boxes[:, 3] += h * (r_y[1] - 1)  # down
+    boxes = np.clip(boxes, 0, 1)
+    # Enforce non-overlapping boxes: undo vertical overlaps created by the expansion
+    for i in range(len(boxes)):
+        for j in range(i + 1, len(boxes)):
+            iou = bb_iou_array(boxes[i][None], boxes[j])[0]
+            old_iou = bb_iou_array(old_boxes[i][None], old_boxes[j])[0]
+            if iou > 0.05 and old_iou < 0.1:
+                if boxes[i, 1] < boxes[j, 1]:  # i above j
+                    # Top of j goes back to its original value or to the bottom of i
+                    boxes[j, 1] = min(old_boxes[j, 1], boxes[i, 3])
+                    if old_iou > 0:
+                        boxes[i, 3] = max(old_boxes[i, 3], boxes[j, 1])
+                else:  # j above (or level with) i: same logic with roles swapped
+                    boxes[i, 1] = min(old_boxes[i, 1], boxes[j, 3])
+                    if old_iou > 0:
+                        boxes[j, 3] = max(old_boxes[j, 3], boxes[i, 1])
+    return boxes
+def merge_boxes(b1, b2):
+    """
+    Returns the smallest box enclosing both input boxes.
+    Args:
+        b1 (numpy.ndarray): First bounding box [x_min, y_min, x_max, y_max].
+        b2 (numpy.ndarray): Second bounding box [x_min, y_min, x_max, y_max].
+    Returns:
+        numpy.ndarray: Copy of b1 whose corners are stretched to also cover b2.
+    """
+    merged = b1.copy()
+    # Top-left corner takes the minima, bottom-right corner takes the maxima
+    for k in (0, 1):
+        merged[k] = min(b1[k], b2[k])
+    for k in (2, 3):
+        merged[k] = max(b1[k], b2[k])
+    return merged
+def bb_iou_array(boxes, new_box):
+    """
+    Computes the Intersection over Union (IoU) of one box against an array of boxes.
+    Args:
+        boxes (numpy.ndarray): Array of bounding boxes with shape (N, 4).
+        new_box (numpy.ndarray): A single bounding box [x_min, y_min, x_max, y_max].
+    Returns:
+        numpy.ndarray: Array of N IoU values, one per row of boxes.
+    """
+    # Corners of the intersection rectangle
+    left = np.maximum(boxes[:, 0], new_box[0])
+    top = np.maximum(boxes[:, 1], new_box[1])
+    right = np.minimum(boxes[:, 2], new_box[2])
+    bottom = np.minimum(boxes[:, 3], new_box[3])
+    # Negative extents mean the boxes are disjoint along that axis
+    inter_w = np.maximum(right - left, 0)
+    inter_h = np.maximum(bottom - top, 0)
+    intersection = inter_w * inter_h
+    # Union = sum of both areas minus the part counted twice
+    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
+    new_area = (new_box[2] - new_box[0]) * (new_box[3] - new_box[1])
+    union = areas + new_area - intersection
+    return intersection / union
+def match_with_title(
+    bbox, title_bboxes, match_dist=0.1, delta=1.5, already_matched=None
+):
+    """
+    Matches a bounding box with nearby or overlapping title boxes and merges them.
+    The distance to a title is the smaller of the two vertical gaps (title bottom to box
+    top, box bottom to title top) plus half the smaller of the left-edge and center
+    horizontal offsets. Titles that overlap the box receive the smallest distance found,
+    capped at match_dist, so that they always qualify.
+    Args:
+        bbox (numpy.ndarray): Bounding box to match with title [x_min, y_min, x_max, y_max].
+        title_bboxes (numpy.ndarray): Array of title bounding boxes with shape (N, 4).
+        match_dist (float, optional): Maximum distance for matching. Defaults to 0.1.
+        delta (float, optional): Multiplier for matching several titles. Defaults to 1.5.
+        already_matched (list, optional): Indices of titles to exclude from matching.
+            Defaults to None (nothing excluded). The list is never modified.
+    Returns:
+        tuple: (merged_bbox, matched_indices) where matched_indices is the list of indices
+            into title_bboxes that were merged into the box.
+            If no match is found, returns (None, None).
+    """
+    if not len(title_bboxes):
+        return None, None
+    if already_matched is None:
+        already_matched = []
+    dist_above = np.abs(title_bboxes[:, 3] - bbox[1])
+    dist_below = np.abs(bbox[3] - title_bboxes[:, 1])
+    dist_left = np.abs(title_bboxes[:, 0] - bbox[0])
+    dist_center = np.abs(title_bboxes[:, 0] + title_bboxes[:, 2] - bbox[0] - bbox[2]) / 2
+    dists = np.min([dist_above, dist_below], 0)
+    dists += np.min([dist_left, dist_center], 0) / 2
+    ious = bb_iou_array(title_bboxes, bbox)
+    dists = np.where(ious > 0, min(match_dist, np.min(dists)), dists)
+    if len(already_matched):
+        dists[already_matched] = match_dist * 10  # Remove already matched titles
+    if np.min(dists) > match_dist:
+        return None, None
+    # Keep every title within delta times the best distance (still capped at match_dist)
+    matches = np.where(dists <= min(match_dist, np.min(dists) * delta))[0]
+    new_bbox = bbox
+    for match in matches:
+        new_bbox = merge_boxes(new_bbox, title_bboxes[match])
+    return new_bbox, list(matches)
+def match_boxes_with_title(
+    boxes, confs, labels, classes, to_match_labels=["chart"], remove_matched_titles=False
+):
+    """
+    Merges boxes of the requested classes with their matching title boxes.
+    The detections are first reordered so that titles come last; all returned
+    arrays and indices refer to that reordered layout.
+    Args:
+        boxes (numpy.ndarray): Array of bounding boxes with shape (N, 4).
+        confs (numpy.ndarray): Array of confidence scores with shape (N,).
+        labels (numpy.ndarray): Array of labels with shape (N,).
+        classes (list): List of class names. Must contain "title".
+        to_match_labels (list): List of class names to match with titles.
+        remove_matched_titles (bool): Whether to remove matched titles from the boxes.
+    Returns:
+        boxes (numpy.ndarray): Array of bounding boxes with shape (M, 4).
+        confs (numpy.ndarray): Array of confidence scores with shape (M,).
+        labels (numpy.ndarray): Array of labels with shape (M,).
+        found_title (list): Indices of the boxes that were merged with at least one title.
+    """
+    title_label = classes.index("title")
+    # Put titles at the end
+    titles_first_pass = np.where(labels == title_label)[0]
+    non_titles = np.delete(np.arange(len(boxes)), titles_first_pass)
+    order = np.concatenate([non_titles, titles_first_pass])
+    boxes, confs, labels = boxes[order], confs[order], labels[order]
+    # Ids in the reordered layout
+    title_ids = np.where(labels == title_label)[0]
+    wanted_labels = [classes.index(c) for c in to_match_labels]
+    to_match = np.where(np.isin(labels, wanted_labels))[0]
+    # Matching
+    found_title = []
+    already_matched = []
+    for i in map(int, to_match):
+        merged_box, matched_title_ids = match_with_title(
+            boxes[i],
+            boxes[title_ids],
+            already_matched=already_matched,
+        )
+        if matched_title_ids is None:
+            continue
+        boxes[i] = merged_box
+        already_matched += matched_title_ids
+        found_title.append(i)
+    if remove_matched_titles and len(already_matched):
+        # Matched ids index into the title subset; map them back to row positions
+        drop_rows = title_ids[already_matched]
+        boxes = np.delete(boxes, drop_rows, axis=0)
+        confs = np.delete(confs, drop_rows, axis=0)
+        labels = np.delete(labels, drop_rows, axis=0)
+    return boxes, confs, labels, found_title

post_processing/text_pp.py ADDED Viewed

	@@ -0,0 +1,225 @@

+import numpy as np
+def get_overlaps(boxes, other_boxes, normalize="box_only"):
+    """
+    Computes how much each box overlaps with each of the other boxes.
+    Boxes are expected in format (x0, y0, x1, y1).
+    Args:
+        boxes (np array [4] or [n x 4]): Boxes. A single box is treated as n = 1.
+        other_boxes (np array [m x 4]): Other boxes.
+        normalize (str, optional): "box_only" divides the intersection by the area of the
+            box; "all" divides it by the smaller of the two areas. Defaults to "box_only".
+    Returns:
+        np array [n x m]: Overlaps.
+    Raises:
+        ValueError: If normalize is neither "box_only" nor "all".
+    """
+    if boxes.ndim == 1:
+        boxes = boxes[None, :]
+    # Column vectors [n x 1] for boxes, row vectors [1 x m] for other boxes,
+    # so that every binary operation below broadcasts to [n x m]
+    x0, y0, x1, y1 = (
+        boxes[:, 0][:, None], boxes[:, 1][:, None], boxes[:, 2][:, None], boxes[:, 3][:, None]
+    )
+    areas = ((y1 - y0) * (x1 - x0))
+    x0_other, y0_other, x1_other, y1_other = (
+        other_boxes[:, 0][None, :],
+        other_boxes[:, 1][None, :],
+        other_boxes[:, 2][None, :],
+        other_boxes[:, 3][None, :]
+    )
+    areas_other = ((y1_other - y0_other) * (x1_other - x0_other))
+    # Intersection
+    inter_y0 = np.maximum(y0, y0_other)
+    inter_y1 = np.minimum(y1, y1_other)
+    inter_x0 = np.maximum(x0, x0_other)
+    inter_x1 = np.minimum(x1, x1_other)
+    inter_area = np.maximum(0, inter_y1 - inter_y0) * np.maximum(0, inter_x1 - inter_x0)
+    # Overlap
+    if normalize == "box_only":  # Only consider box included in other box
+        overlaps = inter_area / areas
+    elif normalize == "all":  # Consider box included in other box and other box included in box
+        # areas is [n x 1] and areas_other is already [1 x m]: no extra axis is needed,
+        # adding one would turn the result into a [1 x n x m] array
+        overlaps = inter_area / np.minimum(areas, areas_other)
+    else:
+        raise ValueError(f"Invalid normalization: {normalize}")
+    return overlaps
+def get_distances(title_boxes, other_boxes):
+    """
+    Computes pairwise distances between title boxes and table/chart boxes.
+    The distance is the vertical distance plus half the horizontal distance, where:
+    - horizontal distance = min(center-to-center offset, left-edge-to-left-edge offset),
+    - vertical distance = min(|title bottom - other top|, |title top - other bottom|).
+    Args:
+        title_boxes (np array [n_titles x 4]): Title boxes.
+        other_boxes (np array [n_other x 4]): Other boxes.
+    Returns:
+        np array [n_titles x n_other]: Distances between titles and other boxes.
+    """
+    # Titles as column vectors, other boxes as row vectors, to broadcast pairwise
+    title_left = title_boxes[:, 0][:, None]
+    title_center = ((title_boxes[:, 0] + title_boxes[:, 2]) / 2)[:, None]
+    title_top = title_boxes[:, 1][:, None]
+    title_bottom = title_boxes[:, 3][:, None]
+    other_left = other_boxes[:, 0][None, :]
+    other_center = ((other_boxes[:, 0] + other_boxes[:, 2]) / 2)[None, :]
+    other_top = other_boxes[:, 1][None, :]
+    other_bottom = other_boxes[:, 3][None, :]
+    # Horizontal: whichever alignment (centered or left-aligned) fits best
+    center_offset = np.abs(title_center - other_center)
+    left_offset = np.abs(title_left - other_left)
+    x_dists = np.minimum(center_offset, left_offset)
+    # Vertical: title sitting just above or just below the other box
+    gap_if_above = np.abs(title_bottom - other_top)
+    gap_if_below = np.abs(title_top - other_bottom)
+    y_dists = np.minimum(gap_if_above, gap_if_below)
+    return y_dists + x_dists / 2
+def find_titles(title_boxes, table_boxes, chart_boxes, max_dist=0.1):
+    """
+    Greedily associates titles to tables first, then to charts.
+    A title can be assigned at most once. Titles overlapping a chart by more than 25%
+    of their area get distance 0 to that chart and a 10x penalty towards every table.
+    Args:
+        title_boxes (np array [n_titles x 4]): Title boxes.
+        table_boxes (np array [n_tables x 4]): Table boxes.
+        chart_boxes (np array [n_charts x 4]): Chart boxes.
+        max_dist (float, optional): Maximum distance between title and table/chart. Defaults to 0.1.
+    Returns:
+        dict: Dictionary of assigned titles.
+            - Keys are the indices of the titles,
+            - Values are tuples of:
+                - str: Whether the title is assigned to a "chart" or "table"
+                - int: index of the assigned table/chart
+    """
+    n_titles = len(title_boxes)
+    has_tables = len(table_boxes) > 0
+    has_charts = len(chart_boxes) > 0
+    if n_titles == 0 or not (has_tables or has_charts):
+        return {}
+    # Title-to-chart distances (zero-width placeholder when there is no chart)
+    if has_charts:
+        chart_overlaps = get_overlaps(title_boxes, chart_boxes, normalize="box_only")
+        chart_distances = get_distances(title_boxes, chart_boxes)
+        chart_distances = np.where(chart_overlaps > 0.25, 0, chart_distances)
+    else:
+        chart_distances = np.ones((n_titles, 0))
+    # Title-to-table distances (zero-width placeholder when there is no table)
+    if has_tables:
+        table_distances = get_distances(title_boxes, table_boxes)
+        if has_charts:  # Penalize table titles that are inside charts
+            inside_chart = chart_overlaps.max(1, keepdims=True) > 0.25
+            table_distances = np.where(inside_chart, table_distances * 10, table_distances)
+    else:
+        table_distances = np.ones((n_titles, 0))
+    assigned_titles = {}
+    # Assign to tables
+    for table_idx in range(len(table_boxes)):
+        best_match = np.argmin(table_distances[:, table_idx])
+        if table_distances[best_match, table_idx] < max_dist:
+            assigned_titles[best_match] = ("table", table_idx)
+            # An assigned title is no longer available to any table or chart
+            table_distances[best_match] = np.inf
+            chart_distances[best_match] = np.inf
+    # Assign to charts
+    for chart_idx in range(len(chart_boxes)):
+        best_match = np.argmin(chart_distances[:, chart_idx])
+        if chart_distances[best_match, chart_idx] < max_dist:
+            assigned_titles[best_match] = ("chart", chart_idx)
+            chart_distances[best_match] = np.inf
+    return assigned_titles
+def postprocess_included(
+    boxes, labels, confs, class_="title", classes=["table", "chart", "title", "infographic"]
+):
+    """
+    Post process predictions of one class.
+    - Remove boxes of that class that are included (> 90% of their area) in a box of a
+      container class: table, infographic, chart, plus paragraph when class_ is
+      "title" or "header_footer". Container classes missing from `classes` are ignored.
+    The surviving boxes of class_ are moved to the end of the returned arrays, ordered
+    from least to most confident.
+    Args:
+        boxes (numpy.ndarray [N, 4]): Array of bounding boxes.
+        labels (numpy.ndarray [N]): Array of labels.
+        confs (numpy.ndarray [N]): Array of confidences.
+        class_ (str, optional): Class to postprocess. Defaults to "title".
+        classes (list, optional): Classes. Defaults to ["table", "chart", "title", "infographic"].
+    Returns:
+        boxes (numpy.ndarray): Array of bounding boxes.
+        labels (numpy.ndarray): Array of labels.
+        confs (numpy.ndarray): Array of confidences.
+    """
+    class_id = classes.index(class_)
+    is_target = labels == class_id
+    boxes_to_pp = boxes[is_target]
+    confs_to_pp = confs[is_target]
+    if len(boxes_to_pp) == 0:
+        return boxes, labels, confs
+    order = np.argsort(confs_to_pp)  # least to most confident
+    boxes_to_pp, confs_to_pp = boxes_to_pp[order], confs_to_pp[order]
+    inclusion_classes = ["table", "infographic", "chart"]
+    if class_ in ["header_footer", "title"]:
+        inclusion_classes.append("paragraph")
+    # Only look up container classes this label set actually has: classes.index() raises
+    # ValueError otherwise (e.g. "paragraph" is absent from the default `classes`)
+    inclusion_ids = [classes.index(c) for c in inclusion_classes if c in classes]
+    other_boxes = boxes[np.isin(labels, inclusion_ids)]
+    # Remove boxes included in other_boxes
+    kept_boxes, kept_confs = [], []
+    for i, b in enumerate(boxes_to_pp):
+        if len(other_boxes) > 0:
+            overlaps = get_overlaps(b, other_boxes, normalize="box_only")
+            if overlaps.max() > 0.9:
+                continue
+        kept_boxes.append(b)
+        kept_confs.append(confs_to_pp[i])
+    # Aggregate
+    kept_boxes = np.stack(kept_boxes) if len(kept_boxes) else np.empty((0, 4))
+    kept_confs = np.stack(kept_confs) if len(kept_confs) else np.empty(0)
+    boxes_pp = np.concatenate([boxes[~is_target], kept_boxes])
+    confs_pp = np.concatenate([confs[~is_target], kept_confs])
+    labels_pp = np.concatenate([
+        labels[~is_target],
+        np.ones(len(kept_boxes)) * class_id
+    ])
+    return boxes_pp, labels_pp, confs_pp

post_processing/wbf.py ADDED Viewed

	@@ -0,0 +1,292 @@

+# Adapted from:
+# https://github.com/ZFTurbo/Weighted-Boxes-Fusion/blob/master/ensemble_boxes/ensemble_boxes_wbf.py
+import warnings
+import numpy as np
+def prefilter_boxes(boxes, scores, labels, weights, thr, class_agnostic=False):
+    """
+    Validates, filters and regroups the boxes of all models.
+    Boxes scoring below thr or with zero area are dropped; inverted corners are swapped
+    and coordinates are clipped to [0, 1] (a warning is emitted in each case).
+    Each kept box becomes a row [label, score * weight, weight, model index, x1, y1, x2, y2].
+    Args:
+        boxes (list[np array[n x 4]]): List of boxes. One list per model.
+        scores (list[np array[n]]): List of confidences.
+        labels (list[np array[n]]): List of labels.
+        weights (list): Model weights.
+        thr (float): Confidence threshold
+        class_agnostic (bool, optional): Merge boxes from different classes. Defaults to False.
+    Returns:
+        dict[np array [? x 8]]: Filtered boxes sorted by decreasing score. Keys are the
+            labels, or the single key "*" when class_agnostic is True.
+    """
+    grouped = {}
+    for t, model_boxes in enumerate(boxes):
+        model_scores, model_labels = scores[t], labels[t]
+        assert len(model_boxes) == len(model_scores), "len(boxes) != len(scores)"
+        assert len(model_boxes) == len(model_labels), "len(boxes) != len(labels)"
+        for j, box_part in enumerate(model_boxes):
+            score = model_scores[j]
+            if score < thr:
+                continue
+            label = int(model_labels[j])
+            x1, y1 = float(box_part[0]), float(box_part[1])
+            x2, y2 = float(box_part[2]), float(box_part[3])
+            # Box data checks
+            if x2 < x1:
+                warnings.warn("X2 < X1 value in box. Swap them.")
+                x1, x2 = x2, x1
+            if y2 < y1:
+                warnings.warn("Y2 < Y1 value in box. Swap them.")
+                y1, y2 = y2, y1
+            coords = np.array([x1, x2, y1, y2])
+            if coords.min() < 0 or coords.max() > 1:
+                warnings.warn("Coordinates outside [0, 1]")
+                x1, x2, y1, y2 = np.clip(coords, 0, 1)
+            if (x2 - x1) * (y2 - y1) == 0.0:
+                warnings.warn(f"Zero area box skipped: {box_part}.")
+                continue
+            # [label, score, weight, model index, x1, y1, x2, y2]
+            row = [int(label), float(score) * weights[t], weights[t], t, x1, y1, x2, y2]
+            group_key = "*" if class_agnostic else label
+            grouped.setdefault(group_key, []).append(row)
+    # Turn every group into an array sorted by decreasing score
+    sorted_groups = {}
+    for group_key, rows in grouped.items():
+        rows = np.array(rows)
+        sorted_groups[group_key] = rows[rows[:, 1].argsort()[::-1]]
+    return sorted_groups
+def merge_labels(labels, confs, title_label=2):
+    """
+    Custom function for merging labels.
+    If all labels are the same, return the unique value.
+    Else, return the label of the most confident non-title box.
+    Args:
+        labels (np array [n]): Labels.
+        confs (np array [n]): Confidence.
+        title_label (int, optional): Label id of the title class. Defaults to 2.
+    Returns:
+        int: Label.
+    """
+    if len(np.unique(labels)) == 1:
+        return labels[0]
+    # Several distinct labels means at least one of them is not the title label.
+    # Labels and confidences must be filtered with the SAME mask to stay aligned.
+    not_title = labels != title_label
+    return labels[not_title][np.argmax(confs[not_title])]
+def get_weighted_box(boxes, conf_type="avg"):
+    """
+    Fuses boxes into one whose coordinates are the score-weighted average of the inputs.
+    Args:
+        boxes (np array [n x 8]): Boxes to merge.
+        conf_type (str, optional): Confidence merging type. Defaults to "avg".
+    Returns:
+        np array [8]: Merged box
+            [label, confidence, summed weight, -1, x1, y1, x2, y2].
+    """
+    fused = np.zeros(8, dtype=np.float32)
+    scores = []
+    total_score = 0
+    total_weight = 0
+    for row in boxes:
+        score = row[1]
+        fused[4:] += score * row[4:]  # accumulate score-weighted coordinates
+        total_score += score
+        total_weight += row[2]
+        scores.append(score)
+    fused[0] = merge_labels(np.array([row[0] for row in boxes]), np.array(scores))
+    if conf_type == "max":
+        fused[1] = np.max(scores)
+    else:
+        fused[1] = np.mean(scores)
+    fused[2] = total_weight
+    fused[3] = -1  # model index field is retained for consistency but is not used.
+    # Normalize the accumulated coordinates by the total score
+    fused[4:] /= total_score
+    return fused
+def get_biggest_box(boxes, conf_type="avg"):
+    """
+    Fuses boxes into the smallest box that encloses all of them.
+    Args:
+        boxes (np array [n x 8]): Boxes to merge.
+        conf_type (str, optional): Confidence merging type. Defaults to "avg".
+    Returns:
+        np array [8]: Merged box
+            [label, confidence, summed weight, -1, x1, y1, x2, y2].
+    """
+    hull = np.zeros(8, dtype=np.float32)
+    hull[4:] = boxes[0][4:]
+    scores = []
+    total_weight = 0
+    for row in boxes:
+        # Grow the enclosing box: minima for (x1, y1), maxima for (x2, y2)
+        for k in (4, 5):
+            hull[k] = min(hull[k], row[k])
+        for k in (6, 7):
+            hull[k] = max(hull[k], row[k])
+        scores.append(row[1])
+        total_weight += row[2]
+    hull[0] = merge_labels(np.array([row[0] for row in boxes]), np.array(scores))
+    if conf_type == "max":
+        hull[1] = np.max(scores)
+    else:
+        hull[1] = np.mean(scores)
+    hull[2] = total_weight
+    hull[3] = -1  # model index field is retained for consistency but is not used.
+    return hull
+def find_matching_box_fast(boxes_list, new_box, match_iou):
+    """
+    Finds the box of boxes_list with the highest IoU against new_box, using numpy
+    array operations instead of a Python loop over the boxes.
+    Args:
+        boxes_list (np array [n x 8]): Candidate boxes, coordinates in columns 4 to 7.
+        new_box (np array [8]): Box to match, coordinates in positions 4 to 7.
+        match_iou (float): IoU a candidate must strictly exceed to count as a match.
+    Returns:
+        tuple: (index of the best candidate, its IoU), or (-1, match_iou) if there is
+            no candidate or the best IoU does not exceed match_iou.
+    """
+    if boxes_list.shape[0] == 0:
+        return -1, match_iou
+    candidates, query = boxes_list[:, 4:], new_box[4:]
+    # Intersection rectangle extents, floored at 0 for disjoint boxes
+    inter_w = np.maximum(
+        np.minimum(candidates[:, 2], query[2]) - np.maximum(candidates[:, 0], query[0]), 0
+    )
+    inter_h = np.maximum(
+        np.minimum(candidates[:, 3], query[3]) - np.maximum(candidates[:, 1], query[1]), 0
+    )
+    intersection = inter_w * inter_h
+    candidate_areas = (candidates[:, 2] - candidates[:, 0]) * (candidates[:, 3] - candidates[:, 1])
+    query_area = (query[2] - query[0]) * (query[3] - query[1])
+    ious = intersection / (candidate_areas + query_area - intersection)
+    best_idx = np.argmax(ious)
+    best_iou = ious[best_idx]
+    if best_iou <= match_iou:
+        return -1, match_iou
+    return best_idx, best_iou
+def weighted_boxes_fusion(
+    boxes_list,
+    scores_list,
+    labels_list,
+    iou_thr=0.5,
+    skip_box_thr=0.0,
+    conf_type="avg",
+    merge_type="weighted",
+    class_agnostic=False,
+):
+    """
+    Custom WBF implementation that supports a class_agnostic mode and a biggest box fusion.
+    Boxes are expected to be in normalized (x0, y0, x1, y1) format.
+    Every box is linked to the box it overlaps most (if that IoU exceeds iou_thr); linked
+    boxes form clusters and each cluster is fused into a single output box. All models
+    are weighted equally.
+    Args:
+        boxes_list (list[np.ndarray[n x 4]]): List of boxes. One list per model.
+        scores_list (list[np.ndarray[n]]): List of confidences.
+        labels_list (list[np.ndarray[n]]): List of labels.
+        iou_thr (float, optional): IoU threshold for matching. Defaults to 0.5.
+        skip_box_thr (float, optional): Exclude boxes with score < skip_box_thr. Defaults to 0.0.
+        conf_type (str, optional): Confidence merging type ("avg" or "max"). Defaults to "avg".
+        merge_type (str, optional): Merge type ("weighted" or "biggest"). Defaults to "weighted".
+        class_agnostic (bool, optional): Merge boxes from different classes. Defaults to False.
+    Returns:
+        np array[N x 4]: Merged boxes,
+        np array[N]: Merged confidences,
+        np array[N]: Merged labels.
+        All three are sorted by decreasing confidence.
+    """
+    weights = np.ones(len(boxes_list))
+    assert conf_type in ["avg", "max"], 'Conf type must be "avg" or "max"'
+    assert merge_type in ["weighted", "biggest"], 'Merge type must be "weighted" or "biggest"'
+    filtered_boxes = prefilter_boxes(
+        boxes_list,
+        scores_list,
+        labels_list,
+        weights,
+        skip_box_thr,
+        class_agnostic=class_agnostic,
+    )
+    if len(filtered_boxes) == 0:
+        return np.zeros((0, 4)), np.zeros((0,)), np.zeros((0,))
+    merge_fn = get_weighted_box if merge_type == "weighted" else get_biggest_box
+    overall_boxes = []
+    for boxes in filtered_boxes.values():
+        clusters = []
+        # Clusterize boxes
+        for j in range(len(boxes)):
+            ids = [i for i in range(len(boxes)) if i != j]
+            index, _ = find_matching_box_fast(boxes[ids], boxes[j], iou_thr)
+            if index == -1:
+                clusters.append([j])
+                continue
+            index = ids[index]  # position among the other boxes -> position in boxes
+            touching = [
+                clust_idx
+                for clust_idx, clust in enumerate(clusters)
+                if (j in clust or index in clust)
+            ]
+            if not touching:
+                clusters.append([index, j])
+                continue
+            # j and its match may already sit in two different clusters: fold them all
+            # into the first one, otherwise a box would be fused into several outputs
+            members = set(clusters[touching[0]] + [index, j])
+            for clust_idx in touching[1:]:
+                members.update(clusters[clust_idx])
+            clusters[touching[0]] = list(members)
+            for clust_idx in reversed(touching[1:]):
+                del clusters[clust_idx]
+        for c in clusters:
+            weighted_box = merge_fn(boxes[c], conf_type)
+            if conf_type == "max":
+                weighted_box[1] = weighted_box[1] / weights.max()
+            else:  # avg
+                # Rescale the mean confidence by cluster size over the number of models
+                weighted_box[1] = weighted_box[1] * len(c) / weights.sum()
+            overall_boxes.append(weighted_box)
+    overall_boxes = np.array(overall_boxes)
+    overall_boxes = overall_boxes[overall_boxes[:, 1].argsort()[::-1]]
+    boxes = overall_boxes[:, 4:]
+    scores = overall_boxes[:, 1]
+    labels = overall_boxes[:, 0]
+    return boxes, scores, labels