Spaces:
Runtime error
Runtime error
| # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved | |
| """ | |
| MaskFormer Training Script. | |
| This script is a simplified version of the training script in detectron2/tools. | |
| """ | |
| import copy | |
| import itertools | |
| import logging | |
| import os | |
| from collections import OrderedDict | |
| from typing import Any, Dict, List, Set | |
| import torch | |
| import detectron2.utils.comm as comm | |
| from detectron2.checkpoint import DetectionCheckpointer | |
| from detectron2.config import get_cfg | |
| from detectron2.data import MetadataCatalog, build_detection_train_loader | |
| from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch | |
| from detectron2.evaluation import CityscapesInstanceEvaluator, CityscapesSemSegEvaluator, \ | |
| COCOEvaluator, COCOPanopticEvaluator, DatasetEvaluators, SemSegEvaluator, verify_results, \ | |
| DatasetEvaluator | |
| from detectron2.projects.deeplab import add_deeplab_config, build_lr_scheduler | |
| from detectron2.solver.build import maybe_add_gradient_clipping | |
| from detectron2.utils.logger import setup_logger | |
| from detectron2.utils.file_io import PathManager | |
| import numpy as np | |
| from PIL import Image | |
| import glob | |
| import pycocotools.mask as mask_util | |
| from detectron2.data import DatasetCatalog, MetadataCatalog | |
| from detectron2.utils.comm import all_gather, is_main_process, synchronize | |
| import json | |
| from torch.nn.parallel import DistributedDataParallel | |
| from detectron2.engine.train_loop import AMPTrainer, SimpleTrainer, TrainerBase, HookBase | |
| import weakref | |
| from detectron2.utils.events import EventStorage | |
| from detectron2.utils.logger import _log_api_usage | |
| # from detectron2.evaluation import SemSegGzeroEvaluator | |
| # from mask_former.evaluation.sem_seg_evaluation_gzero import SemSegGzeroEvaluator | |
| class SemSegGzeroEvaluator(DatasetEvaluator): | |
| """ | |
| Evaluate semantic segmentation metrics. | |
| """ | |
| def __init__( | |
| self, dataset_name, distributed, output_dir=None, *, num_classes=None, ignore_label=None | |
| ): | |
| """ | |
| Args: | |
| dataset_name (str): name of the dataset to be evaluated. | |
| distributed (True): if True, will collect results from all ranks for evaluation. | |
| Otherwise, will evaluate the results in the current process. | |
| output_dir (str): an output directory to dump results. | |
| num_classes, ignore_label: deprecated argument | |
| """ | |
| self._logger = logging.getLogger(__name__) | |
| if num_classes is not None: | |
| self._logger.warn( | |
| "SemSegEvaluator(num_classes) is deprecated! It should be obtained from metadata." | |
| ) | |
| if ignore_label is not None: | |
| self._logger.warn( | |
| "SemSegEvaluator(ignore_label) is deprecated! It should be obtained from metadata." | |
| ) | |
| self._dataset_name = dataset_name | |
| self._distributed = distributed | |
| self._output_dir = output_dir | |
| self._cpu_device = torch.device("cpu") | |
| self.input_file_to_gt_file = { | |
| dataset_record["file_name"]: dataset_record["sem_seg_file_name"] | |
| for dataset_record in DatasetCatalog.get(dataset_name) | |
| } | |
| meta = MetadataCatalog.get(dataset_name) | |
| # Dict that maps contiguous training ids to COCO category ids | |
| try: | |
| c2d = meta.stuff_dataset_id_to_contiguous_id | |
| self._contiguous_id_to_dataset_id = {v: k for k, v in c2d.items()} | |
| except AttributeError: | |
| self._contiguous_id_to_dataset_id = None | |
| self._class_names = meta.stuff_classes | |
| self._val_extra_classes = meta.val_extra_classes | |
| self._num_classes = len(meta.stuff_classes) | |
| if num_classes is not None: | |
| assert self._num_classes == num_classes, f"{self._num_classes} != {num_classes}" | |
| self._ignore_label = ignore_label if ignore_label is not None else meta.ignore_label | |
| def reset(self): | |
| self._conf_matrix = np.zeros((self._num_classes + 1, self._num_classes + 1), dtype=np.int64) | |
| self._predictions = [] | |
| def process(self, inputs, outputs): | |
| """ | |
| Args: | |
| inputs: the inputs to a model. | |
| It is a list of dicts. Each dict corresponds to an image and | |
| contains keys like "height", "width", "file_name". | |
| outputs: the outputs of a model. It is either list of semantic segmentation predictions | |
| (Tensor [H, W]) or list of dicts with key "sem_seg" that contains semantic | |
| segmentation prediction in the same format. | |
| """ | |
| for input, output in zip(inputs, outputs): | |
| output = output["sem_seg"].argmax(dim=0).to(self._cpu_device) | |
| pred = np.array(output, dtype=np.int) | |
| with PathManager.open(self.input_file_to_gt_file[input["file_name"]], "rb") as f: | |
| gt = np.array(Image.open(f), dtype=np.int) | |
| gt[gt == self._ignore_label] = self._num_classes | |
| self._conf_matrix += np.bincount( | |
| (self._num_classes + 1) * pred.reshape(-1) + gt.reshape(-1), | |
| minlength=self._conf_matrix.size, | |
| ).reshape(self._conf_matrix.shape) | |
| self._predictions.extend(self.encode_json_sem_seg(pred, input["file_name"])) | |
| def evaluate(self): | |
| """ | |
| Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval): | |
| * Mean intersection-over-union averaged across classes (mIoU) | |
| * Frequency Weighted IoU (fwIoU) | |
| * Mean pixel accuracy averaged across classes (mACC) | |
| * Pixel Accuracy (pACC) | |
| """ | |
| if self._distributed: | |
| synchronize() | |
| conf_matrix_list = all_gather(self._conf_matrix) | |
| self._predictions = all_gather(self._predictions) | |
| self._predictions = list(itertools.chain(*self._predictions)) | |
| if not is_main_process(): | |
| return | |
| self._conf_matrix = np.zeros_like(self._conf_matrix) | |
| for conf_matrix in conf_matrix_list: | |
| self._conf_matrix += conf_matrix | |
| if self._output_dir: | |
| PathManager.mkdirs(self._output_dir) | |
| file_path = os.path.join(self._output_dir, "sem_seg_predictions.json") | |
| with PathManager.open(file_path, "w") as f: | |
| f.write(json.dumps(self._predictions)) | |
| acc = np.full(self._num_classes, np.nan, dtype=np.float) | |
| iou = np.full(self._num_classes, np.nan, dtype=np.float) | |
| tp = self._conf_matrix.diagonal()[:-1].astype(np.float) | |
| pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(np.float) | |
| class_weights = pos_gt / np.sum(pos_gt) | |
| pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(np.float) | |
| acc_valid = pos_gt > 0 | |
| acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid] | |
| iou_valid = (pos_gt + pos_pred) > 0 | |
| union = pos_gt + pos_pred - tp | |
| iou[acc_valid] = tp[acc_valid] / union[acc_valid] | |
| macc = np.sum(acc[acc_valid]) / np.sum(acc_valid) | |
| miou = np.sum(iou[acc_valid]) / np.sum(iou_valid) | |
| fiou = np.sum(iou[acc_valid] * class_weights[acc_valid]) | |
| pacc = np.sum(tp) / np.sum(pos_gt) | |
| seen_IoU = 0 | |
| unseen_IoU = 0 | |
| seen_acc = 0 | |
| unseen_acc = 0 | |
| res = {} | |
| res["mIoU"] = 100 * miou | |
| res["fwIoU"] = 100 * fiou | |
| for i, name in enumerate(self._class_names): | |
| res["IoU-{}".format(name)] = 100 * iou[i] | |
| if name in self._val_extra_classes: | |
| unseen_IoU = unseen_IoU + 100 * iou[i] | |
| else: | |
| seen_IoU = seen_IoU + 100 * iou[i] | |
| unseen_IoU = unseen_IoU / len(self._val_extra_classes) | |
| seen_IoU = seen_IoU / (self._num_classes - len(self._val_extra_classes)) | |
| res["mACC"] = 100 * macc | |
| res["pACC"] = 100 * pacc | |
| for i, name in enumerate(self._class_names): | |
| res["ACC-{}".format(name)] = 100 * acc[i] | |
| if name in self._val_extra_classes: | |
| unseen_acc = unseen_acc + 100 * iou[i] | |
| else: | |
| seen_acc = seen_acc + 100 * iou[i] | |
| unseen_acc = unseen_acc / len(self._val_extra_classes) | |
| seen_acc = seen_acc / (self._num_classes - len(self._val_extra_classes)) | |
| res["seen_IoU"] = seen_IoU | |
| res["unseen_IoU"] = unseen_IoU | |
| res["harmonic mean"] = 2 * seen_IoU * unseen_IoU / (seen_IoU + unseen_IoU) | |
| # res["unseen_acc"] = unseen_acc | |
| # res["seen_acc"] = seen_acc | |
| if self._output_dir: | |
| file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth") | |
| with PathManager.open(file_path, "wb") as f: | |
| torch.save(res, f) | |
| results = OrderedDict({"sem_seg": res}) | |
| self._logger.info(results) | |
| return results | |
| def encode_json_sem_seg(self, sem_seg, input_file_name): | |
| """ | |
| Convert semantic segmentation to COCO stuff format with segments encoded as RLEs. | |
| See http://cocodataset.org/#format-results | |
| """ | |
| json_list = [] | |
| for label in np.unique(sem_seg): | |
| if self._contiguous_id_to_dataset_id is not None: | |
| # import ipdb; ipdb.set_trace() | |
| assert ( | |
| label in self._contiguous_id_to_dataset_id | |
| ), "Label {} is not in the metadata info for {}".format(label, self._dataset_name) | |
| dataset_id = self._contiguous_id_to_dataset_id[label] | |
| else: | |
| dataset_id = int(label) | |
| mask = (sem_seg == label).astype(np.uint8) | |
| mask_rle = mask_util.encode(np.array(mask[:, :, None], order="F"))[0] | |
| mask_rle["counts"] = mask_rle["counts"].decode("utf-8") | |
| json_list.append( | |
| {"file_name": input_file_name, "category_id": dataset_id, "segmentation": mask_rle} | |
| ) | |
| return json_list | |
| # MaskFormer | |
| from cat_seg import ( | |
| DETRPanopticDatasetMapper, | |
| MaskFormerPanopticDatasetMapper, | |
| MaskFormerSemanticDatasetMapper, | |
| SemanticSegmentorWithTTA, | |
| add_mask_former_config, | |
| ) | |
| def create_ddp_model(model, *, fp16_compression=False, **kwargs): | |
| """ | |
| Create a DistributedDataParallel model if there are >1 processes. | |
| Args: | |
| model: a torch.nn.Module | |
| fp16_compression: add fp16 compression hooks to the ddp object. | |
| See more at https://pytorch.org/docs/stable/ddp_comm_hooks.html#torch.distributed.algorithms.ddp_comm_hooks.default_hooks.fp16_compress_hook | |
| kwargs: other arguments of :module:`torch.nn.parallel.DistributedDataParallel`. | |
| """ # noqa | |
| if comm.get_world_size() == 1: | |
| return model | |
| if "device_ids" not in kwargs: | |
| kwargs["device_ids"] = [comm.get_local_rank()] | |
| ddp = DistributedDataParallel(model, **kwargs) | |
| if fp16_compression: | |
| from torch.distributed.algorithms.ddp_comm_hooks import default as comm_hooks | |
| ddp.register_comm_hook(state=None, hook=comm_hooks.fp16_compress_hook) | |
| return ddp | |
| class Trainer(DefaultTrainer): | |
| """ | |
| Extension of the Trainer class adapted to DETR. | |
| """ | |
| def __init__(self, cfg): | |
| # super().__init__(cfg) | |
| self._hooks: List[HookBase] = [] | |
| self.iter: int = 0 | |
| self.start_iter: int = 0 | |
| self.max_iter: int | |
| self.storage: EventStorage | |
| _log_api_usage("trainer." + self.__class__.__name__) | |
| logger = logging.getLogger("detectron2") | |
| if not logger.isEnabledFor(logging.INFO): # setup_logger is not called for d2 | |
| setup_logger() | |
| cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size()) | |
| # Assume these objects must be constructed in this order. | |
| model = self.build_model(cfg) | |
| optimizer = self.build_optimizer(cfg, model) | |
| data_loader = self.build_train_loader(cfg) | |
| model = create_ddp_model(model, broadcast_buffers=False, find_unused_parameters=True) | |
| self._trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)( | |
| model, data_loader, optimizer | |
| ) | |
| self.scheduler = self.build_lr_scheduler(cfg, optimizer) | |
| self.checkpointer = DetectionCheckpointer( | |
| # Assume you want to save checkpoints together with logs/statistics | |
| model, | |
| cfg.OUTPUT_DIR, | |
| trainer=weakref.proxy(self), | |
| ) | |
| self.start_iter = 0 | |
| self.max_iter = cfg.SOLVER.MAX_ITER | |
| self.cfg = cfg | |
| self.register_hooks(self.build_hooks()) | |
| def build_evaluator(cls, cfg, dataset_name, output_folder=None): | |
| """ | |
| Create evaluator(s) for a given dataset. | |
| This uses the special metadata "evaluator_type" associated with each | |
| builtin dataset. For your own dataset, you can simply create an | |
| evaluator manually in your script and do not have to worry about the | |
| hacky if-else logic here. | |
| """ | |
| if output_folder is None: | |
| output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") | |
| evaluator_list = [] | |
| evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type | |
| if evaluator_type in ["sem_seg", "ade20k_panoptic_seg"]: | |
| evaluator_list.append( | |
| SemSegEvaluator( | |
| dataset_name, | |
| distributed=True, | |
| output_dir=output_folder, | |
| ) | |
| ) | |
| # import pdb; pdb.set_trace() | |
| if evaluator_type == "sem_seg_gzero": | |
| evaluator_list.append( | |
| SemSegGzeroEvaluator( | |
| dataset_name, | |
| distributed=True, | |
| output_dir=output_folder, | |
| ) | |
| ) | |
| if evaluator_type == "coco": | |
| evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder)) | |
| if evaluator_type in [ | |
| "coco_panoptic_seg", | |
| "ade20k_panoptic_seg", | |
| "cityscapes_panoptic_seg", | |
| ]: | |
| evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) | |
| if evaluator_type == "cityscapes_instance": | |
| assert ( | |
| torch.cuda.device_count() >= comm.get_rank() | |
| ), "CityscapesEvaluator currently do not work with multiple machines." | |
| return CityscapesInstanceEvaluator(dataset_name) | |
| if evaluator_type == "cityscapes_sem_seg": | |
| assert ( | |
| torch.cuda.device_count() >= comm.get_rank() | |
| ), "CityscapesEvaluator currently do not work with multiple machines." | |
| return CityscapesSemSegEvaluator(dataset_name) | |
| if evaluator_type == "cityscapes_panoptic_seg": | |
| assert ( | |
| torch.cuda.device_count() >= comm.get_rank() | |
| ), "CityscapesEvaluator currently do not work with multiple machines." | |
| evaluator_list.append(CityscapesSemSegEvaluator(dataset_name)) | |
| if len(evaluator_list) == 0: | |
| raise NotImplementedError( | |
| "no Evaluator for the dataset {} with the type {}".format( | |
| dataset_name, evaluator_type | |
| ) | |
| ) | |
| elif len(evaluator_list) == 1: | |
| return evaluator_list[0] | |
| return DatasetEvaluators(evaluator_list) | |
| def build_train_loader(cls, cfg): | |
| # Semantic segmentation dataset mapper | |
| if cfg.INPUT.DATASET_MAPPER_NAME == "mask_former_semantic": | |
| mapper = MaskFormerSemanticDatasetMapper(cfg, True) | |
| # Panoptic segmentation dataset mapper | |
| elif cfg.INPUT.DATASET_MAPPER_NAME == "mask_former_panoptic": | |
| mapper = MaskFormerPanopticDatasetMapper(cfg, True) | |
| # DETR-style dataset mapper for COCO panoptic segmentation | |
| elif cfg.INPUT.DATASET_MAPPER_NAME == "detr_panoptic": | |
| mapper = DETRPanopticDatasetMapper(cfg, True) | |
| else: | |
| mapper = None | |
| return build_detection_train_loader(cfg, mapper=mapper) | |
| def build_lr_scheduler(cls, cfg, optimizer): | |
| """ | |
| It now calls :func:`detectron2.solver.build_lr_scheduler`. | |
| Overwrite it if you'd like a different scheduler. | |
| """ | |
| return build_lr_scheduler(cfg, optimizer) | |
| def build_optimizer(cls, cfg, model): | |
| weight_decay_norm = cfg.SOLVER.WEIGHT_DECAY_NORM | |
| weight_decay_embed = cfg.SOLVER.WEIGHT_DECAY_EMBED | |
| defaults = {} | |
| defaults["lr"] = cfg.SOLVER.BASE_LR | |
| defaults["weight_decay"] = cfg.SOLVER.WEIGHT_DECAY | |
| norm_module_types = ( | |
| torch.nn.BatchNorm1d, | |
| torch.nn.BatchNorm2d, | |
| torch.nn.BatchNorm3d, | |
| torch.nn.SyncBatchNorm, | |
| # NaiveSyncBatchNorm inherits from BatchNorm2d | |
| torch.nn.GroupNorm, | |
| torch.nn.InstanceNorm1d, | |
| torch.nn.InstanceNorm2d, | |
| torch.nn.InstanceNorm3d, | |
| torch.nn.LayerNorm, | |
| torch.nn.LocalResponseNorm, | |
| ) | |
| params: List[Dict[str, Any]] = [] | |
| memo: Set[torch.nn.parameter.Parameter] = set() | |
| for module_name, module in model.named_modules(): | |
| for module_param_name, value in module.named_parameters(recurse=False): | |
| if not value.requires_grad: | |
| continue | |
| # Avoid duplicating parameters | |
| if value in memo: | |
| continue | |
| memo.add(value) | |
| hyperparams = copy.copy(defaults) | |
| if "backbone" in module_name: | |
| hyperparams["lr"] = hyperparams["lr"] * cfg.SOLVER.BACKBONE_MULTIPLIER | |
| if ( | |
| "relative_position_bias_table" in module_param_name | |
| or "absolute_pos_embed" in module_param_name | |
| ): | |
| print(module_param_name) | |
| hyperparams["weight_decay"] = 0.0 | |
| if isinstance(module, norm_module_types): | |
| hyperparams["weight_decay"] = weight_decay_norm | |
| if isinstance(module, torch.nn.Embedding): | |
| hyperparams["weight_decay"] = weight_decay_embed | |
| params.append({"params": [value], **hyperparams}) | |
| def maybe_add_full_model_gradient_clipping(optim): | |
| # detectron2 doesn't have full model gradient clipping now | |
| clip_norm_val = cfg.SOLVER.CLIP_GRADIENTS.CLIP_VALUE | |
| enable = ( | |
| cfg.SOLVER.CLIP_GRADIENTS.ENABLED | |
| and cfg.SOLVER.CLIP_GRADIENTS.CLIP_TYPE == "full_model" | |
| and clip_norm_val > 0.0 | |
| ) | |
| class FullModelGradientClippingOptimizer(optim): | |
| def step(self, closure=None): | |
| all_params = itertools.chain(*[x["params"] for x in self.param_groups]) | |
| torch.nn.utils.clip_grad_norm_(all_params, clip_norm_val) | |
| super().step(closure=closure) | |
| return FullModelGradientClippingOptimizer if enable else optim | |
| optimizer_type = cfg.SOLVER.OPTIMIZER | |
| if optimizer_type == "SGD": | |
| optimizer = maybe_add_full_model_gradient_clipping(torch.optim.SGD)( | |
| params, cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM | |
| ) | |
| elif optimizer_type == "ADAMW": | |
| optimizer = maybe_add_full_model_gradient_clipping(torch.optim.AdamW)( | |
| params, cfg.SOLVER.BASE_LR | |
| ) | |
| else: | |
| raise NotImplementedError(f"no optimizer type {optimizer_type}") | |
| if not cfg.SOLVER.CLIP_GRADIENTS.CLIP_TYPE == "full_model": | |
| optimizer = maybe_add_gradient_clipping(cfg, optimizer) | |
| return optimizer | |
| def test_with_TTA(cls, cfg, model): | |
| logger = logging.getLogger("detectron2.trainer") | |
| # In the end of training, run an evaluation with TTA. | |
| logger.info("Running inference with test-time augmentation ...") | |
| model = SemanticSegmentorWithTTA(cfg, model) | |
| evaluators = [ | |
| cls.build_evaluator( | |
| cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") | |
| ) | |
| for name in cfg.DATASETS.TEST | |
| ] | |
| res = cls.test(cfg, model, evaluators) | |
| res = OrderedDict({k + "_TTA": v for k, v in res.items()}) | |
| return res | |
| def setup(args): | |
| """ | |
| Create configs and perform basic setups. | |
| """ | |
| cfg = get_cfg() | |
| # for poly lr schedule | |
| add_deeplab_config(cfg) | |
| add_mask_former_config(cfg) | |
| cfg.merge_from_file(args.config_file) | |
| cfg.merge_from_list(args.opts) | |
| cfg.freeze() | |
| default_setup(cfg, args) | |
| # Setup logger for "mask_former" module | |
| setup_logger(output=cfg.OUTPUT_DIR, distributed_rank=comm.get_rank(), name="mask_former") | |
| return cfg | |
| def main(args): | |
| cfg = setup(args) | |
| if args.eval_only: | |
| model = Trainer.build_model(cfg) | |
| DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( | |
| cfg.MODEL.WEIGHTS, resume=args.resume | |
| ) | |
| res = Trainer.test(cfg, model) | |
| if cfg.TEST.AUG.ENABLED: | |
| res.update(Trainer.test_with_TTA(cfg, model)) | |
| if comm.is_main_process(): | |
| verify_results(cfg, res) | |
| return res | |
| trainer = Trainer(cfg) | |
| trainer.resume_or_load(resume=args.resume) | |
| return trainer.train() | |
| if __name__ == "__main__": | |
| args = default_argument_parser().parse_args() | |
| print("Command Line Args:", args) | |
| launch( | |
| main, | |
| args.num_gpus, | |
| num_machines=args.num_machines, | |
| machine_rank=args.machine_rank, | |
| dist_url=args.dist_url, | |
| args=(args,), | |
| ) | |