# Copyright (c) OpenMMLab. All rights reserved. import numpy as np from mmcv.parallel import DataContainer as DC from mmdet3d.core.bbox import BaseInstance3DBoxes from mmdet3d.core.points import BasePoints from mmdet.datasets.builder import PIPELINES from mmdet.datasets.pipelines import to_tensor import torch @PIPELINES.register_module() class DefaultFormatBundle3D: """Default formatting bundle. It simplifies the pipeline of formatting common fields for voxels, including "proposals", "gt_bboxes", "gt_labels", "gt_masks" and "gt_semantic_seg". These fields are formatted as follows. - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) - proposals: (1)to tensor, (2)to DataContainer - gt_bboxes: (1)to tensor, (2)to DataContainer - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer - gt_labels: (1)to tensor, (2)to DataContainer """ def __init__( self, classes, with_gt: bool = True, with_label: bool = True, ) -> None: super().__init__() self.class_names = classes self.with_gt = with_gt self.with_label = with_label def __call__(self, results): """Call function to transform and format common fields in results. Args: results (dict): Result dict contains the data to convert. Returns: dict: The result dict contains the data that is formatted with default bundle. """ # Format 3D data if "points" in results: assert isinstance(results["points"], BasePoints) results["points"] = DC(results["points"].tensor) if "radar" in results and hasattr(results["radar"], "tensor"): results["radar"] = DC(results["radar"].tensor) for key in ["voxels", "coors", "voxel_centers", "num_points"]: if key not in results: continue results[key] = DC(to_tensor(results[key]), stack=False) if self.with_gt: # Clean GT bboxes in the final if "gt_bboxes_3d_mask" in results: gt_bboxes_3d_mask = results["gt_bboxes_3d_mask"] results["gt_bboxes_3d"] = results["gt_bboxes_3d"][gt_bboxes_3d_mask] if "gt_names_3d" in results: results["gt_names_3d"] = results["gt_names_3d"][gt_bboxes_3d_mask] if "centers2d" in results: results["centers2d"] = results["centers2d"][gt_bboxes_3d_mask] if "depths" in results: results["depths"] = results["depths"][gt_bboxes_3d_mask] if "gt_bboxes_mask" in results: gt_bboxes_mask = results["gt_bboxes_mask"] if "gt_bboxes" in results: results["gt_bboxes"] = results["gt_bboxes"][gt_bboxes_mask] results["gt_names"] = results["gt_names"][gt_bboxes_mask] if self.with_label: if "gt_names" in results and len(results["gt_names"]) == 0: results["gt_labels"] = np.array([], dtype=np.int64) results["attr_labels"] = np.array([], dtype=np.int64) elif "gt_names" in results and isinstance(results["gt_names"][0], list): # gt_labels might be a list of list in multi-view setting results["gt_labels"] = [ np.array( [self.class_names.index(n) for n in res], dtype=np.int64 ) for res in results["gt_names"] ] elif "gt_names" in results: results["gt_labels"] = np.array( [self.class_names.index(n) for n in results["gt_names"]], dtype=np.int64, ) # we still assume one pipeline for one frame LiDAR # thus, the 3D name is list[string] if "gt_names_3d" in results: results["gt_labels_3d"] = np.array( [self.class_names.index(n) for n in results["gt_names_3d"]], dtype=np.int64, ) if "img" in results: results["img"] = DC(torch.stack(results["img"]), stack=True) for key in [ "proposals", "gt_bboxes", "gt_bboxes_ignore", "gt_labels", "gt_labels_3d", "attr_labels", "centers2d", "depths", ]: if key not in results: continue if isinstance(results[key], list): results[key] = DC([to_tensor(res) for res in results[key]]) else: results[key] = DC(to_tensor(results[key])) if "gt_bboxes_3d" in results: if isinstance(results["gt_bboxes_3d"], BaseInstance3DBoxes): results["gt_bboxes_3d"] = DC(results["gt_bboxes_3d"], cpu_only=True) else: results["gt_bboxes_3d"] = DC(to_tensor(results["gt_bboxes_3d"])) return results @PIPELINES.register_module() class Collect3D: def __init__( self, keys, meta_keys=( "camera_intrinsics", "camera2ego", "img_aug_matrix", "lidar_aug_matrix", ), meta_lis_keys=( "filename", "timestamp", "ori_shape", "img_shape", "lidar2image", "depth2img", "cam2img", "pad_shape", "scale_factor", "flip", "pcd_horizontal_flip", "pcd_vertical_flip", "box_mode_3d", "box_type_3d", "img_norm_cfg", "pcd_trans", "token", "pcd_scale_factor", "pcd_rotation", "lidar_path", "transformation_3d_flow", ), ): self.keys = keys self.meta_keys = meta_keys # [fixme] note: need at least 1 meta lis key to perform training. self.meta_lis_keys = meta_lis_keys def __call__(self, results): """Call function to collect keys in results. The keys in ``meta_keys`` will be converted to :obj:`mmcv.DataContainer`. Args: results (dict): Result dict contains the data to collect. Returns: dict: The result dict contains the following keys - keys in ``self.keys`` - ``metas`` """ data = {} for key in self.keys: if key not in self.meta_keys: data[key] = results[key] for key in self.meta_keys: if key in results: val = np.array(results[key]) if isinstance(results[key], list): data[key] = DC(to_tensor(val), stack=True) else: data[key] = DC(to_tensor(val), stack=True, pad_dims=1) metas = {} for key in self.meta_lis_keys: if key in results: metas[key] = results[key] data["metas"] = DC(metas, cpu_only=True) return data