# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
from mmcv.parallel import DataContainer as DC

from mmdet3d.core.bbox import BaseInstance3DBoxes
from mmdet3d.core.points import BasePoints
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import to_tensor


@PIPELINES.register_module()
class DefaultFormatBundle3D:
    """Default formatting bundle for 3D data.

    It simplifies the pipeline of formatting common fields, including
    "points", "voxels", "img", "proposals", "gt_bboxes", "gt_labels" and
    their 3D counterparts. These fields are formatted as follows.

    - points: (1) take the underlying tensor, (2) to DataContainer
    - img: (1) stack the per-view tensors, (2) to DataContainer (stack=True)
    - proposals: (1) to tensor, (2) to DataContainer
    - gt_bboxes: (1) to tensor, (2) to DataContainer
    - gt_bboxes_ignore: (1) to tensor, (2) to DataContainer
    - gt_labels: (1) to tensor, (2) to DataContainer
    """

    def __init__(
        self,
        classes,
        with_gt: bool = True,
        with_label: bool = True,
    ) -> None:
        super().__init__()
        self.class_names = classes
        self.with_gt = with_gt
        self.with_label = with_label

    def __call__(self, results):
        """Call function to transform and format common fields in results.

        Args:
            results (dict): Result dict containing the data to convert.

        Returns:
            dict: The result dict containing the data formatted by the
                default bundle.
        """
        # Format 3D data
        if "points" in results:
            assert isinstance(results["points"], BasePoints)
            results["points"] = DC(results["points"].tensor)

        for key in ["voxels", "coors", "voxel_centers", "num_points"]:
            if key not in results:
                continue
            results[key] = DC(to_tensor(results[key]), stack=False)

        if self.with_gt:
            # Clean GT bboxes in the final step
            if "gt_bboxes_3d_mask" in results:
                gt_bboxes_3d_mask = results["gt_bboxes_3d_mask"]
                results["gt_bboxes_3d"] = results["gt_bboxes_3d"][gt_bboxes_3d_mask]
                if "gt_names_3d" in results:
                    results["gt_names_3d"] = results["gt_names_3d"][gt_bboxes_3d_mask]
                if "centers2d" in results:
                    results["centers2d"] = results["centers2d"][gt_bboxes_3d_mask]
                if "depths" in results:
                    results["depths"] = results["depths"][gt_bboxes_3d_mask]
            if "gt_bboxes_mask" in results:
                gt_bboxes_mask = results["gt_bboxes_mask"]
                if "gt_bboxes" in results:
                    results["gt_bboxes"] = results["gt_bboxes"][gt_bboxes_mask]
                results["gt_names"] = results["gt_names"][gt_bboxes_mask]
            if self.with_label:
                if "gt_names" in results and len(results["gt_names"]) == 0:
                    results["gt_labels"] = np.array([], dtype=np.int64)
                    results["attr_labels"] = np.array([], dtype=np.int64)
                elif "gt_names" in results and isinstance(results["gt_names"][0], list):
                    # gt_labels might be a list of lists in the multi-view setting
                    results["gt_labels"] = [
                        np.array(
                            [self.class_names.index(n) for n in res], dtype=np.int64
                        )
                        for res in results["gt_names"]
                    ]
                elif "gt_names" in results:
                    results["gt_labels"] = np.array(
                        [self.class_names.index(n) for n in results["gt_names"]],
                        dtype=np.int64,
                    )
                # We still assume one pipeline handles one LiDAR frame,
                # so the 3D names are a flat list of strings.
                if "gt_names_3d" in results:
                    results["gt_labels_3d"] = np.array(
                        [self.class_names.index(n) for n in results["gt_names_3d"]],
                        dtype=np.int64,
                    )
        if "img" in results:
            results["img"] = DC(torch.stack(results["img"]), stack=True)

        for key in [
            "proposals",
            "gt_bboxes",
            "gt_bboxes_ignore",
            "gt_labels",
            "gt_labels_3d",
            "attr_labels",
            "centers2d",
            "depths",
        ]:
            if key not in results:
                continue
            if isinstance(results[key], list):
                results[key] = DC([to_tensor(res) for res in results[key]])
            else:
                results[key] = DC(to_tensor(results[key]))
        if "gt_bboxes_3d" in results:
            if isinstance(results["gt_bboxes_3d"], BaseInstance3DBoxes):
                results["gt_bboxes_3d"] = DC(results["gt_bboxes_3d"], cpu_only=True)
            else:
                results["gt_bboxes_3d"] = DC(to_tensor(results["gt_bboxes_3d"]))
        return results
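
# The two transforms in this file are normally the last steps of a dataset
# pipeline: the bundle wraps each field in a DataContainer and ``Collect3D``
# selects what reaches the model. A hedged config sketch (``class_names``, the
# chosen keys and the elided "..." loading/augmentation steps are placeholders,
# not taken from any particular config):
#
#   train_pipeline = [
#       ...,
#       dict(type="DefaultFormatBundle3D", classes=class_names),
#       dict(type="Collect3D", keys=["img", "points", "gt_bboxes_3d", "gt_labels_3d"]),
#   ]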


@PIPELINES.register_module()
class Collect3D:
    """Collect the fields listed in ``keys``, convert the transformation
    matrices in ``meta_keys`` to tensors, and gather ``meta_lis_keys`` into a
    single ``metas`` entry."""

    def __init__(
        self,
        keys,
        meta_keys=(
            "camera_intrinsics",
            "camera2ego",
            "img_aug_matrix",
            "lidar_aug_matrix",
        ),
        meta_lis_keys=(
            "filename",
            "timestamp",
            "ori_shape",
            "img_shape",
            "lidar2image",
            "depth2img",
            "cam2img",
            "pad_shape",
            "scale_factor",
            "flip",
            "pcd_horizontal_flip",
            "pcd_vertical_flip",
            "box_mode_3d",
            "box_type_3d",
            "img_norm_cfg",
            "pcd_trans",
            "token",
            "pcd_scale_factor",
            "pcd_rotation",
            "lidar_path",
            "transformation_3d_flow",
        ),
    ):
        self.keys = keys
        self.meta_keys = meta_keys
        # FIXME: at least one key in ``meta_lis_keys`` must be present to
        # perform training.
        self.meta_lis_keys = meta_lis_keys

    def __call__(self, results):
        """Call function to collect keys in results. The keys in
        ``meta_keys`` will be converted to :obj:`mmcv.DataContainer`.

        Args:
            results (dict): Result dict containing the data to collect.

        Returns:
            dict: The result dict containing the following keys:

                - keys in ``self.keys``
                - ``metas``
        """
        data = {}
        for key in self.keys:
            if key not in self.meta_keys:
                data[key] = results[key]
        for key in self.meta_keys:
            if key in results:
                val = np.array(results[key])
                if isinstance(results[key], list):
                    data[key] = DC(to_tensor(val), stack=True)
                else:
                    data[key] = DC(to_tensor(val), stack=True, pad_dims=1)

        metas = {}
        for key in self.meta_lis_keys:
            if key in results:
                metas[key] = results[key]

        data["metas"] = DC(metas, cpu_only=True)
        return data
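

if __name__ == "__main__":
    # Minimal smoke test, not part of the library API. A hedged sketch: the
    # class names, the single all-zero box and the "dummy.bin" path are
    # invented, and LiDARInstance3DBoxes is assumed to be importable from the
    # same mmdet3d installation this module targets.
    from mmdet3d.core.bbox import LiDARInstance3DBoxes

    bundle = DefaultFormatBundle3D(classes=["car", "pedestrian"])
    collect = Collect3D(keys=["gt_bboxes_3d", "gt_labels_3d"])

    results = dict(
        gt_bboxes_3d=LiDARInstance3DBoxes(torch.zeros(1, 7)),
        gt_names_3d=["car"],
        lidar_path="dummy.bin",
    )
    results = bundle(results)
    data = collect(results)
    # "gt_bboxes_3d" is now a cpu-only DataContainer wrapping the boxes,
    # "gt_labels_3d" wraps tensor([0]), and data["metas"] carries "lidar_path".
    print(sorted(data.keys()))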