bev-project/mmdet3d/datasets/pipelines/formating.py

# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
from mmcv.parallel import DataContainer as DC

from mmdet3d.core.bbox import BaseInstance3DBoxes
from mmdet3d.core.points import BasePoints
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import to_tensor

import torch


@PIPELINES.register_module()
class DefaultFormatBundle3D:
    """Wrap the common 3D-detection fields of a sample in DataContainers.

    Every entry below is processed only when it is present in the result
    dict.

    - points: must be a ``BasePoints``; its ``.tensor`` goes to DataContainer
    - radar: its ``.tensor`` goes to DataContainer, but only when the value
      has a ``tensor`` attribute
    - voxels, coors, voxel_centers, num_points: (1)to tensor,
      (2)to DataContainer (stack=False)
    - img: (1)``torch.stack`` over the entry, (2)to DataContainer (stack=True)
    - proposals, gt_bboxes, gt_bboxes_ignore, gt_labels, gt_labels_3d,
      attr_labels, centers2d, depths: (1)to tensor, (2)to DataContainer;
      a list value is converted element by element
    - gt_bboxes_3d: to DataContainer (cpu_only=True) when it is a
      ``BaseInstance3DBoxes``, otherwise (1)to tensor, (2)to DataContainer

    Args:
        classes: Class names. A name's label id is whatever
            ``classes.index(name)`` returns.
        with_gt (bool): Whether to apply the ``gt_bboxes_3d_mask`` /
            ``gt_bboxes_mask`` filters (and, together with ``with_label``,
            derive label arrays). Defaults to True.
        with_label (bool): Whether to derive ``gt_labels`` /
            ``gt_labels_3d`` from the name fields. Only consulted when
            ``with_gt`` is True. Defaults to True.
    """

    def __init__(
        self,
        classes,
        with_gt: bool = True,
        with_label: bool = True,
    ) -> None:
        super().__init__()
        self.class_names = classes
        self.with_gt = with_gt
        self.with_label = with_label

    def _label_ids(self, names):
        """Map an iterable of class names to an int64 array of label ids."""
        return np.array(
            [self.class_names.index(name) for name in names],
            dtype=np.int64,
        )

    def _drop_masked_gt(self, results):
        """Index the GT fields in ``results`` with their masks, in place."""
        if "gt_bboxes_3d_mask" in results:
            keep_3d = results["gt_bboxes_3d_mask"]
            results["gt_bboxes_3d"] = results["gt_bboxes_3d"][keep_3d]
            # The per-box companions of the 3D boxes are optional.
            for name in ("gt_names_3d", "centers2d", "depths"):
                if name in results:
                    results[name] = results[name][keep_3d]

        if "gt_bboxes_mask" in results:
            keep_2d = results["gt_bboxes_mask"]
            if "gt_bboxes" in results:
                results["gt_bboxes"] = results["gt_bboxes"][keep_2d]
            # ``gt_names`` is indexed unconditionally whenever the mask exists.
            results["gt_names"] = results["gt_names"][keep_2d]

    def _attach_labels(self, results):
        """Derive label arrays from the name fields of ``results``, in place."""
        if "gt_names" in results:
            names = results["gt_names"]
            if len(names) == 0:
                # No 2D GT: empty labels, and ``attr_labels`` is reset too.
                results["gt_labels"] = np.array([], dtype=np.int64)
                results["attr_labels"] = np.array([], dtype=np.int64)
            elif isinstance(names[0], list):
                # gt_labels might be a list of list in multi-view setting
                results["gt_labels"] = [self._label_ids(view) for view in names]
            else:
                results["gt_labels"] = self._label_ids(names)

        # we still assume one pipeline for one frame LiDAR
        # thus, the 3D name is list[string]
        if "gt_names_3d" in results:
            results["gt_labels_3d"] = self._label_ids(results["gt_names_3d"])

    def __call__(self, results):
        """Format the common fields of ``results`` and return it.

        The dict is modified in place; the same object is returned.

        Args:
            results (dict): Result dict contains the data to convert.

        Returns:
            dict: The result dict contains the data that is formatted with
                default bundle.
        """
        # Format 3D data
        if "points" in results:
            points = results["points"]
            assert isinstance(points, BasePoints)
            results["points"] = DC(points.tensor)

        if "radar" in results:
            radar = results["radar"]
            # Values without a ``tensor`` attribute are left as they are.
            if hasattr(radar, "tensor"):
                results["radar"] = DC(radar.tensor)

        for name in ("voxels", "coors", "voxel_centers", "num_points"):
            if name in results:
                results[name] = DC(to_tensor(results[name]), stack=False)

        if self.with_gt:
            # Clean GT bboxes in the final
            self._drop_masked_gt(results)
            if self.with_label:
                self._attach_labels(results)

        if "img" in results:
            stacked = torch.stack(results["img"])
            results["img"] = DC(stacked, stack=True)

        tensor_fields = (
            "proposals",
            "gt_bboxes",
            "gt_bboxes_ignore",
            "gt_labels",
            "gt_labels_3d",
            "attr_labels",
            "centers2d",
            "depths",
        )
        for name in tensor_fields:
            if name not in results:
                continue
            value = results[name]
            if isinstance(value, list):
                wrapped = DC([to_tensor(item) for item in value])
            else:
                wrapped = DC(to_tensor(value))
            results[name] = wrapped

        if "gt_bboxes_3d" in results:
            boxes = results["gt_bboxes_3d"]
            if isinstance(boxes, BaseInstance3DBoxes):
                # Box objects are kept as-is and flagged CPU-only.
                results["gt_bboxes_3d"] = DC(boxes, cpu_only=True)
            else:
                results["gt_bboxes_3d"] = DC(to_tensor(boxes))
        return results


@PIPELINES.register_module()
class Collect3D:
    """Gather the selected entries of a result dict into the output dict.

    Args:
        keys: Names copied from the result dict into the output unchanged.
            A name that is also listed in ``meta_keys`` is skipped by this
            copy.
        meta_keys: Names whose values, when present in the result dict, are
            converted to tensors and wrapped in a stacked DataContainer.
        meta_lis_keys: Names whose values, when present in the result dict,
            are gathered into the ``metas`` dict of the output.
    """

    def __init__(
        self,
        keys,
        meta_keys=(
            "camera_intrinsics",
            "camera2ego",
            "img_aug_matrix",
            "lidar_aug_matrix",
        ),
        meta_lis_keys=(
            "filename",
            "timestamp",
            "ori_shape",
            "img_shape",
            "lidar2image",
            "depth2img",
            "cam2img",
            "pad_shape",
            "scale_factor",
            "flip",
            "pcd_horizontal_flip",
            "pcd_vertical_flip",
            "box_mode_3d",
            "box_type_3d",
            "img_norm_cfg",
            "pcd_trans",
            "token",
            "pcd_scale_factor",
            "pcd_rotation",
            "lidar_path",
            "transformation_3d_flow",
        ),
    ):
        self.keys = keys
        self.meta_keys = meta_keys
        # [fixme] note: need at least 1 meta lis key to perform training.
        self.meta_lis_keys = meta_lis_keys

    def __call__(self, results):
        """Call function to collect keys in results. The keys in ``meta_keys``
        will be converted to :obj:`mmcv.DataContainer`.

        Args:
            results (dict): Result dict contains the data to collect.

        Returns:
            dict: The result dict contains the following keys
                - keys in ``self.keys`` (those not listed in ``meta_keys``)
                - keys in ``meta_keys`` that are present in ``results``
                - ``metas``
        """
        # Plain pass-through entries; a missing name raises KeyError here.
        collected = {
            name: results[name]
            for name in self.keys
            if name not in self.meta_keys
        }

        for name in self.meta_keys:
            if name not in results:
                continue
            raw = results[name]
            tensor = to_tensor(np.array(raw))
            if isinstance(raw, list):
                collected[name] = DC(tensor, stack=True)
            else:
                # Non-list values additionally request ``pad_dims=1``.
                collected[name] = DC(tensor, stack=True, pad_dims=1)

        # Only the meta entries that actually exist are forwarded.
        metas = {
            name: results[name]
            for name in self.meta_lis_keys
            if name in results
        }
        collected["metas"] = DC(metas, cpu_only=True)
        return collected
[Major] Code release. 2022-06-03 12:21:18 +08:00			`# Copyright (c) OpenMMLab. All rights reserved.`
			`import numpy as np`
			`from mmcv.parallel import DataContainer as DC`

			`from mmdet3d.core.bbox import BaseInstance3DBoxes`
			`from mmdet3d.core.points import BasePoints`
			`from mmdet.datasets.builder import PIPELINES`
			`from mmdet.datasets.pipelines import to_tensor`

Add training details (#150) * [Major] Update training details. * [Minor] Update README.md. * [Minor] Remove comment. 2022-09-27 06:24:38 +08:00			`import torch`

[Major] Code release. 2022-06-03 12:21:18 +08:00
			`@PIPELINES.register_module()`
			`class DefaultFormatBundle3D:`
			`"""Default formatting bundle.`

			`It simplifies the pipeline of formatting common fields for voxels,`
			`including "proposals", "gt_bboxes", "gt_labels", "gt_masks" and`
			`"gt_semantic_seg".`
			`These fields are formatted as follows.`

			`- img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True)`
			`- proposals: (1)to tensor, (2)to DataContainer`
			`- gt_bboxes: (1)to tensor, (2)to DataContainer`
			`- gt_bboxes_ignore: (1)to tensor, (2)to DataContainer`
			`- gt_labels: (1)to tensor, (2)to DataContainer`
			`"""`

			`def __init__(`
			`self,`
			`classes,`
			`with_gt: bool = True,`
			`with_label: bool = True,`
			`) -> None:`
			`super().__init__()`
			`self.class_names = classes`
			`self.with_gt = with_gt`
			`self.with_label = with_label`

			`def __call__(self, results):`
			`"""Call function to transform and format common fields in results.`

			`Args:`
			`results (dict): Result dict contains the data to convert.`

			`Returns:`
			`dict: The result dict contains the data that is formatted with`
			`default bundle.`
			`"""`
			`# Format 3D data`
			`if "points" in results:`
			`assert isinstance(results["points"], BasePoints)`
			`results["points"] = DC(results["points"].tensor)`

Complete project state snapshot: Phase 4B RMT-PPAD Integration 🎯 Training Status: - Current Epoch: 2/10 (13.3% complete) - Segmentation Dice: 0.9594 - Detection IoU: 0.5742 - Training stable with 8 GPUs 🔧 Technical Achievements: - ✅ RMT-PPAD Transformer segmentation decoder integrated - ✅ Task-specific GCA architecture optimized - ✅ Multi-scale feature fusion (180×180, 360×360, 600×600) - ✅ Adaptive scale weight learning implemented - ✅ BEVFusion multi-task framework enhanced 📊 Performance Highlights: - Divider segmentation: 0.9793 Dice (excellent) - Pedestrian crossing: 0.9812 Dice (excellent) - Stop line: 0.9812 Dice (excellent) - Carpark area: 0.9802 Dice (excellent) - Walkway: 0.9401 Dice (good) - Drivable area: 0.8959 Dice (good) 🛠️ Code Changes Included: - Enhanced BEVFusion model (bevfusion.py) - RMT-PPAD integration modules (rmtppad_integration.py) - Transformer segmentation head (enhanced_transformer.py) - GCA module optimizations (gca.py) - Configuration updates (Phase 4B configs) - Training scripts and automation tools - Comprehensive documentation and analysis reports 📅 Snapshot Date: Fri Nov 14 09:06:09 UTC 2025 📍 Environment: Docker container 🎯 Phase: RMT-PPAD Integration Complete 2025-11-14 17:06:09 +08:00			`if "radar" in results and hasattr(results["radar"], "tensor"):`
Add BEVFusion-R (#443) 2023-07-08 10:53:36 +08:00			`results["radar"] = DC(results["radar"].tensor)`

[Major] Code release. 2022-06-03 12:21:18 +08:00			`for key in ["voxels", "coors", "voxel_centers", "num_points"]:`
			`if key not in results:`
			`continue`
			`results[key] = DC(to_tensor(results[key]), stack=False)`

			`if self.with_gt:`
			`# Clean GT bboxes in the final`
			`if "gt_bboxes_3d_mask" in results:`
			`gt_bboxes_3d_mask = results["gt_bboxes_3d_mask"]`
			`results["gt_bboxes_3d"] = results["gt_bboxes_3d"][gt_bboxes_3d_mask]`
			`if "gt_names_3d" in results:`
			`results["gt_names_3d"] = results["gt_names_3d"][gt_bboxes_3d_mask]`
			`if "centers2d" in results:`
			`results["centers2d"] = results["centers2d"][gt_bboxes_3d_mask]`
			`if "depths" in results:`
			`results["depths"] = results["depths"][gt_bboxes_3d_mask]`
			`if "gt_bboxes_mask" in results:`
			`gt_bboxes_mask = results["gt_bboxes_mask"]`
			`if "gt_bboxes" in results:`
			`results["gt_bboxes"] = results["gt_bboxes"][gt_bboxes_mask]`
			`results["gt_names"] = results["gt_names"][gt_bboxes_mask]`
			`if self.with_label:`
			`if "gt_names" in results and len(results["gt_names"]) == 0:`
			`results["gt_labels"] = np.array([], dtype=np.int64)`
			`results["attr_labels"] = np.array([], dtype=np.int64)`
			`elif "gt_names" in results and isinstance(results["gt_names"][0], list):`
			`# gt_labels might be a list of list in multi-view setting`
			`results["gt_labels"] = [`
			`np.array(`
			`[self.class_names.index(n) for n in res], dtype=np.int64`
			`)`
			`for res in results["gt_names"]`
			`]`
			`elif "gt_names" in results:`
			`results["gt_labels"] = np.array(`
			`[self.class_names.index(n) for n in results["gt_names"]],`
			`dtype=np.int64,`
			`)`
			`# we still assume one pipeline for one frame LiDAR`
			`# thus, the 3D name is list[string]`
			`if "gt_names_3d" in results:`
			`results["gt_labels_3d"] = np.array(`
			`[self.class_names.index(n) for n in results["gt_names_3d"]],`
			`dtype=np.int64,`
			`)`
			`if "img" in results:`
Add training details (#150) * [Major] Update training details. * [Minor] Update README.md. * [Minor] Remove comment. 2022-09-27 06:24:38 +08:00			`results["img"] = DC(torch.stack(results["img"]), stack=True)`
[Major] Code release. 2022-06-03 12:21:18 +08:00
			`for key in [`
			`"proposals",`
			`"gt_bboxes",`
			`"gt_bboxes_ignore",`
			`"gt_labels",`
			`"gt_labels_3d",`
			`"attr_labels",`
			`"centers2d",`
			`"depths",`
			`]:`
			`if key not in results:`
			`continue`
			`if isinstance(results[key], list):`
			`results[key] = DC([to_tensor(res) for res in results[key]])`
			`else:`
			`results[key] = DC(to_tensor(results[key]))`
			`if "gt_bboxes_3d" in results:`
			`if isinstance(results["gt_bboxes_3d"], BaseInstance3DBoxes):`
			`results["gt_bboxes_3d"] = DC(results["gt_bboxes_3d"], cpu_only=True)`
			`else:`
			`results["gt_bboxes_3d"] = DC(to_tensor(results["gt_bboxes_3d"]))`
			`return results`


			`@PIPELINES.register_module()`
			`class Collect3D:`
			`def __init__(`
			`self,`
			`keys,`
			`meta_keys=(`
			`"camera_intrinsics",`
			`"camera2ego",`
			`"img_aug_matrix",`
			`"lidar_aug_matrix",`
			`),`
			`meta_lis_keys=(`
			`"filename",`
			`"timestamp",`
			`"ori_shape",`
			`"img_shape",`
			`"lidar2image",`
			`"depth2img",`
			`"cam2img",`
			`"pad_shape",`
			`"scale_factor",`
			`"flip",`
			`"pcd_horizontal_flip",`
			`"pcd_vertical_flip",`
			`"box_mode_3d",`
			`"box_type_3d",`
			`"img_norm_cfg",`
			`"pcd_trans",`
			`"token",`
			`"pcd_scale_factor",`
			`"pcd_rotation",`
			`"lidar_path",`
			`"transformation_3d_flow",`
			`),`
			`):`
			`self.keys = keys`
			`self.meta_keys = meta_keys`
			`# [fixme] note: need at least 1 meta lis key to perform training.`
			`self.meta_lis_keys = meta_lis_keys`

			`def __call__(self, results):`
			"""Call function to collect keys in results. The keys in ``meta_keys``
			will be converted to :obj:`mmcv.DataContainer`.

			`Args:`
			`results (dict): Result dict contains the data to collect.`

			`Returns:`
			`dict: The result dict contains the following keys`
			- keys in ``self.keys``
			- ``metas``
			`"""`
			`data = {}`
			`for key in self.keys:`
			`if key not in self.meta_keys:`
			`data[key] = results[key]`
			`for key in self.meta_keys:`
			`if key in results:`
			`val = np.array(results[key])`
			`if isinstance(results[key], list):`
			`data[key] = DC(to_tensor(val), stack=True)`
			`else:`
			`data[key] = DC(to_tensor(val), stack=True, pad_dims=1)`

			`metas = {}`
			`for key in self.meta_lis_keys:`
			`if key in results:`
			`metas[key] = results[key]`

			`data["metas"] = DC(metas, cpu_only=True)`
			`return data`