bev-project/mmdet3d/datasets/pipelines/formating.py

201 lines
7.1 KiB
Python
Raw Normal View History

2022-06-03 12:21:18 +08:00
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
from mmcv.parallel import DataContainer as DC
from mmdet3d.core.bbox import BaseInstance3DBoxes
from mmdet3d.core.points import BasePoints
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import to_tensor
import torch
2022-06-03 12:21:18 +08:00
@PIPELINES.register_module()
class DefaultFormatBundle3D:
    """Default formatting bundle for 3D detection samples.

    Converts the common fields of a ``results`` dict into tensors wrapped in
    :obj:`mmcv.DataContainer`. Only fields present in ``results`` are touched.

    - points, radar: the object's ``.tensor`` wrapped in a DataContainer
    - voxels, coors, voxel_centers, num_points: (1) to tensor,
      (2) to DataContainer (stack=False)
    - img: (1) ``torch.stack`` over the sequence,
      (2) to DataContainer (stack=True)
    - proposals, gt_bboxes, gt_bboxes_ignore, gt_labels, gt_labels_3d,
      attr_labels, centers2d, depths: (1) to tensor, (2) to DataContainer
      (applied element-wise when the value is a list)
    - gt_bboxes_3d: DataContainer (cpu_only=True) when it is a
      ``BaseInstance3DBoxes``, otherwise (1) to tensor, (2) to DataContainer

    Args:
        classes: Sequence of class names; the position of a name in this
            sequence is used as its integer label.
        with_gt (bool): Whether to filter ground-truth fields with
            ``gt_bboxes_3d_mask`` / ``gt_bboxes_mask``. Defaults to True.
        with_label (bool): Whether to convert ``gt_names`` / ``gt_names_3d``
            into integer label arrays. Only takes effect when ``with_gt`` is
            also True. Defaults to True.
    """

    # Fields that are converted with ``to_tensor`` and wrapped in a
    # DataContainer with default arguments.
    _TENSOR_KEYS = (
        "proposals",
        "gt_bboxes",
        "gt_bboxes_ignore",
        "gt_labels",
        "gt_labels_3d",
        "attr_labels",
        "centers2d",
        "depths",
    )

    def __init__(
        self,
        classes,
        with_gt: bool = True,
        with_label: bool = True,
    ) -> None:
        super().__init__()
        self.class_names = classes
        self.with_gt = with_gt
        self.with_label = with_label

    def _label_array(self, names):
        """Map an iterable of class names to an int64 array of class indices.

        Raises:
            ValueError: If a name is not contained in ``self.class_names``.
        """
        return np.array(
            [self.class_names.index(name) for name in names], dtype=np.int64
        )

    def _apply_gt_masks(self, results):
        """Filter ground-truth fields in place with the stored validity masks."""
        if "gt_bboxes_3d_mask" in results:
            mask_3d = results["gt_bboxes_3d_mask"]
            results["gt_bboxes_3d"] = results["gt_bboxes_3d"][mask_3d]
            for key in ("gt_names_3d", "centers2d", "depths"):
                if key in results:
                    results[key] = results[key][mask_3d]

        if "gt_bboxes_mask" in results:
            mask_2d = results["gt_bboxes_mask"]
            # ``gt_names`` is guarded like every other optional field so that
            # a sample carrying 2D boxes without names does not raise KeyError.
            for key in ("gt_bboxes", "gt_names"):
                if key in results:
                    results[key] = results[key][mask_2d]

    def _names_to_labels(self, results):
        """Add ``gt_labels`` / ``gt_labels_3d`` derived from the name fields."""
        if "gt_names" in results:
            gt_names = results["gt_names"]
            if len(gt_names) == 0:
                results["gt_labels"] = np.array([], dtype=np.int64)
                results["attr_labels"] = np.array([], dtype=np.int64)
            elif isinstance(gt_names[0], list):
                # gt_labels might be a list of list in multi-view setting
                results["gt_labels"] = [
                    self._label_array(view_names) for view_names in gt_names
                ]
            else:
                results["gt_labels"] = self._label_array(gt_names)

        # we still assume one pipeline for one frame LiDAR
        # thus, the 3D name is list[string]
        if "gt_names_3d" in results:
            results["gt_labels_3d"] = self._label_array(results["gt_names_3d"])

    def __call__(self, results):
        """Call function to transform and format common fields in results.

        Args:
            results (dict): Result dict contains the data to convert.

        Returns:
            dict: The same dict, modified in place, with the fields formatted
            by the default bundle.
        """
        # Format 3D data
        if "points" in results:
            assert isinstance(
                results["points"], BasePoints
            ), "results['points'] must be a BasePoints instance"
            results["points"] = DC(results["points"].tensor)

        # Radar data is only unwrapped when it exposes a ``tensor`` attribute;
        # anything else is passed through untouched.
        if "radar" in results and hasattr(results["radar"], "tensor"):
            results["radar"] = DC(results["radar"].tensor)

        for key in ["voxels", "coors", "voxel_centers", "num_points"]:
            if key not in results:
                continue
            results[key] = DC(to_tensor(results[key]), stack=False)

        # NOTE(review): nesting reconstructed from a flattened source; label
        # conversion is assumed to run only when ``with_gt`` is set - confirm.
        if self.with_gt:
            # Clean GT bboxes in the final
            self._apply_gt_masks(results)
            if self.with_label:
                self._names_to_labels(results)

        if "img" in results:
            results["img"] = DC(torch.stack(results["img"]), stack=True)

        for key in self._TENSOR_KEYS:
            if key not in results:
                continue
            if isinstance(results[key], list):
                results[key] = DC([to_tensor(res) for res in results[key]])
            else:
                results[key] = DC(to_tensor(results[key]))

        if "gt_bboxes_3d" in results:
            if isinstance(results["gt_bboxes_3d"], BaseInstance3DBoxes):
                # Box objects are kept as-is and never moved to GPU by the
                # container.
                results["gt_bboxes_3d"] = DC(results["gt_bboxes_3d"], cpu_only=True)
            else:
                results["gt_bboxes_3d"] = DC(to_tensor(results["gt_bboxes_3d"]))
        return results
@PIPELINES.register_module()
class Collect3D:
    """Gather the final set of fields handed to the model.

    Three groups of keys are collected from ``results``:

    - ``keys``: copied over unchanged (entries that also appear in
      ``meta_keys`` are handled by the next rule instead).
    - ``meta_keys``: converted to a tensor via ``np.array`` + ``to_tensor``
      and wrapped in a stacked :obj:`mmcv.DataContainer`; skipped when absent.
    - ``meta_lis_keys``: copied into a plain dict stored under ``"metas"`` as
      a cpu-only DataContainer; skipped when absent.

    Args:
        keys: Names of the fields to copy from ``results``. Each one must be
            present in ``results`` unless it is also listed in ``meta_keys``.
        meta_keys: Names of the fields to convert to stacked tensors.
        meta_lis_keys: Names of the fields to store in ``"metas"``.
    """

    def __init__(
        self,
        keys,
        meta_keys=(
            "camera_intrinsics",
            "camera2ego",
            "img_aug_matrix",
            "lidar_aug_matrix",
        ),
        meta_lis_keys=(
            "filename",
            "timestamp",
            "ori_shape",
            "img_shape",
            "lidar2image",
            "depth2img",
            "cam2img",
            "pad_shape",
            "scale_factor",
            "flip",
            "pcd_horizontal_flip",
            "pcd_vertical_flip",
            "box_mode_3d",
            "box_type_3d",
            "img_norm_cfg",
            "pcd_trans",
            "token",
            "pcd_scale_factor",
            "pcd_rotation",
            "lidar_path",
            "transformation_3d_flow",
        ),
    ):
        self.keys = keys
        self.meta_keys = meta_keys
        # [fixme] note: need at least 1 meta lis key to perform training.
        self.meta_lis_keys = meta_lis_keys

    def __call__(self, results):
        """Collect the configured keys from ``results``.

        The keys in ``meta_keys`` will be converted to
        :obj:`mmcv.DataContainer`.

        Args:
            results (dict): Result dict contains the data to collect.

        Returns:
            dict: A new dict containing

                - keys in ``self.keys``
                - keys in ``self.meta_keys`` that exist in ``results``
                - ``metas``

        Raises:
            KeyError: If an entry of ``self.keys`` that is not a meta key is
                missing from ``results``.
        """
        # Plain fields first, so they keep their position ahead of meta fields.
        collected = {
            name: results[name] for name in self.keys if name not in self.meta_keys
        }

        for name in self.meta_keys:
            if name not in results:
                continue
            raw = results[name]
            tensor = to_tensor(np.array(raw))
            # List-valued entries are stacked without a padding spec, while
            # every other value is stacked with ``pad_dims=1``.
            if isinstance(raw, list):
                collected[name] = DC(tensor, stack=True)
            else:
                collected[name] = DC(tensor, stack=True, pad_dims=1)

        meta_info = {
            name: results[name] for name in self.meta_lis_keys if name in results
        }
        collected["metas"] = DC(meta_info, cpu_only=True)
        return collected