bev-project/configs/nuscenes/multitask/default.yaml

311 lines
7.2 KiB
YAML
Raw Normal View History

# BEVFusion multi-task configuration: joint 3D object detection and
# BEV map segmentation.
# Inherit the base configuration.
_base_:
- ../default.yaml
# Model definition: one BEVFusion model with two task heads.
# `${...}` placeholders refer to values that are not defined in this section
# (presumably supplied by the inherited base configuration).
model:
  type: BEVFusion
  # Multi-task head configuration.
  heads:
    # Task 1: 3D object detection.
    object:
      type: TransFusionHead
      num_proposals: 200
      auxiliary: true
      in_channels: 512
      num_classes: ${num_classes}
      num_heads: 8
      nms_kernel_size: 3
      ffn_channel: 256
      dropout: 0.1
      bn_momentum: 0.1
      activation: relu
      common_heads:
        center: [2, 2]
        height: [1, 2]
        dim: [3, 2]
        rot: [2, 2]
        vel: [2, 2]
      bbox_coder:
        type: TransFusionBBoxCoder
        pc_range: ${point_cloud_range[:2]}
        post_center_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
        score_threshold: 0.0
        out_size_factor: 8
        voxel_size: ${voxel_size[:2]}
        code_size: 10
      loss_cls:
        type: FocalLoss
        use_sigmoid: true
        gamma: 2.0
        alpha: 0.25
        reduction: mean
        loss_weight: 1.0
      loss_bbox:
        type: L1Loss
        reduction: mean
        loss_weight: 0.25
      loss_iou:
        type: GIoULoss
        reduction: mean
        loss_weight: 0.0
      train_cfg:
        point_cloud_range: ${point_cloud_range}
        grid_size: [1440, 1440, 41]
        voxel_size: ${voxel_size}
        out_size_factor: 8
        gaussian_overlap: 0.1
        min_radius: 2
        pos_weight: -1
        code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]
        assigner:
          type: HungarianAssigner3D
          iou_calculator:
            type: BboxOverlaps3D
            coordinate: lidar
          cls_cost:
            type: FocalLossCost
            gamma: 2.0
            alpha: 0.25
            weight: 0.15
          reg_cost:
            type: BBoxBEVL1Cost
            weight: 0.25
          iou_cost:
            type: IoU3DCost
            weight: 0.25
      test_cfg:
        point_cloud_range: ${point_cloud_range}
        grid_size: [1440, 1440, 41]
        voxel_size: ${voxel_size}
        out_size_factor: 8
        nms_type: null
    # Task 2: BEV map segmentation.
    map:
      type: BEVSegmentationHead
      in_channels: 512
      grid_transform:
        input_scope: [[-54.0, 54.0, 0.75], [-54.0, 54.0, 0.75]]
        output_scope: [[-50, 50, 0.5], [-50, 50, 0.5]]
      classes: ${map_classes}
      loss: focal
  # Per-task loss weights (tune as needed).
  # NOTE(review): placed under `model` next to `heads`; confirm this is where
  # the model class expects to receive it.
  loss_scale:
    object: 1.0  # Weight of the detection task.
    map: 1.0  # Weight of the segmentation task.
# Training data pipeline: must load both the detection and the segmentation
# annotations. Steps are listed in the order they appear here; every
# `${...}` placeholder refers to a value defined outside this section.
train_pipeline:
- type: LoadMultiViewImageFromFiles
  to_float32: true
- type: LoadPointsFromFile
  coord_type: LIDAR
  load_dim: ${load_dim}
  use_dim: ${use_dim}
- type: LoadPointsFromMultiSweeps
  sweeps_num: 9
  load_dim: ${load_dim}
  use_dim: ${use_dim}
  pad_empty_sweeps: true
  remove_close: true
- type: LoadAnnotations3D
  with_bbox_3d: true
  with_label_3d: true
  with_attr_label: false
- type: LoadBEVSegmentation  # Load the BEV segmentation annotations.
  classes: ${map_classes}
- type: ObjectPaste
  stop_epoch: -1
  db_sampler:
    dataset_root: ${dataset_root}
    info_path: ${dataset_root + "nuscenes_dbinfos_train.pkl"}
    rate: 1.0
    prepare:
      filter_by_difficulty: [-1]
      filter_by_min_points:
        car: 5
        truck: 5
        bus: 5
        trailer: 5
        construction_vehicle: 5
        traffic_cone: 5
        barrier: 5
        motorcycle: 5
        bicycle: 5
        pedestrian: 5
    classes: ${object_classes}
    sample_groups:
      car: 2
      truck: 3
      construction_vehicle: 7
      bus: 4
      trailer: 6
      barrier: 2
      motorcycle: 6
      bicycle: 6
      pedestrian: 2
      traffic_cone: 2
    points_loader:
      type: LoadPointsFromFile
      coord_type: LIDAR
      load_dim: ${load_dim}
      use_dim: ${use_dim}
- type: ImageAug3D
  final_dim: ${image_size}
  resize_lim: ${augment2d.resize[0]}
  bot_pct_lim: [0.0, 0.0]
  rot_lim: ${augment2d.rotate}
  rand_flip: true
  is_train: true
- type: GlobalRotScaleTrans
  resize_lim: ${augment3d.scale}
  rot_lim: ${augment3d.rotate}
  trans_lim: ${augment3d.translate}
  is_train: true
- type: RandomFlip3D
- type: PointsRangeFilter
  point_cloud_range: ${point_cloud_range}
- type: ObjectRangeFilter
  point_cloud_range: ${point_cloud_range}
- type: ObjectNameFilter
  classes: ${object_classes}
- type: ImageNormalize
  mean: [0.485, 0.456, 0.406]
  std: [0.229, 0.224, 0.225]
- type: GridMask
  use_h: true
  use_w: true
  max_epoch: ${max_epochs}
  rotate: 1
  offset: false
  ratio: 0.5
  mode: 1
  prob: ${augment2d.gridmask.prob}
  fixed_prob: ${augment2d.gridmask.fixed_prob}
- type: PointShuffle
- type: DefaultFormatBundle3D
  classes: ${object_classes}
- type: Collect3D
  keys:
  - img
  - points
  - gt_bboxes_3d
  - gt_labels_3d
  - gt_masks_bev  # Added: segmentation ground truth.
  meta_keys:
  - camera_intrinsics
  - camera2ego
  - lidar2ego
  - lidar2camera
  - camera2lidar
  - lidar2image
  - img_aug_matrix
  - lidar_aug_matrix
# Validation/test pipeline: must also support segmentation.
# Compared with the training pipeline it has no ObjectPaste, GridMask,
# shuffling, flipping or range/name filtering, and ImageAug3D runs with
# is_train: false.
# NOTE(review): `lidar_aug_matrix` is collected below, but this pipeline has
# no GlobalRotScaleTrans step (the training pipeline does); confirm that the
# key is still populated at test time.
test_pipeline:
- type: LoadMultiViewImageFromFiles
  to_float32: true
- type: LoadPointsFromFile
  coord_type: LIDAR
  load_dim: ${load_dim}
  use_dim: ${use_dim}
- type: LoadPointsFromMultiSweeps
  sweeps_num: 9
  load_dim: ${load_dim}
  use_dim: ${use_dim}
  pad_empty_sweeps: true
  remove_close: true
- type: LoadAnnotations3D
  with_bbox_3d: true
  with_label_3d: true
  with_attr_label: false
- type: LoadBEVSegmentation
  classes: ${map_classes}
- type: ImageAug3D
  final_dim: ${image_size}
  resize_lim: ${augment2d.resize[1]}
  bot_pct_lim: [0.0, 0.0]
  rot_lim: [0.0, 0.0]
  rand_flip: false
  is_train: false
- type: ImageNormalize
  mean: [0.485, 0.456, 0.406]
  std: [0.229, 0.224, 0.225]
- type: DefaultFormatBundle3D
  classes: ${object_classes}
- type: Collect3D
  keys:
  - img
  - points
  - gt_bboxes_3d
  - gt_labels_3d
  - gt_masks_bev
  meta_keys:
  - camera_intrinsics
  - camera2ego
  - lidar2ego
  - lidar2camera
  - camera2lidar
  - lidar2image
  - img_aug_matrix
  - lidar_aug_matrix
# Evaluation: evaluate detection and segmentation together, every
# `interval` epochs/iterations as counted by the runner, using the test
# pipeline.
evaluation:
  interval: 1
  pipeline: ${test_pipeline}
  # Metrics to report.
  # NOTE(review): presumably `bbox` covers detection and `map` covers
  # segmentation; confirm against the dataset's evaluate() implementation.
  metric:
  - bbox
  - map
# Optimizer (multi-task training may need a larger learning rate).
optimizer:
  type: AdamW
  lr: 2.0e-4
  weight_decay: 0.01
  paramwise_cfg:
    # Parameters whose names match these keys get decay_mult: 0.
    custom_keys:
      absolute_pos_embed:
        decay_mult: 0
      relative_position_bias_table:
        decay_mult: 0
# Optimizer step settings: gradient clipping with max_norm 35, norm_type 2.
optimizer_config:
  grad_clip:
    max_norm: 35
    norm_type: 2
# Learning-rate schedule: CosineAnnealing policy with a linear warmup.
lr_config:
  policy: CosineAnnealing
  warmup: linear
  warmup_iters: 500
  warmup_ratio: 0.33333333
  min_lr_ratio: 1.0e-3
# Training configuration.
# NOTE(review): max_epochs is hard-coded here, while the GridMask step in
# train_pipeline reads `${max_epochs}`; confirm the two values agree.
runner:
  type: EpochBasedRunner
  max_epochs: 20
# Checkpointing and logging.
checkpoint_config:
  interval: 1
  max_keep_ckpts: 5
# Logging: a single text logger hook, with interval 50.
log_config:
  interval: 50
  hooks:
  - type: TextLoggerHook
# Miscellaneous settings.
find_unused_parameters: false
sync_bn: true
cudnn_benchmark: true