# bev-project/configs/nuscenes/multitask/default.yaml

# BEVFusion multi-task configuration: 3D object detection and BEV map
# segmentation are trained together.
# Settings not overridden here come from the base config listed below.
_base_:
  - ../default.yaml
# Model definition: one BEVFusion model with two task heads.
model:
  type: BEVFusion
  # Multi-task head configuration
  heads:
    # Task 1: 3D object detection
    object:
      type: TransFusionHead
      num_proposals: 200
      auxiliary: true
      in_channels: 512
      num_classes: ${num_classes}
      num_heads: 8
      nms_kernel_size: 3
      ffn_channel: 256
      dropout: 0.1
      bn_momentum: 0.1
      activation: relu
      common_heads:
        center: [2, 2]
        height: [1, 2]
        dim: [3, 2]
        rot: [2, 2]
        vel: [2, 2]
      bbox_coder:
        type: TransFusionBBoxCoder
        pc_range: ${point_cloud_range[:2]}
        post_center_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
        score_threshold: 0.0
        out_size_factor: 8
        voxel_size: ${voxel_size[:2]}
        code_size: 10
      loss_cls:
        type: FocalLoss
        use_sigmoid: true
        gamma: 2.0
        alpha: 0.25
        reduction: mean
        loss_weight: 1.0
      loss_bbox:
        type: L1Loss
        reduction: mean
        loss_weight: 0.25
      # loss_weight is 0.0, so this term contributes nothing to the total.
      loss_iou:
        type: GIoULoss
        reduction: mean
        loss_weight: 0.0
      train_cfg:
        point_cloud_range: ${point_cloud_range}
        grid_size: [1440, 1440, 41]
        voxel_size: ${voxel_size}
        out_size_factor: 8
        gaussian_overlap: 0.1
        min_radius: 2
        pos_weight: -1
        code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]
        assigner:
          type: HungarianAssigner3D
          iou_calculator:
            type: BboxOverlaps3D
            coordinate: lidar
          cls_cost:
            type: FocalLossCost
            gamma: 2.0
            alpha: 0.25
            weight: 0.15
          reg_cost:
            type: BBoxBEVL1Cost
            weight: 0.25
          iou_cost:
            type: IoU3DCost
            weight: 0.25
      test_cfg:
        point_cloud_range: ${point_cloud_range}
        grid_size: [1440, 1440, 41]
        voxel_size: ${voxel_size}
        out_size_factor: 8
        nms_type: null
    # Task 2: BEV map segmentation
    map:
      type: BEVSegmentationHead
      in_channels: 512
      grid_transform:
        input_scope: [[-54.0, 54.0, 0.75], [-54.0, 54.0, 0.75]]
        output_scope: [[-50, 50, 0.5], [-50, 50, 0.5]]
      classes: ${map_classes}
      loss: focal
  # Per-task loss weights (adjust to the actual training behaviour).
  loss_scale:
    object: 1.0  # detection task weight
    map: 1.0  # segmentation task weight
# Training data pipeline: must carry both detection and segmentation
# annotations so that both heads receive targets.
train_pipeline:
  - type: LoadMultiViewImageFromFiles
    to_float32: true
  - type: LoadPointsFromFile
    coord_type: LIDAR
    load_dim: ${load_dim}
    use_dim: ${use_dim}
  - type: LoadPointsFromMultiSweeps
    sweeps_num: 9
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    pad_empty_sweeps: true
    remove_close: true
  - type: LoadAnnotations3D
    with_bbox_3d: true
    with_label_3d: true
    with_attr_label: false
  - type: ObjectPaste
    stop_epoch: -1
    db_sampler:
      dataset_root: ${dataset_root}
      info_path: ${dataset_root + "nuscenes_dbinfos_train.pkl"}
      rate: 1.0
      prepare:
        filter_by_difficulty: [-1]
        filter_by_min_points:
          car: 5
          truck: 5
          bus: 5
          trailer: 5
          construction_vehicle: 5
          traffic_cone: 5
          barrier: 5
          motorcycle: 5
          bicycle: 5
          pedestrian: 5
      classes: ${object_classes}
      sample_groups:
        car: 2
        truck: 3
        construction_vehicle: 7
        bus: 4
        trailer: 6
        barrier: 2
        motorcycle: 6
        bicycle: 6
        pedestrian: 2
        traffic_cone: 2
      points_loader:
        type: LoadPointsFromFile
        coord_type: LIDAR
        load_dim: ${load_dim}
        use_dim: ${use_dim}
  - type: ImageAug3D
    final_dim: ${image_size}
    resize_lim: ${augment2d.resize[0]}
    bot_pct_lim: [0.0, 0.0]
    rot_lim: ${augment2d.rotate}
    rand_flip: true
    is_train: true
  - type: GlobalRotScaleTrans
    resize_lim: ${augment3d.scale}
    rot_lim: ${augment3d.rotate}
    trans_lim: ${augment3d.translate}
    is_train: true
  # Load the BEV segmentation annotations.
  # Placed after GlobalRotScaleTrans: the upstream BEVFusion loader reads the
  # `lidar_aug_matrix` written by that step, so running it earlier fails or
  # yields masks that ignore the 3D augmentation.
  # NOTE(review): confirm this fork's LoadBEVSegmentation behaves the same.
  - type: LoadBEVSegmentation
    classes: ${map_classes}
  - type: RandomFlip3D
  - type: PointsRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ObjectRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ObjectNameFilter
    classes: ${object_classes}
  - type: ImageNormalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  - type: GridMask
    use_h: true
    use_w: true
    max_epoch: ${max_epochs}
    rotate: 1
    offset: false
    ratio: 0.5
    mode: 1
    prob: ${augment2d.gridmask.prob}
    fixed_prob: ${augment2d.gridmask.fixed_prob}
  - type: PointShuffle
  - type: DefaultFormatBundle3D
    classes: ${object_classes}
  - type: Collect3D
    keys:
      - img
      - points
      - gt_bboxes_3d
      - gt_labels_3d
      - gt_masks_bev  # segmentation target added for the map head
    meta_keys:
      - camera_intrinsics
      - camera2ego
      - lidar2ego
      - lidar2camera
      - camera2lidar
      - lidar2image
      - img_aug_matrix
      - lidar_aug_matrix
# Validation/test pipeline: must also support segmentation, so it loads the
# BEV masks alongside the 3D boxes. No random augmentation is applied.
test_pipeline:
  - type: LoadMultiViewImageFromFiles
    to_float32: true
  - type: LoadPointsFromFile
    coord_type: LIDAR
    load_dim: ${load_dim}
    use_dim: ${use_dim}
  - type: LoadPointsFromMultiSweeps
    sweeps_num: 9
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    pad_empty_sweeps: true
    remove_close: true
  - type: LoadAnnotations3D
    with_bbox_3d: true
    with_label_3d: true
    with_attr_label: false
  - type: ImageAug3D
    final_dim: ${image_size}
    resize_lim: ${augment2d.resize[1]}
    bot_pct_lim: [0.0, 0.0]
    rot_lim: [0.0, 0.0]
    rand_flip: false
    is_train: false
  # Identity 3D transform (scale 1, no rotation, no translation). It is here
  # only so that `lidar_aug_matrix`, listed in meta_keys below, is produced;
  # no other step in this pipeline would create it.
  # NOTE(review): assumes GlobalRotScaleTrans is the step that writes
  # `lidar_aug_matrix`, as in upstream BEVFusion - confirm for this fork.
  - type: GlobalRotScaleTrans
    resize_lim: [1.0, 1.0]
    rot_lim: [0.0, 0.0]
    trans_lim: 0.0
    is_train: false
  - type: LoadBEVSegmentation
    classes: ${map_classes}
  # Same point-range filtering as the training pipeline.
  - type: PointsRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ImageNormalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  - type: DefaultFormatBundle3D
    classes: ${object_classes}
  - type: Collect3D
    keys:
      - img
      - points
      - gt_bboxes_3d
      - gt_labels_3d
      - gt_masks_bev
    meta_keys:
      - camera_intrinsics
      - camera2ego
      - lidar2ego
      - lidar2camera
      - camera2lidar
      - lidar2image
      - img_aug_matrix
      - lidar_aug_matrix
# Evaluation: detection and segmentation are evaluated together, once per
# `interval`, using the test pipeline defined in this file.
evaluation:
  interval: 1
  pipeline: ${test_pipeline}
  # Evaluation metrics to request.
  metric:
    - bbox
    - map
# Optimizer (multi-task training may need a larger learning rate).
optimizer:
  type: AdamW
  lr: 2.0e-4
  weight_decay: 0.01
  paramwise_cfg:
    custom_keys:
      # decay_mult: 0 turns weight decay off for parameters matching these keys.
      absolute_pos_embed:
        decay_mult: 0
      relative_position_bias_table:
        decay_mult: 0
# Gradient clipping: L2 norm (norm_type 2) capped at 35.
optimizer_config:
  grad_clip:
    max_norm: 35
    norm_type: 2
# Learning-rate schedule: cosine annealing with a linear warmup.
lr_config:
  policy: CosineAnnealing
  warmup: linear
  warmup_iters: 500
  warmup_ratio: 0.33333333
  min_lr_ratio: 1.0e-3
# Training loop.
# NOTE(review): GridMask in train_pipeline reads ${max_epochs}, which is
# defined outside this file; keep the literal below in sync with it.
runner:
  type: EpochBasedRunner
  max_epochs: 20
# Checkpointing and logging.
checkpoint_config:
  interval: 1
  max_keep_ckpts: 5
# Logging: text logger hook, reporting every 50 units of `interval`.
log_config:
  interval: 50
  hooks:
    - type: TextLoggerHook
# Miscellaneous settings.
# NOTE(review): presumably consumed by the training launcher (distributed
# wrapper, BatchNorm conversion, cuDNN autotuning) - confirm against the
# training script.
find_unused_parameters: false
sync_bn: true
cudnn_benchmark: true