# BEVFusion multi-task configuration: joint 3D detection and BEV segmentation
# Inherit the base configuration
_base_:
  - ../default.yaml

model:
  type: BEVFusion

  # Multi-task head configuration
  # NOTE(review): the nesting below was reconstructed from a copy that had
  # lost its indentation — confirm the hierarchy against the model's schema.
  heads:
    # Task 1: 3D object detection
    object:
      type: TransFusionHead
      num_proposals: 200
      auxiliary: true
      in_channels: 512
      num_classes: ${num_classes}
      num_heads: 8
      nms_kernel_size: 3
      ffn_channel: 256
      dropout: 0.1
      bn_momentum: 0.1
      activation: relu
      common_heads:
        center: [2, 2]
        height: [1, 2]
        dim: [3, 2]
        rot: [2, 2]
        vel: [2, 2]
      bbox_coder:
        type: TransFusionBBoxCoder
        pc_range: ${point_cloud_range[:2]}
        post_center_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
        score_threshold: 0.0
        out_size_factor: 8
        voxel_size: ${voxel_size[:2]}
        code_size: 10
      loss_cls:
        type: FocalLoss
        use_sigmoid: true
        gamma: 2.0
        alpha: 0.25
        reduction: mean
        loss_weight: 1.0
      loss_bbox:
        type: L1Loss
        reduction: mean
        loss_weight: 0.25
      loss_iou:
        type: GIoULoss
        reduction: mean
        loss_weight: 0.0
      train_cfg:
        point_cloud_range: ${point_cloud_range}
        grid_size: [1440, 1440, 41]
        voxel_size: ${voxel_size}
        out_size_factor: 8
        gaussian_overlap: 0.1
        min_radius: 2
        pos_weight: -1
        code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]
        assigner:
          type: HungarianAssigner3D
          iou_calculator:
            type: BboxOverlaps3D
            coordinate: lidar
          cls_cost:
            type: FocalLossCost
            gamma: 2.0
            alpha: 0.25
            weight: 0.15
          reg_cost:
            type: BBoxBEVL1Cost
            weight: 0.25
          iou_cost:
            type: IoU3DCost
            weight: 0.25
      test_cfg:
        point_cloud_range: ${point_cloud_range}
        grid_size: [1440, 1440, 41]
        voxel_size: ${voxel_size}
        out_size_factor: 8
        nms_type: null

    # Task 2: BEV map segmentation
    map:
      type: BEVSegmentationHead
      in_channels: 512
      grid_transform:
        input_scope: [[-54.0, 54.0, 0.75], [-54.0, 54.0, 0.75]]
        output_scope: [[-50, 50, 0.5], [-50, 50, 0.5]]
      classes: ${map_classes}
      loss: focal

  # Per-task loss weights (adjust as needed)
  loss_scale:
    object: 1.0  # detection task weight
    map: 1.0  # segmentation task weight

# Training data pipeline (must carry both detection and segmentation
# annotations)
# NOTE(review): the nesting below was reconstructed from a copy that had lost
# its indentation — confirm each transform's arguments against its signature.
train_pipeline:
  - type: LoadMultiViewImageFromFiles
    to_float32: true
  - type: LoadPointsFromFile
    coord_type: LIDAR
    load_dim: ${load_dim}
    use_dim: ${use_dim}
  - type: LoadPointsFromMultiSweeps
    sweeps_num: 9
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    pad_empty_sweeps: true
    remove_close: true
  - type: LoadAnnotations3D
    with_bbox_3d: true
    with_label_3d: true
    with_attr_label: false
  # Load the BEV segmentation annotations
  # NOTE(review): upstream BEVFusion runs this step after GlobalRotScaleTrans
  # (it presumably reads `lidar_aug_matrix`) — confirm this position works.
  - type: LoadBEVSegmentation
    classes: ${map_classes}
  - type: ObjectPaste
    stop_epoch: -1
    db_sampler:
      dataset_root: ${dataset_root}
      info_path: ${dataset_root + "nuscenes_dbinfos_train.pkl"}
      rate: 1.0
      prepare:
        filter_by_difficulty: [-1]
        filter_by_min_points:
          car: 5
          truck: 5
          bus: 5
          trailer: 5
          construction_vehicle: 5
          traffic_cone: 5
          barrier: 5
          motorcycle: 5
          bicycle: 5
          pedestrian: 5
      classes: ${object_classes}
      sample_groups:
        car: 2
        truck: 3
        construction_vehicle: 7
        bus: 4
        trailer: 6
        barrier: 2
        motorcycle: 6
        bicycle: 6
        pedestrian: 2
        traffic_cone: 2
      points_loader:
        type: LoadPointsFromFile
        coord_type: LIDAR
        load_dim: ${load_dim}
        use_dim: ${use_dim}
  - type: ImageAug3D
    final_dim: ${image_size}
    resize_lim: ${augment2d.resize[0]}
    bot_pct_lim: [0.0, 0.0]
    rot_lim: ${augment2d.rotate}
    rand_flip: true
    is_train: true
  - type: GlobalRotScaleTrans
    resize_lim: ${augment3d.scale}
    rot_lim: ${augment3d.rotate}
    trans_lim: ${augment3d.translate}
    is_train: true
  - type: RandomFlip3D
  - type: PointsRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ObjectRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ObjectNameFilter
    classes: ${object_classes}
  - type: ImageNormalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  - type: GridMask
    use_h: true
    use_w: true
    max_epoch: ${max_epochs}
    rotate: 1
    offset: false
    ratio: 0.5
    mode: 1
    prob: ${augment2d.gridmask.prob}
    fixed_prob: ${augment2d.gridmask.fixed_prob}
  - type: PointShuffle
  - type: DefaultFormatBundle3D
    classes: ${object_classes}
  - type: Collect3D
    keys:
      - img
      - points
      - gt_bboxes_3d
      - gt_labels_3d
      - gt_masks_bev  # segmentation annotations
    meta_keys:
      - camera_intrinsics
      - camera2ego
      - lidar2ego
      - lidar2camera
      - camera2lidar
      - lidar2image
      - img_aug_matrix
      - lidar_aug_matrix

# The validation/test pipeline also has to support segmentation
# NOTE(review): the nesting below was reconstructed from a copy that had lost
# its indentation — confirm each transform's arguments against its signature.
# NOTE(review): `lidar_aug_matrix` is collected in meta_keys but this pipeline
# has no GlobalRotScaleTrans step — confirm another step provides that key.
test_pipeline:
  - type: LoadMultiViewImageFromFiles
    to_float32: true
  - type: LoadPointsFromFile
    coord_type: LIDAR
    load_dim: ${load_dim}
    use_dim: ${use_dim}
  - type: LoadPointsFromMultiSweeps
    sweeps_num: 9
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    pad_empty_sweeps: true
    remove_close: true
  - type: LoadAnnotations3D
    with_bbox_3d: true
    with_label_3d: true
    with_attr_label: false
  - type: LoadBEVSegmentation
    classes: ${map_classes}
  # Deterministic image preprocessing: no rotation, no flip
  - type: ImageAug3D
    final_dim: ${image_size}
    resize_lim: ${augment2d.resize[1]}
    bot_pct_lim: [0.0, 0.0]
    rot_lim: [0.0, 0.0]
    rand_flip: false
    is_train: false
  - type: ImageNormalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  - type: DefaultFormatBundle3D
    classes: ${object_classes}
  - type: Collect3D
    keys:
      - img
      - points
      - gt_bboxes_3d
      - gt_labels_3d
      - gt_masks_bev
    meta_keys:
      - camera_intrinsics
      - camera2ego
      - lidar2ego
      - lidar2camera
      - camera2lidar
      - lidar2image
      - img_aug_matrix
      - lidar_aug_matrix

# Evaluation configuration: evaluate detection and segmentation together
evaluation:
  interval: 1
  pipeline: ${test_pipeline}
  # Detection evaluation metrics
  # NOTE(review): `metric` is placed under `evaluation` by inference; the
  # source copy had lost its indentation — confirm it is not a top-level key.
  metric:
    - bbox
    - map

# Optimizer configuration (multi-task training may need a larger learning
# rate)
optimizer:
  type: AdamW
  lr: 2.0e-4
  weight_decay: 0.01
  paramwise_cfg:
    # Parameters matching these keys get a weight-decay multiplier of 0
    custom_keys:
      absolute_pos_embed:
        decay_mult: 0
      relative_position_bias_table:
        decay_mult: 0

# Gradient clipping settings applied by the optimizer hook
optimizer_config:
  grad_clip:
    max_norm: 35
    norm_type: 2

# Learning-rate schedule
lr_config:
  policy: CosineAnnealing
  warmup: linear
  warmup_iters: 500
  warmup_ratio: 0.33333333
  min_lr_ratio: 1.0e-3

# Training configuration
runner:
  type: EpochBasedRunner
  max_epochs: 20

# Checkpoints and logging
checkpoint_config:
  interval: 1
  max_keep_ckpts: 5

# Logging interval and the logger hooks to enable
log_config:
  interval: 50
  hooks:
    - type: TextLoggerHook

# Other settings
find_unused_parameters: false
sync_bn: true
cudnn_benchmark: true