# BEVFusion multi-task config: joint 3D object detection and BEV map
# segmentation.
#
# Values written as ${...} are interpolations. Apart from ${test_pipeline},
# the keys they reference are not defined in this file; presumably they are
# supplied by the base config -- confirm.
#
# NOTE(review): the block structure below was restored from a copy of this
# file whose line breaks and indentation had been collapsed (which made it
# unparseable). Keys, values and their order are unchanged; nesting that
# could not be determined unambiguously is flagged inline.

# Inherit the base configuration.
_base_:
  - ../default.yaml

model:
  type: BEVFusion

  # Multi-task heads: one entry per task.
  heads:
    # Task 1: 3D object detection.
    object:
      type: TransFusionHead
      num_proposals: 200
      auxiliary: true
      in_channels: 512
      num_classes: ${num_classes}
      num_heads: 8
      nms_kernel_size: 3
      ffn_channel: 256
      dropout: 0.1
      bn_momentum: 0.1
      activation: relu
      common_heads:
        center: [2, 2]
        height: [1, 2]
        dim: [3, 2]
        rot: [2, 2]
        vel: [2, 2]
      bbox_coder:
        type: TransFusionBBoxCoder
        pc_range: ${point_cloud_range[:2]}
        post_center_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
        score_threshold: 0.0
        out_size_factor: 8
        voxel_size: ${voxel_size[:2]}
        code_size: 10
      loss_cls:
        type: FocalLoss
        use_sigmoid: true
        gamma: 2.0
        alpha: 0.25
        reduction: mean
        loss_weight: 1.0
      loss_bbox:
        type: L1Loss
        reduction: mean
        loss_weight: 0.25
      # loss_weight is 0.0, so this term contributes nothing to the total.
      loss_iou:
        type: GIoULoss
        reduction: mean
        loss_weight: 0.0
      train_cfg:
        point_cloud_range: ${point_cloud_range}
        grid_size: [1440, 1440, 41]
        voxel_size: ${voxel_size}
        out_size_factor: 8
        gaussian_overlap: 0.1
        min_radius: 2
        pos_weight: -1
        code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]
        assigner:
          type: HungarianAssigner3D
          iou_calculator:
            type: BboxOverlaps3D
            coordinate: lidar
          cls_cost:
            type: FocalLossCost
            gamma: 2.0
            alpha: 0.25
            weight: 0.15
          reg_cost:
            type: BBoxBEVL1Cost
            weight: 0.25
          iou_cost:
            type: IoU3DCost
            weight: 0.25
      test_cfg:
        point_cloud_range: ${point_cloud_range}
        grid_size: [1440, 1440, 41]
        voxel_size: ${voxel_size}
        out_size_factor: 8
        nms_type: null

    # Task 2: BEV map segmentation.
    map:
      type: BEVSegmentationHead
      in_channels: 512
      grid_transform:
        # NOTE(review): if the BEV feature map follows grid_size 1440 with
        # out_size_factor 8 over +/-54 m, its cell size would be 0.6 m, not
        # 0.75 -- confirm the step value of input_scope.
        input_scope: [[-54.0, 54.0, 0.75], [-54.0, 54.0, 0.75]]
        output_scope: [[-50, 50, 0.5], [-50, 50, 0.5]]
      classes: ${map_classes}
      loss: focal

  # Per-task loss weights (tune as needed).
  # NOTE(review): placed as a sibling of `heads` so that it is not treated
  # as a third head; the original nesting was lost -- confirm.
  loss_scale:
    object: 1.0  # weight of the detection task
    map: 1.0  # weight of the segmentation task

# Training data pipeline (must provide both detection and segmentation
# annotations).
train_pipeline:
  - type: LoadMultiViewImageFromFiles
    to_float32: true
  - type: LoadPointsFromFile
    coord_type: LIDAR
    load_dim: ${load_dim}
    use_dim: ${use_dim}
  - type: LoadPointsFromMultiSweeps
    sweeps_num: 9
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    pad_empty_sweeps: true
    remove_close: true
  - type: LoadAnnotations3D
    with_bbox_3d: true
    with_label_3d: true
    with_attr_label: false
  # Load the BEV segmentation annotations.
  # NOTE(review): this step runs before GlobalRotScaleTrans. If the loader
  # depends on the lidar augmentation matrix (as the upstream BEVFusion
  # loader does), it has to run after that step -- confirm.
  - type: LoadBEVSegmentation
    classes: ${map_classes}
  - type: ObjectPaste
    stop_epoch: -1
    db_sampler:
      dataset_root: ${dataset_root}
      info_path: ${dataset_root + "nuscenes_dbinfos_train.pkl"}
      rate: 1.0
      prepare:
        filter_by_difficulty: [-1]
        filter_by_min_points:
          car: 5
          truck: 5
          bus: 5
          trailer: 5
          construction_vehicle: 5
          traffic_cone: 5
          barrier: 5
          motorcycle: 5
          bicycle: 5
          pedestrian: 5
      classes: ${object_classes}
      sample_groups:
        car: 2
        truck: 3
        construction_vehicle: 7
        bus: 4
        trailer: 6
        barrier: 2
        motorcycle: 6
        bicycle: 6
        pedestrian: 2
        traffic_cone: 2
      points_loader:
        type: LoadPointsFromFile
        coord_type: LIDAR
        load_dim: ${load_dim}
        use_dim: ${use_dim}
  - type: ImageAug3D
    final_dim: ${image_size}
    resize_lim: ${augment2d.resize[0]}
    bot_pct_lim: [0.0, 0.0]
    rot_lim: ${augment2d.rotate}
    rand_flip: true
    is_train: true
  - type: GlobalRotScaleTrans
    resize_lim: ${augment3d.scale}
    rot_lim: ${augment3d.rotate}
    trans_lim: ${augment3d.translate}
    is_train: true
  - type: RandomFlip3D
  - type: PointsRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ObjectRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ObjectNameFilter
    classes: ${object_classes}
  - type: ImageNormalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  - type: GridMask
    use_h: true
    use_w: true
    # NOTE(review): ${max_epochs} is not a top-level key of this file
    # (runner.max_epochs below is a different key); presumably it comes
    # from the base config -- confirm.
    max_epoch: ${max_epochs}
    rotate: 1
    offset: false
    ratio: 0.5
    mode: 1
    prob: ${augment2d.gridmask.prob}
    fixed_prob: ${augment2d.gridmask.fixed_prob}
  - type: PointShuffle
  - type: DefaultFormatBundle3D
    classes: ${object_classes}
  - type: Collect3D
    keys:
      - img
      - points
      - gt_bboxes_3d
      - gt_labels_3d
      - gt_masks_bev  # segmentation ground truth
    meta_keys:
      - camera_intrinsics
      - camera2ego
      - lidar2ego
      - lidar2camera
      - camera2lidar
      - lidar2image
      - img_aug_matrix
      - lidar_aug_matrix

# The validation/test pipeline must support segmentation as well.
# NOTE(review): lidar_aug_matrix is collected below, but this pipeline has
# no GlobalRotScaleTrans step; confirm that some other step provides it.
test_pipeline:
  - type: LoadMultiViewImageFromFiles
    to_float32: true
  - type: LoadPointsFromFile
    coord_type: LIDAR
    load_dim: ${load_dim}
    use_dim: ${use_dim}
  - type: LoadPointsFromMultiSweeps
    sweeps_num: 9
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    pad_empty_sweeps: true
    remove_close: true
  - type: LoadAnnotations3D
    with_bbox_3d: true
    with_label_3d: true
    with_attr_label: false
  - type: LoadBEVSegmentation
    classes: ${map_classes}
  # Augmentation is disabled: zero rotation, no flip, is_train false.
  - type: ImageAug3D
    final_dim: ${image_size}
    resize_lim: ${augment2d.resize[1]}
    bot_pct_lim: [0.0, 0.0]
    rot_lim: [0.0, 0.0]
    rand_flip: false
    is_train: false
  - type: ImageNormalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  - type: DefaultFormatBundle3D
    classes: ${object_classes}
  - type: Collect3D
    keys:
      - img
      - points
      - gt_bboxes_3d
      - gt_labels_3d
      - gt_masks_bev
    meta_keys:
      - camera_intrinsics
      - camera2ego
      - lidar2ego
      - lidar2camera
      - camera2lidar
      - lidar2image
      - img_aug_matrix
      - lidar_aug_matrix

# Evaluation: evaluate detection and segmentation together.
evaluation:
  interval: 1
  pipeline: ${test_pipeline}
  # Detection evaluation metrics.
  metric:
    - bbox
    - map

# Optimizer (multi-task training may need a larger learning rate).
optimizer:
  type: AdamW
  lr: 2.0e-4
  weight_decay: 0.01
  paramwise_cfg:
    # decay_mult 0 for both parameter-name patterns below.
    custom_keys:
      absolute_pos_embed:
        decay_mult: 0
      relative_position_bias_table:
        decay_mult: 0

optimizer_config:
  grad_clip:
    max_norm: 35
    norm_type: 2

# Learning-rate schedule.
lr_config:
  policy: CosineAnnealing
  warmup: linear
  warmup_iters: 500
  warmup_ratio: 0.33333333
  min_lr_ratio: 1.0e-3

# Training loop.
# NOTE(review): max_epochs is nested under runner here; the original
# nesting was lost, so confirm it was not meant to be a top-level key.
runner:
  type: EpochBasedRunner
  max_epochs: 20

# Checkpointing and logging.
checkpoint_config:
  interval: 1
  max_keep_ckpts: 5

log_config:
  interval: 50
  hooks:
    - type: TextLoggerHook

# Miscellaneous settings.
find_unused_parameters: false
sync_bn: true
cudnn_benchmark: true