# bev-project/configs/nuscenes/multitask/default.yaml

# BEVFusion multi-task configuration: joint 3D object detection and BEV map segmentation
# Inherit from the base configuration one directory up.
_base_: [../default.yaml]

# Model definition: a BEVFusion model whose task heads are declared under
# `heads` (detection head `object` first, followed by the `map` head).
model:
  type: BEVFusion

  # Multi-task head configuration
  heads:
    # Task 1: 3D object detection
    object:
      type: TransFusionHead
      num_proposals: 200
      auxiliary: true
      in_channels: 512
      num_classes: ${num_classes}
      num_heads: 8
      nms_kernel_size: 3
      ffn_channel: 256
      dropout: 0.1
      bn_momentum: 0.1
      activation: relu
      # Per-attribute regression branches.
      # NOTE(review): each pair is presumably [output channels, conv layers] — confirm.
      common_heads:
        center: [2, 2]
        height: [1, 2]
        dim: [3, 2]
        rot: [2, 2]
        vel: [2, 2]
      bbox_coder:
        type: TransFusionBBoxCoder
        # Only the first two entries of the range / voxel size are passed here.
        pc_range: ${point_cloud_range[:2]}
        post_center_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
        score_threshold: 0.0
        out_size_factor: 8
        voxel_size: ${voxel_size[:2]}
        code_size: 10
      loss_cls:
        type: FocalLoss
        use_sigmoid: true
        gamma: 2.0
        alpha: 0.25
        reduction: mean
        loss_weight: 1.0
      loss_bbox:
        type: L1Loss
        reduction: mean
        loss_weight: 0.25
      loss_iou:
        type: GIoULoss
        reduction: mean
        # NOTE(review): weight is 0.0 — presumably this term is disabled; confirm.
        loss_weight: 0.0
      train_cfg:
        point_cloud_range: ${point_cloud_range}
        # NOTE(review): hard-coded, unlike the interpolated range / voxel size
        # next to it — confirm it matches the values in the base config.
        grid_size: [1440, 1440, 41]
        voxel_size: ${voxel_size}
        out_size_factor: 8
        gaussian_overlap: 0.1
        min_radius: 2
        pos_weight: -1
        # Ten weights, one per entry of the box code (code_size: 10 above).
        code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]
        assigner:
          type: HungarianAssigner3D
          iou_calculator:
            type: BboxOverlaps3D
            coordinate: lidar
          cls_cost:
            type: FocalLossCost
            gamma: 2.0
            alpha: 0.25
            weight: 0.15
          reg_cost:
            type: BBoxBEVL1Cost
            weight: 0.25
          iou_cost:
            type: IoU3DCost
            weight: 0.25
      # Test-time settings repeat the geometry used in train_cfg.
      test_cfg:
        point_cloud_range: ${point_cloud_range}
        grid_size: [1440, 1440, 41]
        voxel_size: ${voxel_size}
        out_size_factor: 8
        nms_type: null

    # Task 2: BEV map segmentation
    map:
      type: BEVSegmentationHead
      in_channels: 512
      grid_transform:
        # NOTE(review): each scope is presumably [min, max, step] per BEV axis.
        # The input step is set to the BEV feature cell size implied by the
        # object head's grid: 108 m / (1440 voxels / out_size_factor 8) = 0.6 m.
        # This assumes both heads consume the same BEV feature map — confirm
        # against the base config.
        input_scope: [[-54.0, 54.0, 0.6], [-54.0, 54.0, 0.6]]
        output_scope: [[-50, 50, 0.5], [-50, 50, 0.5]]
      classes: ${map_classes}
      loss: focal

  # Per-task loss weights (tune as needed): `object` is the detection task,
  # `map` is the segmentation task.
  loss_scale: {object: 1.0, map: 1.0}

# Training data pipeline. It has to produce both the detection targets
# (gt_bboxes_3d / gt_labels_3d) and the segmentation target (gt_masks_bev).
train_pipeline:
  - type: LoadMultiViewImageFromFiles
    to_float32: true
  - type: LoadPointsFromFile
    coord_type: LIDAR
    load_dim: ${load_dim}
    use_dim: ${use_dim}
  - type: LoadPointsFromMultiSweeps
    sweeps_num: 9
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    pad_empty_sweeps: true
    remove_close: true
  - type: LoadAnnotations3D
    with_bbox_3d: true
    with_label_3d: true
    with_attr_label: false
  # Ground-truth database sampling for the detection task.
  - type: ObjectPaste
    stop_epoch: -1
    db_sampler:
      dataset_root: ${dataset_root}
      info_path: ${dataset_root + "nuscenes_dbinfos_train.pkl"}
      rate: 1.0
      prepare:
        filter_by_difficulty: [-1]
        filter_by_min_points:
          car: 5
          truck: 5
          bus: 5
          trailer: 5
          construction_vehicle: 5
          traffic_cone: 5
          barrier: 5
          motorcycle: 5
          bicycle: 5
          pedestrian: 5
      classes: ${object_classes}
      sample_groups:
        car: 2
        truck: 3
        construction_vehicle: 7
        bus: 4
        trailer: 6
        barrier: 2
        motorcycle: 6
        bicycle: 6
        pedestrian: 2
        traffic_cone: 2
      points_loader:
        type: LoadPointsFromFile
        coord_type: LIDAR
        load_dim: ${load_dim}
        use_dim: ${use_dim}
  - type: ImageAug3D
    final_dim: ${image_size}
    resize_lim: ${augment2d.resize[0]}
    bot_pct_lim: [0.0, 0.0]
    rot_lim: ${augment2d.rotate}
    rand_flip: true
    is_train: true
  - type: GlobalRotScaleTrans
    resize_lim: ${augment3d.scale}
    rot_lim: ${augment3d.rotate}
    trans_lim: ${augment3d.translate}
    is_train: true
  # Load the BEV segmentation masks. This step is placed after
  # GlobalRotScaleTrans and before RandomFlip3D so the masks follow the same
  # lidar augmentation as the points and boxes.
  # NOTE(review): upstream BEVFusion's LoadBEVSegmentation reads the
  # `lidar_aug_matrix` written by GlobalRotScaleTrans and requires
  # dataset_root / xbound / ybound — confirm against this project's
  # implementation. The bounds mirror the map head's output_scope.
  - type: LoadBEVSegmentation
    dataset_root: ${dataset_root}
    xbound: [-50.0, 50.0, 0.5]
    ybound: [-50.0, 50.0, 0.5]
    classes: ${map_classes}
  - type: RandomFlip3D
  - type: PointsRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ObjectRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ObjectNameFilter
    classes: ${object_classes}
  - type: ImageNormalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  - type: GridMask
    use_h: true
    use_w: true
    max_epoch: ${max_epochs}
    rotate: 1
    offset: false
    ratio: 0.5
    mode: 1
    prob: ${augment2d.gridmask.prob}
    fixed_prob: ${augment2d.gridmask.fixed_prob}
  - type: PointShuffle
  - type: DefaultFormatBundle3D
    classes: ${object_classes}
  - type: Collect3D
    keys:
      - img
      - points
      - gt_bboxes_3d
      - gt_labels_3d
      - gt_masks_bev  # segmentation target
    meta_keys:
      - camera_intrinsics
      - camera2ego
      - lidar2ego
      - lidar2camera
      - camera2lidar
      - lidar2image
      - img_aug_matrix
      - lidar_aug_matrix

# Validation / test pipeline. It also has to provide the segmentation target
# so both tasks can be evaluated.
test_pipeline:
  - type: LoadMultiViewImageFromFiles
    to_float32: true
  - type: LoadPointsFromFile
    coord_type: LIDAR
    load_dim: ${load_dim}
    use_dim: ${use_dim}
  - type: LoadPointsFromMultiSweeps
    sweeps_num: 9
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    pad_empty_sweeps: true
    remove_close: true
  - type: LoadAnnotations3D
    with_bbox_3d: true
    with_label_3d: true
    with_attr_label: false
  - type: ImageAug3D
    final_dim: ${image_size}
    resize_lim: ${augment2d.resize[1]}
    bot_pct_lim: [0.0, 0.0]
    rot_lim: [0.0, 0.0]
    rand_flip: false
    is_train: false
  # Identity lidar transform (unit scale, zero rotation / translation).
  # NOTE(review): in upstream BEVFusion this step is what writes
  # `lidar_aug_matrix`, which is listed in meta_keys below and read by
  # LoadBEVSegmentation — confirm against this project's implementation.
  - type: GlobalRotScaleTrans
    resize_lim: [1.0, 1.0]
    rot_lim: [0.0, 0.0]
    trans_lim: 0.0
    is_train: false
  # Same arguments as in the training pipeline; the bounds mirror the map
  # head's output_scope.
  - type: LoadBEVSegmentation
    dataset_root: ${dataset_root}
    xbound: [-50.0, 50.0, 0.5]
    ybound: [-50.0, 50.0, 0.5]
    classes: ${map_classes}
  # Keep the point range consistent with the training pipeline.
  - type: PointsRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ImageNormalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  - type: DefaultFormatBundle3D
    classes: ${object_classes}
  - type: Collect3D
    keys:
      - img
      - points
      - gt_bboxes_3d
      - gt_labels_3d
      - gt_masks_bev
    meta_keys:
      - camera_intrinsics
      - camera2ego
      - lidar2ego
      - lidar2camera
      - camera2lidar
      - lidar2image
      - img_aug_matrix
      - lidar_aug_matrix

# Evaluation settings: evaluate detection and segmentation together, using the
# test pipeline and an interval of 1.
evaluation:
  interval: 1
  pipeline: ${test_pipeline}
  # Metrics requested from the evaluator.
  metric: [bbox, map]

# Optimizer settings (author's note: multi-task training may need a larger
# learning rate).
optimizer:
  type: AdamW
  lr: 2.0e-4
  weight_decay: 0.01
  paramwise_cfg:
    # Parameters matched by these keys get a weight-decay multiplier of 0.
    custom_keys:
      absolute_pos_embed: {decay_mult: 0}
      relative_position_bias_table: {decay_mult: 0}

# Gradient clipping settings.
optimizer_config:
  grad_clip: {max_norm: 35, norm_type: 2}

# Learning-rate schedule: cosine annealing with a linear warmup.
lr_config:
  policy: CosineAnnealing
  warmup: linear
  warmup_iters: 500
  warmup_ratio: 0.33333333
  min_lr_ratio: 1.0e-3

# Training loop settings
# NOTE(review): max_epochs is hard-coded here, while GridMask in
# train_pipeline reads ${max_epochs} — confirm the two values agree.
runner:
  type: EpochBasedRunner
  max_epochs: 20

# Checkpointing and logging
checkpoint_config:
  interval: 1
  max_keep_ckpts: 5

# Logging: a single text logger hook with an interval of 50.
log_config:
  interval: 50
  hooks:
    - {type: TextLoggerHook}

# Miscellaneous settings
find_unused_parameters: false
sync_bn: true
cudnn_benchmark: true