# Enhanced multi-task config: detection + segmentation
# (Enhanced Segmentation Head)
#
# Improvements:
# 1. EnhancedBEVSegmentationHead (ASPP + dual attention + deeper decoder)
# 2. Focal Loss fix (alpha=0.25)
# 3. Mixed-in Dice Loss (weight=0.5)
# 4. Class-weight balancing (stop_line x4, divider x3)
# 5. Deep supervision
# 6. Segmentation loss weight x5
# 7. Lower learning rate (weights are reused)
#
# Reuses: epoch_19.pth (93.7% of parameters)
# Expected: mIoU 60-65% (2-3 days)

# NOTE(review): `_base_` only has an effect if the config loader used by the
# training entry point implements it — confirm, otherwise convfuser.yaml is
# never merged into this file.
_base_: ./convfuser.yaml

# Add the segmentation head and the matching pipelines.
model:
  # Make sure the fuser config is correct.
  fuser:
    type: ConvFuser
    in_channels: [80, 256]
    out_channels: 256

  heads:
    # The object head is inherited from convfuser.yaml.
    # Add the enhanced map head.
    map:
      type: EnhancedBEVSegmentationHead  # use the enhanced version
      in_channels: 512
      grid_transform:
        input_scope: [[-54.0, 54.0, 0.75], [-54.0, 54.0, 0.75]]
        output_scope: [[-50, 50, 0.5], [-50, 50, 0.5]]
      classes: ${map_classes}
      loss: focal

      # Per-class weights (to counter the nuScenes class imbalance).
      loss_weight:
        drivable_area: 1.0
        ped_crossing: 3.0
        walkway: 1.5
        stop_line: 4.0
        carpark_area: 2.0
        divider: 3.0

      # Focal Loss parameters.
      focal_alpha: 0.25
      focal_gamma: 2.0

      # Dice Loss configuration.
      use_dice_loss: true
      dice_weight: 0.5

      # Deep supervision.
      deep_supervision: true

      # Decoder configuration.
      decoder_channels: [256, 256, 128, 128]

  # Loss weights (segmentation weight x5).
  # NOTE(review): assumed to be a `model`-level key (sibling of `heads`),
  # keyed by head name — confirm against the model constructor.
  loss_scale:
    object: 1.0
    map: 5.0

# Training epochs.
max_epochs: 20

# Optimizer (lowered learning rate).
optimizer:
  lr: 1.0e-4

# Modify Collect3D to add gt_masks_bev.
train_pipeline:
  - type: LoadMultiViewImageFromFiles
    to_float32: true
  - type: LoadPointsFromFile
    coord_type: LIDAR
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    reduce_beams: ${reduce_beams}
    load_augmented: ${load_augmented}
  - type: LoadPointsFromMultiSweeps
    sweeps_num: 9
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    reduce_beams: ${reduce_beams}
    pad_empty_sweeps: true
    remove_close: true
    load_augmented: ${load_augmented}
  - type: LoadAnnotations3D
    with_bbox_3d: true
    with_label_3d: true
    with_attr_label: false
  - type: ObjectPaste
    stop_epoch: ${gt_paste_stop_epoch}
    db_sampler:
      dataset_root: ${dataset_root}
      info_path: ${dataset_root + "nuscenes_dbinfos_train.pkl"}
      rate: 1.0
      prepare:
        filter_by_difficulty: [-1]
        filter_by_min_points:
          car: 5
          truck: 5
          bus: 5
          trailer: 5
          construction_vehicle: 5
          traffic_cone: 5
          barrier: 5
          motorcycle: 5
          bicycle: 5
          pedestrian: 5
      classes: ${object_classes}
      sample_groups:
        car: 2
        truck: 3
        construction_vehicle: 7
        bus: 4
        trailer: 6
        barrier: 2
        motorcycle: 6
        bicycle: 6
        pedestrian: 2
        traffic_cone: 2
      points_loader:
        type: LoadPointsFromFile
        coord_type: LIDAR
        load_dim: ${load_dim}
        use_dim: ${use_dim}
        reduce_beams: ${reduce_beams}
  - type: ImageAug3D
    final_dim: ${image_size}
    resize_lim: ${augment2d.resize[0]}
    bot_pct_lim: [0.0, 0.0]
    rot_lim: ${augment2d.rotate}
    rand_flip: true
    is_train: true
  - type: GlobalRotScaleTrans
    resize_lim: ${augment3d.scale}
    rot_lim: ${augment3d.rotate}
    trans_lim: ${augment3d.translate}
    is_train: true
  - type: LoadBEVSegmentation
    dataset_root: ${dataset_root}
    xbound: [-50.0, 50.0, 0.5]
    ybound: [-50.0, 50.0, 0.5]
    classes: ${map_classes}
  - type: RandomFlip3D
  - type: PointsRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ObjectRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ObjectNameFilter
    classes: ${object_classes}
  - type: ImageNormalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  - type: GridMask
    use_h: true
    use_w: true
    max_epoch: ${max_epochs}
    rotate: 1
    offset: false
    ratio: 0.5
    mode: 1
    prob: ${augment2d.gridmask.prob}
    fixed_prob: ${augment2d.gridmask.fixed_prob}
  - type: PointShuffle
  - type: DefaultFormatBundle3D
    classes: ${object_classes}
  - type: Collect3D
    keys:
      - img
      - points
      - gt_bboxes_3d
      - gt_labels_3d
      - gt_masks_bev
    meta_keys:
      - camera_intrinsics
      - camera2ego
      - lidar2ego
      - lidar2camera
      - camera2lidar
      - lidar2image
      - img_aug_matrix
      - lidar_aug_matrix
  - type: GTDepth
    keyframe_only: true

# Evaluation-time pipeline: the random augmentations above are replaced by
# fixed limits (zero rotation, unit scale, no flip) with is_train: false.
test_pipeline:
  - type: LoadMultiViewImageFromFiles
    to_float32: true
  - type: LoadPointsFromFile
    coord_type: LIDAR
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    reduce_beams: ${reduce_beams}
    load_augmented: ${load_augmented}
  - type: LoadPointsFromMultiSweeps
    sweeps_num: 9
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    reduce_beams: ${reduce_beams}
    pad_empty_sweeps: true
    remove_close: true
    load_augmented: ${load_augmented}
  - type: LoadAnnotations3D
    with_bbox_3d: true
    with_label_3d: true
    with_attr_label: false
  - type: ImageAug3D
    final_dim: ${image_size}
    resize_lim: ${augment2d.resize[1]}
    bot_pct_lim: [0.0, 0.0]
    rot_lim: [0.0, 0.0]
    rand_flip: false
    is_train: false
  - type: GlobalRotScaleTrans
    resize_lim: [1.0, 1.0]
    rot_lim: [0.0, 0.0]
    trans_lim: 0.0
    is_train: false
  - type: LoadBEVSegmentation
    dataset_root: ${dataset_root}
    xbound: [-50.0, 50.0, 0.5]
    ybound: [-50.0, 50.0, 0.5]
    classes: ${map_classes}
  - type: PointsRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ImageNormalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  - type: DefaultFormatBundle3D
    classes: ${object_classes}
  - type: Collect3D
    keys:
      - img
      - points
      - gt_bboxes_3d
      - gt_labels_3d
      - gt_masks_bev
    meta_keys:
      - camera_intrinsics
      - camera2ego
      - lidar2ego
      - lidar2camera
      - camera2lidar
      - lidar2image
      - img_aug_matrix
      - lidar_aug_matrix
  - type: GTDepth
    keyframe_only: true