# Source: bev-project/configs/nuscenes/multitask/fusion-det-seg-swint-enhanc... (YAML, 284 lines, 6.3 KiB)

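# Enhanced multi-task config: detection + segmentation (Enhanced Segmentation Head)
#
# Improvements:
# 1. EnhancedBEVSegmentationHead (ASPP + dual attention + deep decoder)
# 2. Focal Loss fix (alpha=0.25)
# 3. Dice Loss mixed in (weight=0.5)
# 4. Class-weight balancing (stop_line ×4, divider ×3)
# 5. Deep supervision
# 6. Segmentation loss weight ×5
# 7. Lower learning rate (weights are reused)
#
# Reuses: epoch_19.pth (93.7% of parameters)
# Expected: mIoU 60-65% (2-3 days)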
_base_: ./convfuser.yaml

# Adds the segmentation head and the data pipelines on top of the base config.
# NOTE(review): the nesting below was reconstructed from a listing whose
# indentation had been stripped; confirm that loss_scale belongs under model.
model:
  # Make sure the fuser configuration is correct.
  fuser:
    type: ConvFuser
    in_channels: [80, 256]
    out_channels: 256

  heads:
    # The object head is inherited from convfuser.yaml.
    # Add the enhanced map head.
    map:
      type: EnhancedBEVSegmentationHead  # use the enhanced head
      in_channels: 512
      grid_transform:
        input_scope: [[-54.0, 54.0, 0.75], [-54.0, 54.0, 0.75]]
        output_scope: [[-50, 50, 0.5], [-50, 50, 0.5]]
      classes: ${map_classes}
      loss: focal
      # Per-class weights to counter the nuScenes class imbalance.
      loss_weight:
        drivable_area: 1.0
        ped_crossing: 3.0
        walkway: 1.5
        stop_line: 4.0
        carpark_area: 2.0
        divider: 3.0
      # Focal Loss parameters.
      focal_alpha: 0.25
      focal_gamma: 2.0
      # Dice Loss configuration.
      use_dice_loss: true
      dice_weight: 0.5
      # Deep supervision.
      deep_supervision: true
      # Decoder configuration.
      decoder_channels: [256, 256, 128, 128]

  # Loss scaling: the segmentation (map) loss is weighted ×5.
  loss_scale:
    object: 1.0
    map: 5.0
# Number of training epochs.
max_epochs: 20

# Optimizer (learning rate lowered).
optimizer:
  lr: 1.0e-4
# Training pipeline; Collect3D is modified to also collect gt_masks_bev.
# NOTE(review): nesting reconstructed from a listing whose indentation had
# been stripped; confirm against the original file.
train_pipeline:
  - type: LoadMultiViewImageFromFiles
    to_float32: true
  - type: LoadPointsFromFile
    coord_type: LIDAR
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    reduce_beams: ${reduce_beams}
    load_augmented: ${load_augmented}
  - type: LoadPointsFromMultiSweeps
    sweeps_num: 9
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    reduce_beams: ${reduce_beams}
    pad_empty_sweeps: true
    remove_close: true
    load_augmented: ${load_augmented}
  - type: LoadAnnotations3D
    with_bbox_3d: true
    with_label_3d: true
    with_attr_label: false
  - type: ObjectPaste
    stop_epoch: ${gt_paste_stop_epoch}
    db_sampler:
      dataset_root: ${dataset_root}
      info_path: ${dataset_root + "nuscenes_dbinfos_train.pkl"}
      rate: 1.0
      prepare:
        filter_by_difficulty: [-1]
        filter_by_min_points:
          car: 5
          truck: 5
          bus: 5
          trailer: 5
          construction_vehicle: 5
          traffic_cone: 5
          barrier: 5
          motorcycle: 5
          bicycle: 5
          pedestrian: 5
      classes: ${object_classes}
      sample_groups:
        car: 2
        truck: 3
        construction_vehicle: 7
        bus: 4
        trailer: 6
        barrier: 2
        motorcycle: 6
        bicycle: 6
        pedestrian: 2
        traffic_cone: 2
      points_loader:
        type: LoadPointsFromFile
        coord_type: LIDAR
        load_dim: ${load_dim}
        use_dim: ${use_dim}
        reduce_beams: ${reduce_beams}
  - type: ImageAug3D
    final_dim: ${image_size}
    resize_lim: ${augment2d.resize[0]}
    bot_pct_lim: [0.0, 0.0]
    rot_lim: ${augment2d.rotate}
    rand_flip: true
    is_train: true
  - type: GlobalRotScaleTrans
    resize_lim: ${augment3d.scale}
    rot_lim: ${augment3d.rotate}
    trans_lim: ${augment3d.translate}
    is_train: true
  - type: LoadBEVSegmentation
    dataset_root: ${dataset_root}
    xbound: [-50.0, 50.0, 0.5]
    ybound: [-50.0, 50.0, 0.5]
    classes: ${map_classes}
  - type: RandomFlip3D
  - type: PointsRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ObjectRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ObjectNameFilter
    classes: ${object_classes}
  - type: ImageNormalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  - type: GridMask
    use_h: true
    use_w: true
    max_epoch: ${max_epochs}
    rotate: 1
    offset: false
    ratio: 0.5
    mode: 1
    prob: ${augment2d.gridmask.prob}
    fixed_prob: ${augment2d.gridmask.fixed_prob}
  - type: PointShuffle
  - type: DefaultFormatBundle3D
    classes: ${object_classes}
  - type: Collect3D
    keys:
      - img
      - points
      - gt_bboxes_3d
      - gt_labels_3d
      - gt_masks_bev
    meta_keys:
      - camera_intrinsics
      - camera2ego
      - lidar2ego
      - lidar2camera
      - camera2lidar
      - lidar2image
      - img_aug_matrix
      - lidar_aug_matrix
  - type: GTDepth
    keyframe_only: true
# Test pipeline: same loading steps as training, with the image and global
# augmentations configured with fixed (non-random) limits and is_train: false.
# NOTE(review): nesting reconstructed from a listing whose indentation had
# been stripped; confirm against the original file.
test_pipeline:
  - type: LoadMultiViewImageFromFiles
    to_float32: true
  - type: LoadPointsFromFile
    coord_type: LIDAR
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    reduce_beams: ${reduce_beams}
    load_augmented: ${load_augmented}
  - type: LoadPointsFromMultiSweeps
    sweeps_num: 9
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    reduce_beams: ${reduce_beams}
    pad_empty_sweeps: true
    remove_close: true
    load_augmented: ${load_augmented}
  - type: LoadAnnotations3D
    with_bbox_3d: true
    with_label_3d: true
    with_attr_label: false
  - type: ImageAug3D
    final_dim: ${image_size}
    resize_lim: ${augment2d.resize[1]}
    bot_pct_lim: [0.0, 0.0]
    rot_lim: [0.0, 0.0]
    rand_flip: false
    is_train: false
  - type: GlobalRotScaleTrans
    resize_lim: [1.0, 1.0]
    rot_lim: [0.0, 0.0]
    trans_lim: 0.0
    is_train: false
  - type: LoadBEVSegmentation
    dataset_root: ${dataset_root}
    xbound: [-50.0, 50.0, 0.5]
    ybound: [-50.0, 50.0, 0.5]
    classes: ${map_classes}
  - type: PointsRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ImageNormalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  - type: DefaultFormatBundle3D
    classes: ${object_classes}
  - type: Collect3D
    keys:
      - img
      - points
      - gt_bboxes_3d
      - gt_labels_3d
      - gt_masks_bev
    meta_keys:
      - camera_intrinsics
      - camera2ego
      - lidar2ego
      - lidar2camera
      - camera2lidar
      - lidar2image
      - img_aug_matrix
      - lidar_aug_matrix
  - type: GTDepth
    keyframe_only: true