---
# Enhanced multi-task config: detection + segmentation (Enhanced Segmentation Head)
#
# Improvements:
# 1. EnhancedBEVSegmentationHead (ASPP + dual attention + deeper decoder)
# 2. Focal Loss fix (alpha=0.25)
# 3. Dice Loss mixed in (weight=0.5)
# 4. Class-weight balancing (stop_line x4, divider x3)
# 5. Deep supervision
# 6. Segmentation loss weight x5
# 7. Lower learning rate (pretrained weights are reused)
#
# Reuses: epoch_19.pth (93.7% of the parameters)
# Expected: mIoU 60-65% (2-3 days)

# Parent config; every key not overridden in this file comes from it.
_base_: ./convfuser.yaml

# Add the segmentation head and the pipelines.
model:
  # Make sure the fuser configuration is correct.
  fuser:
    type: ConvFuser
    in_channels: [80, 256]
    out_channels: 256

  heads:
    # The object head is inherited from convfuser.yaml.
    # Add the enhanced map head.
    map:
      type: EnhancedBEVSegmentationHead  # <- use the enhanced version
      in_channels: 512
      grid_transform:
        input_scope: [[-54.0, 54.0, 0.75], [-54.0, 54.0, 0.75]]
        output_scope: [[-50, 50, 0.5], [-50, 50, 0.5]]
      classes: ${map_classes}
      loss: focal

      # Per-class loss weights (to counter the nuScenes class imbalance).
      loss_weight:
        drivable_area: 1.0
        ped_crossing: 3.0
        walkway: 1.5
        stop_line: 4.0
        carpark_area: 2.0
        divider: 3.0

      # Focal Loss parameters.
      focal_alpha: 0.25
      focal_gamma: 2.0

      # Dice Loss configuration.
      use_dice_loss: true
      dice_weight: 0.5

      # Deep supervision.
      deep_supervision: true

      # Decoder configuration.
      decoder_channels: [256, 256, 128, 128]

  # Loss weights (segmentation weight x5).
  # NOTE(review): placed directly under `model`, as a sibling of `heads`;
  # the nesting was reconstructed from a flattened copy -- confirm.
  loss_scale:
    object: 1.0
    map: 5.0

# Number of training epochs.
max_epochs: 20

# Optimizer (reduced learning rate).
optimizer:
  lr: 1.0e-4

# Override Collect3D so that gt_masks_bev is collected as well.
# The steps are listed in the same order as the original file.
train_pipeline:
  - type: LoadMultiViewImageFromFiles
    to_float32: true
  - type: LoadPointsFromFile
    coord_type: LIDAR
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    reduce_beams: ${reduce_beams}
    load_augmented: ${load_augmented}
  - type: LoadPointsFromMultiSweeps
    sweeps_num: 9
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    reduce_beams: ${reduce_beams}
    pad_empty_sweeps: true
    remove_close: true
    load_augmented: ${load_augmented}
  - type: LoadAnnotations3D
    with_bbox_3d: true
    with_label_3d: true
    with_attr_label: false
  - type: ObjectPaste
    stop_epoch: ${gt_paste_stop_epoch}
    # NOTE(review): the db_sampler subtree below was re-nested from a
    # flattened copy; confirm `classes`, `sample_groups` and `points_loader`
    # are siblings of `prepare`.
    db_sampler:
      dataset_root: ${dataset_root}
      info_path: ${dataset_root + "nuscenes_dbinfos_train.pkl"}
      rate: 1.0
      prepare:
        filter_by_difficulty: [-1]
        filter_by_min_points:
          car: 5
          truck: 5
          bus: 5
          trailer: 5
          construction_vehicle: 5
          traffic_cone: 5
          barrier: 5
          motorcycle: 5
          bicycle: 5
          pedestrian: 5
      classes: ${object_classes}
      sample_groups:
        car: 2
        truck: 3
        construction_vehicle: 7
        bus: 4
        trailer: 6
        barrier: 2
        motorcycle: 6
        bicycle: 6
        pedestrian: 2
        traffic_cone: 2
      points_loader:
        type: LoadPointsFromFile
        coord_type: LIDAR
        load_dim: ${load_dim}
        use_dim: ${use_dim}
        reduce_beams: ${reduce_beams}
  - type: ImageAug3D
    final_dim: ${image_size}
    resize_lim: ${augment2d.resize[0]}
    bot_pct_lim: [0.0, 0.0]
    rot_lim: ${augment2d.rotate}
    rand_flip: true
    is_train: true
  - type: GlobalRotScaleTrans
    resize_lim: ${augment3d.scale}
    rot_lim: ${augment3d.rotate}
    trans_lim: ${augment3d.translate}
    is_train: true
  # The bounds below use the same range and step as the map head's
  # grid_transform.output_scope in this file.
  - type: LoadBEVSegmentation
    dataset_root: ${dataset_root}
    xbound: [-50.0, 50.0, 0.5]
    ybound: [-50.0, 50.0, 0.5]
    classes: ${map_classes}
  - type: RandomFlip3D
  - type: PointsRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ObjectRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ObjectNameFilter
    classes: ${object_classes}
  - type: ImageNormalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  - type: GridMask
    use_h: true
    use_w: true
    max_epoch: ${max_epochs}
    rotate: 1
    offset: false
    ratio: 0.5
    mode: 1
    prob: ${augment2d.gridmask.prob}
    fixed_prob: ${augment2d.gridmask.fixed_prob}
  - type: PointShuffle
  - type: DefaultFormatBundle3D
    classes: ${object_classes}
  - type: Collect3D
    keys:
      - img
      - points
      - gt_bboxes_3d
      - gt_labels_3d
      - gt_masks_bev
    meta_keys:
      - camera_intrinsics
      - camera2ego
      - lidar2ego
      - lidar2camera
      - camera2lidar
      - lidar2image
      - img_aug_matrix
      - lidar_aug_matrix
  - type: GTDepth
    keyframe_only: true

# Evaluation pipeline: same loading steps as train_pipeline, but with no
# ObjectPaste, RandomFlip3D, object filters, GridMask or PointShuffle steps,
# and with fixed (non-random) limits for the two augmentation steps.
test_pipeline:
  - type: LoadMultiViewImageFromFiles
    to_float32: true
  - type: LoadPointsFromFile
    coord_type: LIDAR
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    reduce_beams: ${reduce_beams}
    load_augmented: ${load_augmented}
  - type: LoadPointsFromMultiSweeps
    sweeps_num: 9
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    reduce_beams: ${reduce_beams}
    pad_empty_sweeps: true
    remove_close: true
    load_augmented: ${load_augmented}
  - type: LoadAnnotations3D
    with_bbox_3d: true
    with_label_3d: true
    with_attr_label: false
  - type: ImageAug3D
    final_dim: ${image_size}
    resize_lim: ${augment2d.resize[1]}
    bot_pct_lim: [0.0, 0.0]
    rot_lim: [0.0, 0.0]
    rand_flip: false
    is_train: false
  - type: GlobalRotScaleTrans
    resize_lim: [1.0, 1.0]
    rot_lim: [0.0, 0.0]
    trans_lim: 0.0
    is_train: false
  - type: LoadBEVSegmentation
    dataset_root: ${dataset_root}
    xbound: [-50.0, 50.0, 0.5]
    ybound: [-50.0, 50.0, 0.5]
    classes: ${map_classes}
  - type: PointsRangeFilter
    point_cloud_range: ${point_cloud_range}
  - type: ImageNormalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  - type: DefaultFormatBundle3D
    classes: ${object_classes}
  - type: Collect3D
    keys:
      - img
      - points
      - gt_bboxes_3d
      - gt_labels_3d
      - gt_masks_bev
    meta_keys:
      - camera_intrinsics
      - camera2ego
      - lidar2ego
      - lidar2camera
      - camera2lidar
      - lidar2image
      - img_aug_matrix
      - lidar_aug_matrix
  - type: GTDepth
    keyframe_only: true