# Enhanced multi-task configuration: detection + segmentation
# (Enhanced Segmentation Head)
#
# Improvements:
# 1. EnhancedBEVSegmentationHead (ASPP + dual attention + deeper decoder)
# 2. Focal loss fix (alpha=0.25)
# 3. Dice loss mixed in (weight=0.5)
# 4. Class-weight balancing (stop_line x4, divider x3)
# 5. Deep supervision
# 6. Segmentation loss weight x5
# 7. Lower learning rate (pretrained weights are reused)
#
# Reuses: epoch_19.pth (93.7% of parameters)
# Expected: mIoU 60-65% (2-3 days)

# Parent config: settings not redefined in this file (e.g. the object head)
# come from convfuser.yaml.
_base_: ./convfuser.yaml

# Add the segmentation head (the pipelines that feed it are defined below).
# NOTE(review): indentation was lost in the recovered copy of this file; the
# nesting here was reconstructed from the key names. In particular, confirm
# that `loss_scale` belongs directly under `model`.
model:
  # Make sure the fuser configuration is correct.
  fuser:
    type: ConvFuser
    in_channels: [80, 256]
    out_channels: 256

  heads:
    # The object head is inherited from convfuser.yaml.
    # Add the enhanced map head.
    map:
      type: EnhancedBEVSegmentationHead  # use the enhanced head
      in_channels: 512
      grid_transform:
        input_scope: [[-54.0, 54.0, 0.75], [-54.0, 54.0, 0.75]]
        output_scope: [[-50, 50, 0.5], [-50, 50, 0.5]]
      classes: ${map_classes}
      loss: focal

      # Per-class weights (to counter the nuScenes class imbalance).
      loss_weight:
        drivable_area: 1.0
        ped_crossing: 3.0
        walkway: 1.5
        stop_line: 4.0
        carpark_area: 2.0
        divider: 3.0

      # Focal loss parameters.
      focal_alpha: 0.25
      focal_gamma: 2.0

      # Dice loss configuration.
      use_dice_loss: true
      dice_weight: 0.5

      # Deep supervision.
      deep_supervision: true

      # Decoder configuration.
      decoder_channels: [256, 256, 128, 128]

  # Per-head loss scaling (segmentation loss weighted x5).
  loss_scale:
    object: 1.0
    map: 5.0

# Number of training epochs (also referenced as ${max_epochs} by GridMask in
# train_pipeline).
max_epochs: 20

# Optimizer (lower learning rate, since pretrained weights are reused).
optimizer:
  # Keep the explicit decimal point in the mantissa: some YAML 1.1 loaders
  # read a bare `1e-4` as a string instead of a float.
  lr: 1.0e-4

# Training pipeline. Collect3D is modified to also collect gt_masks_bev, the
# BEV segmentation ground truth produced by LoadBEVSegmentation.
# NOTE(review): indentation was lost in the recovered copy of this file; the
# nesting (notably inside db_sampler) was reconstructed from the key names.
train_pipeline:
  -
    type: LoadMultiViewImageFromFiles
    to_float32: true
  -
    type: LoadPointsFromFile
    coord_type: LIDAR
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    reduce_beams: ${reduce_beams}
    load_augmented: ${load_augmented}
  -
    type: LoadPointsFromMultiSweeps
    sweeps_num: 9
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    reduce_beams: ${reduce_beams}
    pad_empty_sweeps: true
    remove_close: true
    load_augmented: ${load_augmented}
  -
    type: LoadAnnotations3D
    with_bbox_3d: true
    with_label_3d: true
    with_attr_label: false
  -
    type: ObjectPaste
    stop_epoch: ${gt_paste_stop_epoch}
    db_sampler:
      dataset_root: ${dataset_root}
      info_path: ${dataset_root + "nuscenes_dbinfos_train.pkl"}
      rate: 1.0
      prepare:
        filter_by_difficulty: [-1]
        filter_by_min_points:
          car: 5
          truck: 5
          bus: 5
          trailer: 5
          construction_vehicle: 5
          traffic_cone: 5
          barrier: 5
          motorcycle: 5
          bicycle: 5
          pedestrian: 5
      classes: ${object_classes}
      sample_groups:
        car: 2
        truck: 3
        construction_vehicle: 7
        bus: 4
        trailer: 6
        barrier: 2
        motorcycle: 6
        bicycle: 6
        pedestrian: 2
        traffic_cone: 2
      points_loader:
        type: LoadPointsFromFile
        coord_type: LIDAR
        load_dim: ${load_dim}
        use_dim: ${use_dim}
        reduce_beams: ${reduce_beams}
  -
    type: ImageAug3D
    final_dim: ${image_size}
    resize_lim: ${augment2d.resize[0]}
    bot_pct_lim: [0.0, 0.0]
    rot_lim: ${augment2d.rotate}
    rand_flip: true
    is_train: true
  -
    type: GlobalRotScaleTrans
    resize_lim: ${augment3d.scale}
    rot_lim: ${augment3d.rotate}
    trans_lim: ${augment3d.translate}
    is_train: true
  -
    # xbound/ybound use the same range and resolution as the map head's
    # grid_transform.output_scope ([-50, 50, 0.5]).
    type: LoadBEVSegmentation
    dataset_root: ${dataset_root}
    xbound: [-50.0, 50.0, 0.5]
    ybound: [-50.0, 50.0, 0.5]
    classes: ${map_classes}
  -
    type: RandomFlip3D
  -
    type: PointsRangeFilter
    point_cloud_range: ${point_cloud_range}
  -
    type: ObjectRangeFilter
    point_cloud_range: ${point_cloud_range}
  -
    type: ObjectNameFilter
    classes: ${object_classes}
  -
    type: ImageNormalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  -
    type: GridMask
    use_h: true
    use_w: true
    max_epoch: ${max_epochs}
    rotate: 1
    offset: false
    ratio: 0.5
    mode: 1
    prob: ${augment2d.gridmask.prob}
    fixed_prob: ${augment2d.gridmask.fixed_prob}
  -
    type: PointShuffle
  -
    type: DefaultFormatBundle3D
    classes: ${object_classes}
  -
    type: Collect3D
    keys:
      - img
      - points
      - gt_bboxes_3d
      - gt_labels_3d
      - gt_masks_bev
    meta_keys:
      - camera_intrinsics
      - camera2ego
      - lidar2ego
      - lidar2camera
      - camera2lidar
      - lidar2image
      - img_aug_matrix
      - lidar_aug_matrix
  -
    type: GTDepth
    keyframe_only: true

# Test pipeline. Image and global augmentations are configured as no-ops
# (is_train: false, zero rotation, no flip), and Collect3D also collects
# gt_masks_bev.
# NOTE(review): indentation was lost in the recovered copy of this file; the
# nesting was reconstructed from the key names.
test_pipeline:
  -
    type: LoadMultiViewImageFromFiles
    to_float32: true
  -
    type: LoadPointsFromFile
    coord_type: LIDAR
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    reduce_beams: ${reduce_beams}
    load_augmented: ${load_augmented}
  -
    type: LoadPointsFromMultiSweeps
    sweeps_num: 9
    load_dim: ${load_dim}
    use_dim: ${use_dim}
    reduce_beams: ${reduce_beams}
    pad_empty_sweeps: true
    remove_close: true
    load_augmented: ${load_augmented}
  -
    type: LoadAnnotations3D
    with_bbox_3d: true
    with_label_3d: true
    with_attr_label: false
  -
    type: ImageAug3D
    final_dim: ${image_size}
    resize_lim: ${augment2d.resize[1]}
    bot_pct_lim: [0.0, 0.0]
    rot_lim: [0.0, 0.0]
    rand_flip: false
    is_train: false
  -
    type: GlobalRotScaleTrans
    resize_lim: [1.0, 1.0]
    rot_lim: [0.0, 0.0]
    trans_lim: 0.0
    is_train: false
  -
    # xbound/ybound use the same range and resolution as the map head's
    # grid_transform.output_scope ([-50, 50, 0.5]).
    type: LoadBEVSegmentation
    dataset_root: ${dataset_root}
    xbound: [-50.0, 50.0, 0.5]
    ybound: [-50.0, 50.0, 0.5]
    classes: ${map_classes}
  -
    type: PointsRangeFilter
    point_cloud_range: ${point_cloud_range}
  -
    type: ImageNormalize
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  -
    type: DefaultFormatBundle3D
    classes: ${object_classes}
  -
    type: Collect3D
    keys:
      - img
      - points
      - gt_bboxes_3d
      - gt_labels_3d
      - gt_masks_bev
    meta_keys:
      - camera_intrinsics
      - camera2ego
      - lidar2ego
      - lidar2camera
      - camera2lidar
      - lidar2image
      - img_aug_matrix
      - lidar_aug_matrix
  -
    type: GTDepth
    keyframe_only: true