247 lines
5.4 KiB
YAML
247 lines
5.4 KiB
YAML
|
|
# Three-task configuration: 3D detection + BEV segmentation + vectorized map.
# Built on the SwinTransformer + ConvFuser architecture.

# Presumably pulls shared settings from the sibling default.yaml — confirm
# that the config loader in use honours the `_base_` key.
_base_: './default.yaml'
|
|||
|
|
|
|||
|
|
# Model configuration.
# NOTE(review): the nesting below was reconstructed from a copy that had lost
# its indentation — confirm it against the repository version.
model:
  type: BEVFusion

  # Encoder configuration (reuses the already-trained encoders).
  encoders:
    camera:
      backbone:
        type: SwinTransformer
        embed_dims: 96
        depths: [2, 2, 6, 2]
        num_heads: [3, 6, 12, 24]
        window_size: 7
        mlp_ratio: 4
        qkv_bias: true
        qk_scale: null
        drop_rate: 0.0
        attn_drop_rate: 0.0
        drop_path_rate: 0.2
        patch_norm: true
        out_indices: [1, 2, 3]
        with_cp: false
        convert_weights: true
        init_cfg:
          type: Pretrained
          # Quoted so the URL is always read as one literal string.
          checkpoint: 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth'

      neck:
        type: GeneralizedLSSFPN
        in_channels: [192, 384, 768]
        out_channels: 256
        start_level: 0
        num_outs: 3
        norm_cfg:
          type: BN2d
          requires_grad: true
        act_cfg:
          type: ReLU
          inplace: true
        upsample_cfg:
          mode: bilinear
          align_corners: false

      vtransform:
        type: DepthLSSTransform
        in_channels: 256
        out_channels: 80
        # `${...}` values are interpolation expressions resolved by the config
        # loader; they are quoted so YAML always hands them over as strings.
        image_size: '${image_size}'
        # Must stay a single `${...}` scalar. Wrapping it in an extra `[...]`
        # puts the expression in YAML flow context, where its own brackets,
        # braces and comma are structural characters and the file no longer
        # parses.
        feature_size: '${[image_size[0] // 8, image_size[1] // 8]}'
        xbound: [-54.0, 54.0, 0.3]
        ybound: [-54.0, 54.0, 0.3]
        zbound: [-10.0, 10.0, 20.0]
        dbound: [1.0, 60.0, 0.5]
        downsample: 2

    lidar:
      voxelize:
        max_num_points: 10
        point_cloud_range: '${point_cloud_range}'
        voxel_size: '${voxel_size}'
        max_voxels: [120000, 160000]

      backbone:
        type: SparseEncoder
        in_channels: 5
        sparse_shape: [1440, 1440, 41]
        output_channels: 128
        order:
          - conv
          - norm
          - act
        encoder_channels:
          - [16, 16, 32]
          - [32, 32, 64]
          - [64, 64, 128]
          - [128, 128]
        encoder_paddings:
          - [0, 0, 1]
          - [0, 0, 1]
          - [0, 0, [1, 1, 0]]
          - [0, 0]
        block_type: basicblock

  # Fuser configuration.
  fuser:
    type: ConvFuser
    # 80 matches the camera vtransform out_channels above; 256 is presumably
    # the flattened lidar feature width — confirm against SparseEncoder.
    in_channels: [80, 256]
    out_channels: 256

  # Decoder configuration.
  decoder:
    backbone:
      type: SECOND
      in_channels: 256
      out_channels: [128, 256]
      layer_nums: [5, 5]
      layer_strides: [1, 2]
      norm_cfg:
        type: BN
        eps: 1.0e-3
        momentum: 0.01
      conv_cfg:
        type: Conv2d
        bias: false

    neck:
      type: SECONDFPN
      in_channels: [128, 256]
      out_channels: [256, 256]
      upsample_strides: [1, 2]
      norm_cfg:
        type: BN
        eps: 1.0e-3
        momentum: 0.01
      upsample_cfg:
        type: deconv
        bias: false
      use_conv_for_no_stride: true

  # The three task heads. Each one takes in_channels: 512, presumably the two
  # 256-channel SECONDFPN outputs concatenated — confirm.
  heads:
    # Task 1: 3D object detection.
    object:
      type: TransFusionHead
      num_proposals: 200
      auxiliary: true
      in_channels: 512
      hidden_channel: 128
      num_classes: 10
      num_decoder_layers: 1
      num_heads: 8
      nms_kernel_size: 3
      bn_momentum: 0.1
      activation: relu
      common_heads:
        center: [2, 2]
        height: [1, 2]
        dim: [3, 2]
        rot: [2, 2]
        vel: [2, 2]
      bbox_coder:
        type: TransFusionBBoxCoder
        pc_range: '${point_cloud_range[:2]}'
        voxel_size: '${voxel_size[:2]}'
        out_size_factor: 8
        post_center_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
        score_threshold: 0.0
        code_size: 10
      loss_cls:
        type: FocalLoss
        use_sigmoid: true
        gamma: 2.0
        alpha: 0.25
        reduction: mean
        loss_weight: 1.0
      loss_bbox:
        type: L1Loss
        reduction: mean
        loss_weight: 0.25
      loss_heatmap:
        type: GaussianFocalLoss
        reduction: mean
        loss_weight: 1.0

    # Task 2: BEV map segmentation.
    map:
      type: VanillaSegmentationHead
      in_channels: 512
      num_classes: 6
      align_corners: false
      loss_decode:
        type: CrossEntropyLoss
        use_sigmoid: false
        # One weight per class (6 entries, matching num_classes).
        class_weight: [1.0, 2.0, 2.0, 2.0, 2.0, 2.0]
        loss_weight: 1.0

    # Task 3: vectorized map prediction (new).
    vector_map:
      type: MapTRHead
      in_channels: 512
      num_classes: 3
      num_queries: 50
      num_points: 20
      embed_dims: 256
      num_decoder_layers: 6
      num_heads: 8
      dropout: 0.1
      loss_cls_weight: 2.0
      loss_reg_weight: 5.0
      loss_chamfer_weight: 2.0
      score_threshold: 0.3
      nms_threshold: 0.5

  # Per-task loss weights, keyed by the head names above.
  # NOTE(review): placed under `model:` on the assumption that the model
  # consumes it as a constructor argument — confirm the intended nesting.
  loss_scale:
    object: 1.0
    map: 1.0
    vector_map: 1.0  # weight of the vectorized-map task
|
|||
|
|
|
|||
|
|
# Training schedule and data-loader settings.
max_epochs: 20
batch_size_per_gpu: 1
# Zero loader workers, chosen to avoid shared-memory problems.
num_workers_per_gpu: 0
|
|||
|
|
|
|||
|
|
# Optimizer. The learning rate has been lowered slightly; with three tasks it
# may need further small adjustments.
optimizer:
  type: AdamW
  lr: 1.5e-4
  weight_decay: 0.01
|
|||
|
|
|
|||
|
|
# Evaluation settings.
evaluation:
  interval: 1
  # Interpolation expression resolved by the config loader; quoted so YAML
  # always reads it as a string.
  pipeline: '${val_pipeline}'
  # One metric per task head.
  metric:
    - bbox    # 3D detection mAP
    - map     # BEV segmentation mIoU
    - vector  # vectorized-map AP (new)
|
|||
|
|
|
|||
|
|
# Checkpoint saving: the save interval and how many checkpoints to keep.
checkpoint_config:
  interval: 1
  max_keep_ckpts: 5
|
|||
|
|
|
|||
|
|
# Logging: the logging interval and the two logger hooks to register.
log_config:
  interval: 50
  hooks:
    - type: TextLoggerHook
    - type: TensorboardLoggerHook
|
|||
|
|
|
|||
|
|
# Runtime settings.
dist_params:
  backend: nccl

log_level: INFO
work_dir: 'runs/three_tasks'
# Currently null, so no checkpoint path is configured here. Set this to the
# trained multi-task checkpoint to start from its weights.
load_from: null
resume_from: null
workflow: [[train, 1]]
|
|||
|
|
|