bev-project/configs/nuscenes/three_tasks/bevfusion_det_seg_vec.yaml

247 lines
5.4 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Three-task configuration: 3D detection + BEV segmentation + vector map.
# Built on the SwinTransformer + ConvFuser architecture.
# Settings not listed in this file are inherited from the base config below.
_base_: './default.yaml'
# Model configuration.
# Values written as ${...} are interpolation expressions that refer to keys
# defined outside this file (image_size, point_cloud_range, voxel_size).
model:
  type: BEVFusion

  # Encoder settings are reused from the already-trained model.
  encoders:
    camera:
      backbone:
        type: SwinTransformer
        embed_dims: 96
        depths: [2, 2, 6, 2]
        num_heads: [3, 6, 12, 24]
        window_size: 7
        mlp_ratio: 4
        qkv_bias: true
        qk_scale: null
        drop_rate: 0.0
        attn_drop_rate: 0.0
        drop_path_rate: 0.2
        patch_norm: true
        out_indices: [1, 2, 3]
        with_cp: false
        convert_weights: true
        init_cfg:
          type: Pretrained
          checkpoint: https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth
      neck:
        type: GeneralizedLSSFPN
        in_channels: [192, 384, 768]
        out_channels: 256
        start_level: 0
        num_outs: 3
        norm_cfg:
          type: BN2d
          requires_grad: true
        act_cfg:
          type: ReLU
          inplace: true
        upsample_cfg:
          mode: bilinear
          align_corners: false
      vtransform:
        type: DepthLSSTransform
        in_channels: 256
        out_channels: 80
        image_size: ${image_size}
        # Must stay a plain scalar: wrapping the expression in [...] would put
        # it in a YAML flow sequence, where '{', '[' and ',' are structural and
        # the interpolation could no longer be read as a single string.
        feature_size: ${[image_size[0] // 8, image_size[1] // 8]}
        xbound: [-54.0, 54.0, 0.3]
        ybound: [-54.0, 54.0, 0.3]
        zbound: [-10.0, 10.0, 20.0]
        dbound: [1.0, 60.0, 0.5]
        downsample: 2
    lidar:
      voxelize:
        max_num_points: 10
        point_cloud_range: ${point_cloud_range}
        voxel_size: ${voxel_size}
        max_voxels: [120000, 160000]
      backbone:
        type: SparseEncoder
        in_channels: 5
        sparse_shape: [1440, 1440, 41]
        output_channels: 128
        order:
          - conv
          - norm
          - act
        encoder_channels:
          - [16, 16, 32]
          - [32, 32, 64]
          - [64, 64, 128]
          - [128, 128]
        encoder_paddings:
          - [0, 0, 1]
          - [0, 0, 1]
          - [0, 0, [1, 1, 0]]
          - [0, 0]
        block_type: basicblock

  # Fuser configuration.
  fuser:
    type: ConvFuser
    in_channels: [80, 256]
    out_channels: 256

  # Decoder configuration.
  decoder:
    backbone:
      type: SECOND
      in_channels: 256
      out_channels: [128, 256]
      layer_nums: [5, 5]
      layer_strides: [1, 2]
      norm_cfg:
        type: BN
        eps: 1.0e-3
        momentum: 0.01
      conv_cfg:
        type: Conv2d
        bias: false
    neck:
      type: SECONDFPN
      in_channels: [128, 256]
      out_channels: [256, 256]
      upsample_strides: [1, 2]
      norm_cfg:
        type: BN
        eps: 1.0e-3
        momentum: 0.01
      upsample_cfg:
        type: deconv
        bias: false
      use_conv_for_no_stride: true

  # The three task heads.
  heads:
    # Task 1: 3D object detection.
    object:
      type: TransFusionHead
      num_proposals: 200
      auxiliary: true
      in_channels: 512
      hidden_channel: 128
      num_classes: 10
      num_decoder_layers: 1
      num_heads: 8
      nms_kernel_size: 3
      bn_momentum: 0.1
      activation: relu
      common_heads:
        center: [2, 2]
        height: [1, 2]
        dim: [3, 2]
        rot: [2, 2]
        vel: [2, 2]
      bbox_coder:
        type: TransFusionBBoxCoder
        pc_range: ${point_cloud_range[:2]}
        voxel_size: ${voxel_size[:2]}
        out_size_factor: 8
        post_center_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
        score_threshold: 0.0
        code_size: 10
      loss_cls:
        type: FocalLoss
        use_sigmoid: true
        gamma: 2.0
        alpha: 0.25
        reduction: mean
        loss_weight: 1.0
      loss_bbox:
        type: L1Loss
        reduction: mean
        loss_weight: 0.25
      loss_heatmap:
        type: GaussianFocalLoss
        reduction: mean
        loss_weight: 1.0

    # Task 2: BEV map segmentation.
    map:
      type: VanillaSegmentationHead
      in_channels: 512
      num_classes: 6
      align_corners: false
      loss_decode:
        type: CrossEntropyLoss
        use_sigmoid: false
        class_weight: [1.0, 2.0, 2.0, 2.0, 2.0, 2.0]
        loss_weight: 1.0

    # Task 3: vector map prediction (new).
    vector_map:
      type: MapTRHead
      in_channels: 512
      num_classes: 3
      num_queries: 50
      num_points: 20
      embed_dims: 256
      num_decoder_layers: 6
      num_heads: 8
      dropout: 0.1
      loss_cls_weight: 2.0
      loss_reg_weight: 5.0
      loss_chamfer_weight: 2.0
      score_threshold: 0.3
      nms_threshold: 0.5

  # Per-task loss weights; the keys match the head names above.
  # NOTE(review): placed under `model` on the assumption that the model
  # consumes it — confirm against the original file's indentation.
  loss_scale:
    object: 1.0
    map: 1.0
    vector_map: 1.0  # weight of the vector-map task
# Training schedule and data-loading settings.
max_epochs: 20
batch_size_per_gpu: 1
# Zero loader workers, chosen to avoid shared-memory problems.
num_workers_per_gpu: 0
# Optimizer. The learning rate may need slight tuning for three-task training;
# it has been lowered a little here.
optimizer:
  type: AdamW
  lr: 1.5e-4
  weight_decay: 0.01
# Evaluation settings: one metric entry per task.
evaluation:
  interval: 1
  pipeline: ${val_pipeline}
  metric:
    - bbox    # 3D detection mAP
    - map     # BEV segmentation mIoU
    - vector  # vector-map AP (new)
# Checkpoint saving.
checkpoint_config:
  interval: 1
  max_keep_ckpts: 5
# Logging.
log_config:
  interval: 50
  hooks:
    - type: TextLoggerHook
    - type: TensorboardLoggerHook
# Runtime settings.
dist_params:
  backend: nccl
log_level: INFO
work_dir: runs/three_tasks
# NOTE(review): the original comment says weights are loaded from the currently
# trained multi-task model, but the value is null — confirm whether a
# checkpoint path is meant to be set here.
load_from: null
resume_from: null
workflow:
  - [train, 1]