2022-06-03 12:21:18 +08:00
|
|
|
model:
|
|
|
|
|
encoders:
|
|
|
|
|
camera:
|
|
|
|
|
backbone:
|
|
|
|
|
type: SwinTransformer
|
|
|
|
|
embed_dims: 96
|
|
|
|
|
depths: [2, 2, 6, 2]
|
|
|
|
|
num_heads: [3, 6, 12, 24]
|
|
|
|
|
window_size: 7
|
|
|
|
|
mlp_ratio: 4
|
|
|
|
|
qkv_bias: true
|
|
|
|
|
qk_scale: null
|
|
|
|
|
drop_rate: 0.
|
|
|
|
|
attn_drop_rate: 0.
|
|
|
|
|
drop_path_rate: 0.3
|
|
|
|
|
patch_norm: true
|
|
|
|
|
out_indices: [1, 2, 3]
|
|
|
|
|
with_cp: false
|
|
|
|
|
convert_weights: true
|
2022-12-05 11:47:29 +08:00
|
|
|
init_cfg:
|
|
|
|
|
type: Pretrained
|
2025-11-14 17:06:09 +08:00
|
|
|
checkpoint: /bevfusion/pretrained/swin_tiny_patch4_window7_224.pth
|
2022-06-03 12:21:18 +08:00
|
|
|
neck:
|
|
|
|
|
type: GeneralizedLSSFPN
|
|
|
|
|
in_channels: [192, 384, 768]
|
|
|
|
|
out_channels: 256
|
|
|
|
|
start_level: 0
|
|
|
|
|
num_outs: 3
|
|
|
|
|
norm_cfg:
|
|
|
|
|
type: BN2d
|
|
|
|
|
requires_grad: true
|
|
|
|
|
act_cfg:
|
|
|
|
|
type: ReLU
|
|
|
|
|
inplace: true
|
|
|
|
|
upsample_cfg:
|
|
|
|
|
mode: bilinear
|
|
|
|
|
align_corners: false
|
|
|
|
|
vtransform:
|
|
|
|
|
type: LSSTransform
|
|
|
|
|
in_channels: 256
|
|
|
|
|
out_channels: 80
|
|
|
|
|
image_size: ${image_size}
|
|
|
|
|
feature_size: ${[image_size[0] // 8, image_size[1] // 8]}
|
|
|
|
|
xbound: [-51.2, 51.2, 0.4]
|
|
|
|
|
ybound: [-51.2, 51.2, 0.4]
|
|
|
|
|
zbound: [-10.0, 10.0, 20.0]
|
|
|
|
|
dbound: [1.0, 60.0, 0.5]
|
|
|
|
|
downsample: 2
|
|
|
|
|
lidar:
|
|
|
|
|
voxelize:
|
|
|
|
|
max_num_points: 10
|
|
|
|
|
point_cloud_range: ${point_cloud_range}
|
|
|
|
|
voxel_size: ${voxel_size}
|
|
|
|
|
max_voxels: [90000, 120000]
|
|
|
|
|
backbone:
|
|
|
|
|
type: SparseEncoder
|
|
|
|
|
in_channels: 5
|
|
|
|
|
sparse_shape: [1024, 1024, 41]
|
|
|
|
|
output_channels: 128
|
|
|
|
|
order:
|
|
|
|
|
- conv
|
|
|
|
|
- norm
|
|
|
|
|
- act
|
|
|
|
|
encoder_channels:
|
|
|
|
|
- [16, 16, 32]
|
|
|
|
|
- [32, 32, 64]
|
|
|
|
|
- [64, 64, 128]
|
|
|
|
|
- [128, 128]
|
|
|
|
|
encoder_paddings:
|
|
|
|
|
- [0, 0, 1]
|
|
|
|
|
- [0, 0, 1]
|
|
|
|
|
- [0, 0, [1, 1, 0]]
|
|
|
|
|
- [0, 0]
|
|
|
|
|
block_type: basicblock
|
|
|
|
|
fuser:
|
|
|
|
|
type: ConvFuser
|
|
|
|
|
in_channels: [80, 256]
|
|
|
|
|
out_channels: 256
|
|
|
|
|
decoder:
|
|
|
|
|
backbone:
|
|
|
|
|
type: SECOND
|
|
|
|
|
in_channels: 256
|
|
|
|
|
out_channels: [128, 256]
|
|
|
|
|
layer_nums: [5, 5]
|
|
|
|
|
layer_strides: [1, 2]
|
|
|
|
|
norm_cfg:
|
|
|
|
|
type: BN
|
|
|
|
|
eps: 1.0e-3
|
|
|
|
|
momentum: 0.01
|
|
|
|
|
conv_cfg:
|
|
|
|
|
type: Conv2d
|
|
|
|
|
bias: false
|
|
|
|
|
neck:
|
|
|
|
|
type: SECONDFPN
|
|
|
|
|
in_channels: [128, 256]
|
|
|
|
|
out_channels: [256, 256]
|
|
|
|
|
upsample_strides: [1, 2]
|
|
|
|
|
norm_cfg:
|
|
|
|
|
type: BN
|
|
|
|
|
eps: 1.0e-3
|
|
|
|
|
momentum: 0.01
|
|
|
|
|
upsample_cfg:
|
|
|
|
|
type: deconv
|
|
|
|
|
bias: false
|
|
|
|
|
use_conv_for_no_stride: true
|
|
|
|
|
heads:
|
|
|
|
|
map:
|
|
|
|
|
in_channels: 512
|
|
|
|
|
|
|
|
|
|
optimizer:
|
|
|
|
|
type: AdamW
|
|
|
|
|
lr: 1.0e-4
|
2022-12-05 11:47:29 +08:00
|
|
|
weight_decay: 0.01
|
|
|
|
|
paramwise_cfg:
|
|
|
|
|
custom_keys:
|
|
|
|
|
absolute_pos_embed:
|
|
|
|
|
decay_mult: 0
|
|
|
|
|
relative_position_bias_table:
|
|
|
|
|
decay_mult: 0
|
|
|
|
|
|
|
|
|
|
optimizer_config:
|
|
|
|
|
grad_clip:
|
|
|
|
|
max_norm: 35
|
|
|
|
|
norm_type: 2
|
|
|
|
|
|
|
|
|
|
lr_config:
|
|
|
|
|
policy: cyclic
|
|
|
|
|
|
|
|
|
|
momentum_config:
|
|
|
|
|
policy: cyclic
|