# bev-project/configs/nuscenes/det/transfusion/secfpn/camera+lidar/default.yaml
#
# 63 lines
# 1.4 KiB
# YAML

# Encoder settings for the camera + lidar configuration.
# NOTE(review): `${...}` values are kept as plain strings; presumably the
# config loader substitutes them from keys defined elsewhere — confirm.
model:
  encoders:
    # Camera branch: neck followed by the view-transform settings.
    camera:
      neck:
        type: GeneralizedLSSFPN
        in_channels: [512, 1024, 2048]
        out_channels: 256
        start_level: 0
        num_outs: 3
        norm_cfg:
          type: BN2d
          requires_grad: true
        act_cfg:
          type: ReLU
          inplace: true
        upsample_cfg:
          mode: bilinear
          align_corners: false
      vtransform:
        type: DepthLSSTransform
        # Matches the neck's out_channels (256) above.
        in_channels: 256
        out_channels: 80
        image_size: ${image_size}
        # Expression string: each image dimension floor-divided by 8.
        feature_size: ${[image_size[0] // 8, image_size[1] // 8]}
        # NOTE(review): the *bound triples look like [min, max, step] —
        # confirm against the DepthLSSTransform implementation.
        xbound: [-51.2, 51.2, 0.4]
        ybound: [-51.2, 51.2, 0.4]
        zbound: [-10.0, 10.0, 20.0]
        dbound: [1.0, 60.0, 0.5]
        downsample: 2
    # Lidar branch: voxelization settings followed by the sparse backbone.
    lidar:
      voxelize:
        max_num_points: 10
        point_cloud_range: ${point_cloud_range}
        voxel_size: ${voxel_size}
        # NOTE(review): two values — presumably train/test limits; confirm.
        max_voxels: [90000, 120000]
      backbone:
        type: SparseEncoder
        in_channels: 5
        sparse_shape: [1024, 1024, 41]
        output_channels: 128
        order:
          - conv
          - norm
          - act
        # One entry per encoder stage; encoder_paddings below has the same
        # per-stage layout (3, 3, 3, 2 entries).
        encoder_channels:
          - [16, 16, 32]
          - [32, 32, 64]
          - [64, 64, 128]
          - [128, 128]
        encoder_paddings:
          - [0, 0, 1]
          - [0, 0, 1]
          # Last entry of this stage is a per-axis padding list.
          - [0, 0, [1, 1, 0]]
          - [0, 0]
        block_type: basicblock
# Training-schedule settings.
# NOTE(review): kept at the top level of the document (not under `model`);
# presumably these override inherited defaults — confirm against the base
# config.
lr_config: null  # explicit null: no lr_config value is set here
optimizer:
  lr: 2.0e-4
max_epochs: 6