bev-project/configs/nuscenes/det/centerhead/lssfpn/camera+radar/resnet50/default.yaml

84 lines
1.7 KiB
YAML
Raw Normal View History

2023-07-08 10:53:36 +08:00
# Image size shared through ${image_size} interpolation; the `image_size` and
# `feature_size` entries further down both reference it.
# NOTE(review): presumably [height, width] in pixels -- confirm with the consumer.
image_size: [256, 704]
# Model definition: camera encoder, decoder, head overrides and fuser.
# NOTE(review): the nesting below was reconstructed from text whose indentation
# had been flattened (keys such as `backbone`, `neck` and `type` were repeated
# at a single level). Confirm the hierarchy against the consuming framework.
model:
  encoders:
    camera:
      # Image backbone, initialised from the torchvision ResNet-50 checkpoint.
      backbone:
        type: ResNet
        depth: 50
        num_stages: 4
        out_indices: [0, 1, 2, 3]
        norm_cfg:
          type: BN2d
          requires_grad: true
        norm_eval: false
        init_cfg:
          type: Pretrained
          checkpoint: torchvision://resnet50
      # Four-entry lists; presumably one entry per backbone output selected by
      # `out_indices` above -- TODO confirm.
      neck:
        type: SECONDFPN
        in_channels: [256, 512, 1024, 2048]
        out_channels: [128, 128, 128, 128]
        upsample_strides: [0.25, 0.5, 1, 2]
      vtransform:
        type: LSSTransform
        # 512 equals the sum of the neck's four 128-channel outputs;
        # presumably they are concatenated -- TODO confirm.
        in_channels: 512
        out_channels: 64
        # Both values below are interpolated from the top-level `image_size`;
        # `feature_size` is each image dimension floor-divided by 16.
        image_size: ${image_size}
        feature_size: ${[image_size[0] // 16, image_size[1] // 16]}
        # NOTE(review): the *bound triples look like [min, max, step] -- confirm.
        xbound: [-51.2, 51.2, 0.8]
        ybound: [-51.2, 51.2, 0.8]
        zbound: [-10.0, 10.0, 20.0]
        dbound: [1.0, 60.0, 1.0]
        downsample: 1
  decoder:
    backbone:
      type: GeneralizedResNet
      # Same value as `fuser.out_channels` below.
      in_channels: 64
      # NOTE(review): meaning of each triple is defined by GeneralizedResNet;
      # the middle values (128, 256, 512) line up with the neck's
      # `in_channels` for indices 0 and -1.
      blocks:
        - [2, 128, 2]
        - [2, 256, 2]
        - [2, 512, 1]
    neck:
      type: LSSFPN
      in_indices: [-1, 0]
      in_channels: [512, 128]
      out_channels: 256
      scale_factor: 2
  heads:
    object:
      # Only `code_weights` is set here (ten equal weights); the remainder of
      # the head configuration is presumably supplied elsewhere -- verify.
      train_cfg:
        code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
  fuser:
    type: ConvFuser
    # Two 64-channel inputs. One matches `vtransform.out_channels`; the other
    # is presumably the radar branch configured outside this file -- TODO confirm.
    in_channels: [64, 64]
    out_channels: 64
# Optimizer overrides: per-parameter settings keyed by parameter-name pattern.
# Both active entries set the decay multiplier to 0.
# NOTE(review): these key names look like transformer position-embedding
# parameters; with the ResNet backbone configured in this file they may match
# nothing -- confirm whether they are still needed.
optimizer:
  paramwise_cfg:
    custom_keys:
      absolute_pos_embed:
        decay_mult: 0
      relative_position_bias_table:
        decay_mult: 0
      # Disabled learning-rate multiplier for the camera backbone:
      # encoders.camera.backbone:
      #   lr_mult: 0.1
# lr_config:
# policy: cyclic
# target_ratio: 5.0
# cyclic_times: 1
# step_ratio_up: 0.4
# momentum_config:
# policy: cyclic
# cyclic_times: 1
# step_ratio_up: 0.4
# Data settings: number of samples per GPU.
data:
  samples_per_gpu: 4