# Input image size; referenced below through ${image_size} interpolation.
# The [0] // 16 and [1] // 16 indexing in vtransform.feature_size suggests a
# two-element (height, width) pair — NOTE(review): confirm the axis order.
image_size: [256, 704]

# Model definition: camera encoder (backbone -> neck -> view transform),
# decoder, detection head overrides, and fuser.
# NOTE(review): the nesting here was reconstructed from a flattened copy of
# this file; confirm each level against the consuming framework's schema.
model:
  encoders:
    camera:
      backbone:
        type: ResNet
        depth: 50
        num_stages: 4
        out_indices: [0, 1, 2, 3]
        norm_cfg:
          type: BN2d
          requires_grad: true
        norm_eval: false
        init_cfg:
          type: Pretrained
          checkpoint: torchvision://resnet50
      neck:
        type: SECONDFPN
        # One entry per backbone stage listed in out_indices.
        in_channels: [256, 512, 1024, 2048]
        out_channels: [128, 128, 128, 128]
        # Presumably rescales every stage to a common 1/16 resolution
        # (matching feature_size below) — TODO confirm.
        upsample_strides: [0.25, 0.5, 1, 2]
      vtransform:
        type: LSSTransform
        # 512 = 4 x 128 from neck.out_channels, assuming the neck
        # concatenates its outputs — TODO confirm.
        in_channels: 512
        out_channels: 64
        image_size: ${image_size}
        feature_size: ${[image_size[0] // 16, image_size[1] // 16]}
        # NOTE(review): bounds look like [min, max, step] triples — confirm.
        xbound: [-51.2, 51.2, 0.8]
        ybound: [-51.2, 51.2, 0.8]
        zbound: [-10.0, 10.0, 20.0]
        dbound: [1.0, 60.0, 1.0]
        downsample: 1
  decoder:
    backbone:
      type: GeneralizedResNet
      in_channels: 64
      blocks:
        - [2, 128, 2]
        - [2, 256, 2]
        - [2, 512, 1]
    neck:
      type: LSSFPN
      in_indices: [-1, 0]
      in_channels: [512, 128]
      out_channels: 256
      scale_factor: 2
  heads:
    object:
      train_cfg:
        code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
  fuser:
    type: ConvFuser
    in_channels: [64, 64]
    out_channels: 64


# Per-parameter optimizer overrides.
# NOTE(review): nesting reconstructed from a flattened copy of this file;
# confirm against the optimizer constructor's expected schema.
optimizer:
  paramwise_cfg:
    custom_keys:
      # NOTE(review): these two keys look like Swin-style parameter names,
      # while model.encoders.camera.backbone is a ResNet; they may match no
      # parameters in this configuration — confirm before relying on them.
      absolute_pos_embed:
        decay_mult: 0
      relative_position_bias_table:
        decay_mult: 0
      # encoders.camera.backbone:
      #   lr_mult: 0.1


# lr_config:
#   policy: cyclic
#   target_ratio: 5.0
#   cyclic_times: 1
#   step_ratio_up: 0.4

# momentum_config:
#   policy: cyclic
#   cyclic_times: 1
#   step_ratio_up: 0.4

# Data loader settings.
data:
  samples_per_gpu: 4