# Model, optimizer and data-loader settings for a configuration whose camera
# encoder uses a depth-50 ResNet backbone.
#
# NOTE(review): this file's line breaks and indentation were rebuilt from a
# copy in which the whole document sat on a single line. The nesting below is
# inferred from key order; confirm the parents of `norm_eval`, `heads`,
# `fuser` and the commented-out keys against the consuming schema.

# Input image size, referenced further down through ${image_size}.
# presumably [height, width] -- TODO confirm.
image_size: [256, 704]

model:
  encoders:
    camera:
      backbone:
        type: ResNet
        depth: 50
        num_stages: 4
        out_indices: [0, 1, 2, 3]
        norm_cfg:
          type: BN2d
          requires_grad: true
        norm_eval: false
        init_cfg:
          type: Pretrained
          checkpoint: torchvision://resnet50
      neck:
        type: SECONDFPN
        # Four entries each, matching the four out_indices of the backbone.
        in_channels: [256, 512, 1024, 2048]
        out_channels: [128, 128, 128, 128]
        upsample_strides: [0.25, 0.5, 1, 2]
      vtransform:
        type: LSSTransform
        # 512 equals the sum of the neck's out_channels (4 x 128);
        # presumably the neck outputs are concatenated -- TODO confirm.
        in_channels: 512
        out_channels: 64
        # Interpolation expressions are quoted so YAML always yields a string.
        image_size: '${image_size}'
        # Each image dimension integer-divided by 16.
        feature_size: '${[image_size[0] // 16, image_size[1] // 16]}'
        # presumably [min, max, step] per axis -- TODO confirm.
        xbound: [-51.2, 51.2, 0.8]
        ybound: [-51.2, 51.2, 0.8]
        zbound: [-10.0, 10.0, 20.0]
        dbound: [1.0, 60.0, 1.0]
        downsample: 1
  decoder:
    backbone:
      type: GeneralizedResNet
      # Same value as vtransform.out_channels and fuser.out_channels.
      in_channels: 64
      blocks:
        - [2, 128, 2]
        - [2, 256, 2]
        - [2, 512, 1]
    neck:
      type: LSSFPN
      in_indices: [-1, 0]
      # 512 and 128 are the middle values of the last and first `blocks`
      # entries, in the order selected by in_indices.
      in_channels: [512, 128]
      out_channels: 256
      scale_factor: 2
  heads:
    object:
      train_cfg:
        # Ten equal weights.
        code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
  fuser:
    type: ConvFuser
    in_channels: [64, 64]
    out_channels: 64

optimizer:
  paramwise_cfg:
    custom_keys:
      absolute_pos_embed:
        decay_mult: 0
      relative_position_bias_table:
        decay_mult: 0
      # Disabled override, kept for reference:
      # encoders.camera.backbone:
      #   lr_mult: 0.1

# Disabled schedule overrides, kept for reference:
# lr_config:
#   policy: cyclic
#   target_ratio: 5.0
#   cyclic_times: 1
#   step_ratio_up: 0.4

# momentum_config:
#   policy: cyclic
#   cyclic_times: 1
#   step_ratio_up: 0.4

data:
  samples_per_gpu: 4