# bev-project/configs/nuscenes/det/centerhead/lssfpn/camera+radar/resnet50/default.yaml



# Image size for this config; model.encoders.camera.vtransform reads it back
# through ${image_size} (both `image_size` and `feature_size` there).
# Presumably ordered [height, width] — TODO confirm against the data pipeline.
image_size: [256, 704]

# Model settings: camera encoder (backbone, neck, vtransform), decoder
# (backbone, neck), object-head training weights, and the fuser.
model:
  encoders:
    # Camera branch. No other encoder is configured in this file.
    camera:
      backbone:
        type: ResNet
        depth: 50
        num_stages: 4
        out_indices: [0, 1, 2, 3]  # all four stage indices are listed
        norm_cfg:
          type: BN2d
          requires_grad: true
        norm_eval: false
        init_cfg:
          # Start from the torchvision ResNet-50 checkpoint.
          type: Pretrained
          checkpoint: torchvision://resnet50
      neck:
        type: SECONDFPN
        # Four entries, one per index in the backbone's out_indices.
        in_channels: [256, 512, 1024, 2048]
        out_channels: [128, 128, 128, 128]
        upsample_strides: [0.25, 0.5, 1, 2]  # values < 1 presumably downsample — confirm
      vtransform:
        type: LSSTransform
        # 512 equals the sum of the neck's out_channels (4 x 128); presumably
        # the neck concatenates its levels — TODO confirm.
        in_channels: 512
        out_channels: 64
        image_size: ${image_size}
        # Each image dimension integer-divided by 16. With image_size
        # [256, 704] that is [16, 44], assuming the config loader evaluates
        # the ${...} expression.
        feature_size: ${[image_size[0] // 16, image_size[1] // 16]}
        # Presumably [min, max, step] for each axis — TODO confirm meaning
        # and units against LSSTransform.
        xbound: [-51.2, 51.2, 0.8]
        ybound: [-51.2, 51.2, 0.8]
        zbound: [-10.0, 10.0, 20.0]
        dbound: [1.0, 60.0, 1.0]
        downsample: 1
  decoder:
    backbone:
      type: GeneralizedResNet
      # Same value as fuser.out_channels below (64).
      in_channels: 64
      # NOTE(review): each entry looks like [num_blocks, channels, stride]
      # — confirm against GeneralizedResNet.
      blocks:
        - [2, 128, 2]
        - [2, 256, 2]
        - [2, 512, 1]
    neck:
      type: LSSFPN
      # in_channels [512, 128] equal the middle values of the last and first
      # `blocks` entries above, in line with in_indices [-1, 0].
      in_indices: [-1, 0]
      in_channels: [512, 128]
      out_channels: 256
      scale_factor: 2
  heads:
    object:
      train_cfg:
        # Ten weights, all 1.0 (uniform). Presumably one per regression
        # target — TODO confirm against the head definition.
        code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
  fuser:
    type: ConvFuser
    # Two 64-channel inputs. 64 matches the camera vtransform's out_channels;
    # the other input is presumably the radar branch, which is not configured
    # in this file — TODO confirm.
    in_channels: [64, 64]
    out_channels: 64


# Per-parameter optimizer settings.
optimizer:
  paramwise_cfg:
    # decay_mult: 0 presumably turns weight decay off for parameters whose
    # names contain the given key — TODO confirm against the optimizer
    # constructor in use.
    # NOTE(review): both key names look like transformer position-embedding
    # parameters; the ResNet backbone configured in this file may have no
    # parameters matching them — confirm whether these entries are still
    # needed.
    custom_keys:
      absolute_pos_embed:
        decay_mult: 0
      relative_position_bias_table:
        decay_mult: 0
      # Disabled override, kept for reference:
      # encoders.camera.backbone:
      #   lr_mult: 0.1


# lr_config:
#   policy: cyclic
#   target_ratio: 5.0
#   cyclic_times: 1
#   step_ratio_up: 0.4

# momentum_config:
#   policy: cyclic
#   cyclic_times: 1
#   step_ratio_up: 0.4

# Data-loader setting. Going by the key name, this is the number of samples
# per GPU; the total batch size then depends on the GPU count, which this
# file does not set.
data:
  samples_per_gpu: 4
# Add BEVFusion-R (#443) 2023-07-08 10:53:36 +08:00

			`image_size: [256, 704]`

			`model:`
			`encoders:`
			`camera:`
			`backbone:`
			`type: ResNet`
			`depth: 50`
			`num_stages: 4`
			`out_indices: [0, 1, 2, 3]`
			`norm_cfg:`
			`type: BN2d`
			`requires_grad: true`
			`norm_eval: false`
			`init_cfg:`
			`type: Pretrained`
			`checkpoint: torchvision://resnet50`
			`neck:`
			`type: SECONDFPN`
			`in_channels: [256, 512, 1024, 2048]`
			`out_channels: [128, 128, 128, 128]`
			`upsample_strides: [0.25, 0.5, 1, 2]`
			`vtransform:`
			`type: LSSTransform`
			`in_channels: 512`
			`out_channels: 64`
			`image_size: ${image_size}`
			`feature_size: ${[image_size[0] // 16, image_size[1] // 16]}`
			`xbound: [-51.2, 51.2, 0.8]`
			`ybound: [-51.2, 51.2, 0.8]`
			`zbound: [-10.0, 10.0, 20.0]`
			`dbound: [1.0, 60.0, 1.0]`
			`downsample: 1`
			`decoder:`
			`backbone:`
			`type: GeneralizedResNet`
			`in_channels: 64`
			`blocks:`
			`- [2, 128, 2]`
			`- [2, 256, 2]`
			`- [2, 512, 1]`
			`neck:`
			`type: LSSFPN`
			`in_indices: [-1, 0]`
			`in_channels: [512, 128]`
			`out_channels: 256`
			`scale_factor: 2`
			`heads:`
			`object:`
			`train_cfg:`
			`code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]`
			`fuser:`
			`type: ConvFuser`
			`in_channels: [64, 64]`
			`out_channels: 64`


			`optimizer:`
			`paramwise_cfg:`
			`custom_keys:`
			`absolute_pos_embed:`
			`decay_mult: 0`
			`relative_position_bias_table:`
			`decay_mult: 0`
			`# encoders.camera.backbone:`
			`# lr_mult: 0.1`


			`# lr_config:`
			`# policy: cyclic`
			`# target_ratio: 5.0`
			`# cyclic_times: 1`
			`# step_ratio_up: 0.4`

			`# momentum_config:`
			`# policy: cyclic`
			`# cyclic_times: 1`
			`# step_ratio_up: 0.4`

			`data:`
			`samples_per_gpu: 4`