bev-project/mmdet3d/models/fusers/conv.py

from typing import List

import torch
from torch import nn
import torch.nn.functional as F

from mmdet3d.models.builder import FUSERS

__all__ = ["ConvFuser"]


@FUSERS.register_module()
class ConvFuser(nn.Sequential):
    """Fuse several feature maps by channel concatenation and a conv block.

    The module is an ``nn.Sequential`` of a 3x3 ``Conv2d`` (padding 1, no
    bias), a ``BatchNorm2d`` and an in-place ``ReLU``. The convolution
    expects ``sum(in_channels)`` input channels, i.e. the inputs concatenated
    along dimension 1.

    Args:
        in_channels: Channel count of each input feature map, in the same
            order the maps are passed to :meth:`forward`.
        out_channels: Number of channels produced by the fused output.
    """

    def __init__(self, in_channels: List[int], out_channels: int) -> None:
        self.in_channels = in_channels
        self.out_channels = out_channels
        super().__init__(
            nn.Conv2d(sum(in_channels), out_channels, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(True),
        )

    def forward(self, inputs: List[torch.Tensor]) -> torch.Tensor:
        """Align the inputs spatially, concatenate them and apply the conv block.

        Args:
            inputs: Feature maps to fuse. Their last two dimensions are
                treated as height and width; dimension 1 is the
                concatenation (channel) axis.

        Returns:
            The output of the conv-BN-ReLU stack applied to the concatenated
            feature maps.

        Raises:
            ValueError: If ``inputs`` is empty.
        """
        if not inputs:
            raise ValueError(
                "ConvFuser.forward() requires at least one input feature map"
            )
        if len(inputs) == 1:
            # Nothing to align or concatenate for a single input.
            return super().forward(inputs[0])

        # Align spatial sizes: bring every map to the largest H/W among the
        # inputs (per the original author's note, usually the LiDAR branch at
        # 360x360).
        target_size = (
            max(feat.shape[-2] for feat in inputs),
            max(feat.shape[-1] for feat in inputs),
        )

        # Only resample maps whose size differs, so already-aligned inputs are
        # passed through untouched.
        aligned = [
            feat
            if tuple(feat.shape[-2:]) == target_size
            else F.interpolate(
                feat,
                size=target_size,
                mode="bilinear",
                align_corners=False,
            )
            for feat in inputs
        ]

        return super().forward(torch.cat(aligned, dim=1))
-												[Major] Code release.

											
										
										
											2022-06-03 12:21:18 +08:00
+								from typing import List
 								import torch
 								from torch import nn
-												Complete project state snapshot: Phase 4B RMT-PPAD Integration

🎯 Training Status:
- Current Epoch: 2/10 (13.3% complete)
- Segmentation Dice: 0.9594
- Detection IoU: 0.5742
- Training stable with 8 GPUs

🔧 Technical Achievements:
- ✅ RMT-PPAD Transformer segmentation decoder integrated
- ✅ Task-specific GCA architecture optimized
- ✅ Multi-scale feature fusion (180×180, 360×360, 600×600)
- ✅ Adaptive scale weight learning implemented
- ✅ BEVFusion multi-task framework enhanced

📊 Performance Highlights:
- Divider segmentation: 0.9793 Dice (excellent)
- Pedestrian crossing: 0.9812 Dice (excellent)
- Stop line: 0.9812 Dice (excellent)
- Carpark area: 0.9802 Dice (excellent)
- Walkway: 0.9401 Dice (good)
- Drivable area: 0.8959 Dice (good)

🛠️ Code Changes Included:
- Enhanced BEVFusion model (bevfusion.py)
- RMT-PPAD integration modules (rmtppad_integration.py)
- Transformer segmentation head (enhanced_transformer.py)
- GCA module optimizations (gca.py)
- Configuration updates (Phase 4B configs)
- Training scripts and automation tools
- Comprehensive documentation and analysis reports

📅 Snapshot Date: Fri Nov 14 09:06:09 UTC 2025
📍 Environment: Docker container
🎯 Phase: RMT-PPAD Integration Complete

											
										
										
											2025-11-14 17:06:09 +08:00
+								import torch.nn.functional as F
-												[Major] Code release.

											
										
										
											2022-06-03 12:21:18 +08:00
 								from mmdet3d.models.builder import FUSERS
 								__all__ = ["ConvFuser"]
 								@FUSERS.register_module()
 								class ConvFuser(nn.Sequential):
 								    def __init__(self, in_channels: int, out_channels: int) -> None:
 								        self.in_channels = in_channels
 								        self.out_channels = out_channels
 								        super().__init__(
 								            nn.Conv2d(sum(in_channels), out_channels, 3, padding=1, bias=False),
 								            nn.BatchNorm2d(out_channels),
 								            nn.ReLU(True),
 								        )
 								    def forward(self, inputs: List[torch.Tensor]) -> torch.Tensor:
-												Complete project state snapshot: Phase 4B RMT-PPAD Integration

🎯 Training Status:
- Current Epoch: 2/10 (13.3% complete)
- Segmentation Dice: 0.9594
- Detection IoU: 0.5742
- Training stable with 8 GPUs

🔧 Technical Achievements:
- ✅ RMT-PPAD Transformer segmentation decoder integrated
- ✅ Task-specific GCA architecture optimized
- ✅ Multi-scale feature fusion (180×180, 360×360, 600×600)
- ✅ Adaptive scale weight learning implemented
- ✅ BEVFusion multi-task framework enhanced

📊 Performance Highlights:
- Divider segmentation: 0.9793 Dice (excellent)
- Pedestrian crossing: 0.9812 Dice (excellent)
- Stop line: 0.9812 Dice (excellent)
- Carpark area: 0.9802 Dice (excellent)
- Walkway: 0.9401 Dice (good)
- Drivable area: 0.8959 Dice (good)

🛠️ Code Changes Included:
- Enhanced BEVFusion model (bevfusion.py)
- RMT-PPAD integration modules (rmtppad_integration.py)
- Transformer segmentation head (enhanced_transformer.py)
- GCA module optimizations (gca.py)
- Configuration updates (Phase 4B configs)
- Training scripts and automation tools
- Comprehensive documentation and analysis reports

📅 Snapshot Date: Fri Nov 14 09:06:09 UTC 2025
📍 Environment: Docker container
🎯 Phase: RMT-PPAD Integration Complete

											
										
										
											2025-11-14 17:06:09 +08:00
+								        if len(inputs) == 1:
 								            return super().forward(inputs[0])
 								        # 对齐空间尺寸：统一到最大的 H/W（通常是 LiDAR 360×360）
 								        target_h = max(feat.shape[-2] for feat in inputs)
 								        target_w = max(feat.shape[-1] for feat in inputs)
 								        aligned = []
 								        for feat in inputs:
 								            if feat.shape[-2] != target_h or feat.shape[-1] != target_w:
 								                feat = F.interpolate(
 								                    feat,
 								                    size=(target_h, target_w),
 								                    mode="bilinear",
 								                    align_corners=False,
 								                )
 								            aligned.append(feat)
 								        return super().forward(torch.cat(aligned, dim=1))