#!/usr/bin/env python
"""
Analyze how BEV feature dimensions change through BEVFusion.

Focuses on the 4-layer decoder stage of EnhancedBEVSegmentationHead.
"""

import torch
import numpy as np


def calculate_bev_dimensions():
    """Print a stage-by-stage report of BEV feature sizes.

    All figures are derived from configuration values hard-coded in this
    function (neck output, grid-transform scopes, ASPP/decoder channels and
    the class list); no model is built and no tensor is allocated.

    Returns:
        None. The report is written to standard output.
    """
    banner = "=" * 80
    rule = "-" * 40

    print(banner)
    print("🎯 BEVFusion BEV特征尺寸分析")
    print(banner)

    # 1. Input BEV feature (coming from the BEV neck)
    print("\n📥 1. 输入BEV特征")
    print(rule)

    # SECONDFPN output configuration
    bev_neck_output = {
        'channels': 512,      # final SECONDFPN output channels
        'height': 144,        # BEV spatial resolution
        'width': 144,         # BEV spatial resolution
        'voxel_size': 0.75,   # metres per pixel
        'range': [-54, 54],   # BEV extent in metres
    }
    neck_lo, neck_hi = bev_neck_output['range']
    neck_side = neck_hi - neck_lo

    print("BEV Neck (SECONDFPN) 输出:")
    print(f"├── 通道数: {bev_neck_output['channels']}")
    print(f"├── 空间尺寸: {bev_neck_output['height']} × {bev_neck_output['width']}")
    print(f"├── 分辨率: {bev_neck_output['voxel_size']}m/像素")
    print(f"├── 覆盖范围: {neck_lo}m ~ {neck_hi}m")
    # Previously printed the literal ".1f" (the f-string body had been lost).
    print(f"└── 覆盖面积: {neck_side:.1f}m × {neck_side:.1f}m")

    # 2. BEV grid transform
    print("\n🔄 2. BEV Grid Transform")
    print(rule)

    transform_config = {
        # [min, max, step] per axis, input coordinate frame
        'input_scope': [[-54.0, 54.0, 0.75], [-54.0, 54.0, 0.75]],
        # [min, max, step] per axis, output coordinate frame
        'output_scope': [[-50, 50, 0.167], [-50, 50, 0.167]],
    }
    in_lo, in_hi, in_step = transform_config['input_scope'][0]
    out_lo, out_hi, out_step = transform_config['output_scope'][0]

    # Grid size along one axis = extent / step, truncated (108 / 0.75 -> 144,
    # 100 / 0.167 -> 598).
    # NOTE(review): truncation matches the figures this report has always
    # shown; confirm against the actual grid-transform implementation.
    input_size = int((in_hi - in_lo) / in_step)
    output_size = int((out_hi - out_lo) / out_step)
    output_resolution = out_step
    output_side = out_hi - out_lo

    print("坐标系变换:")
    print(f"├── 输入范围: {in_lo}m ~ {in_hi}m")
    print(f"├── 输入分辨率: {in_step}m/像素")
    print(f"├── 输入像素: {input_size} × {input_size}")
    print("")
    print(f"├── 输出范围: {out_lo}m ~ {out_hi}m")
    print(f"├── 输出分辨率: {out_step}m/像素")
    print(f"├── 输出像素: {output_size} × {output_size} (≈598×598)")
    print(f"└── 空间放大: {output_size / input_size:.1f}x")

    # 3. ASPP multi-scale features
    print("\n🎯 3. ASPP多尺度特征")
    print(rule)

    aspp_config = {
        'input_channels': bev_neck_output['channels'],  # 512
        'output_channels': 256,                         # decoder_channels[0]
        'dilation_rates': [1, 3, 6, 12],                # ASPP dilation rates
    }

    print("ASPP (Atrous Spatial Pyramid Pooling):")
    print(f"├── 输入通道: {aspp_config['input_channels']}")
    print(f"├── 输出通道: {aspp_config['output_channels']}")
    print(f"├── 膨胀率: {aspp_config['dilation_rates']}")
    print(f"├── 空间尺寸: {output_size} × {output_size} (保持不变)")
    print("└── 作用: 捕获多尺度上下文信息")

    # 4. Attention mechanisms
    print("\n🎯 4. 注意力机制")
    print(rule)

    attn_channels = aspp_config['output_channels']
    attention_stages = [
        {
            'name': 'Channel Attention',
            'input_channels': attn_channels,
            'operation': '通道重要性加权',
        },
        {
            'name': 'Spatial Attention',
            'input_channels': attn_channels,
            'operation': '空间位置重要性加权',
        },
        {
            'name': 'Global Context Attention (GCA)',
            'input_channels': attn_channels,
            'operation': '全局上下文聚合',
            'reduction': 4,
        },
    ]

    for number, stage in enumerate(attention_stages, start=1):
        print(f"{number}. {stage['name']}:")
        print(f" ├── 输入通道: {stage['input_channels']}")
        print(f" ├── 操作: {stage['operation']}")
        if 'reduction' in stage:
            print(f" ├── 降维比例: {stage['reduction']}x")
        # Every attention stage listed here keeps the channel count unchanged.
        print(f" └── 输出通道: {stage['input_channels']}")

    # 5. Deep decoder (4 layers)
    print("\n🔄 5. Deep Decoder (4层卷积网络)")
    print(rule)

    # Note: the "upsampling" here is really a change in channel count,
    # not an upsampling of the spatial size.
    decoder_channels = [256, 256, 128, 128]  # decoder_channels configuration

    print("解码器架构 (4层卷积):")
    print(f"├── 输入特征: {aspp_config['output_channels']}ch × {output_size}×{output_size}")
    # Derived from the transform config instead of a hard-coded 598.
    print(f"├── 空间尺寸: 始终保持 {output_size}×{output_size} (无上采样!)")

    print("\n通道变换过程:")
    current_channels = aspp_config['output_channels']
    for layer_no, out_channels in enumerate(decoder_channels, start=1):
        print(f"├── Layer {layer_no}: {current_channels}ch → {out_channels}ch")
        print("│ ├── Conv2d(3×3, padding=1)")
        print("│ ├── GroupNorm")
        print("│ ├── ReLU")
        print("│ └── Dropout2d(0.1)")
        current_channels = out_channels

    print(f"└── 最终特征: {decoder_channels[-1]}ch × {output_size}×{output_size}")

    # 6. Classification head
    print("\n🎯 6. 分类头 (Per-class Classification)")
    print(rule)

    classifier_config = {
        'input_channels': decoder_channels[-1],        # 128
        'hidden_channels': decoder_channels[-1] // 2,  # 64
        'num_classes': 6,  # number of nuScenes BEV segmentation classes
        'classes': ['drivable_area', 'ped_crossing', 'walkway',
                    'stop_line', 'carpark_area', 'divider'],
    }

    print("每个类别的分类器:")
    print(f"├── 输入通道: {classifier_config['input_channels']}")
    print(f"├── 隐藏通道: {classifier_config['hidden_channels']}")
    print("├── 输出通道: 1 (每个类别独立预测)")
    print(f"└── 总类别数: {classifier_config['num_classes']}")

    print("\n类别列表:")
    for number, cls in enumerate(classifier_config['classes'], start=1):
        # Previously printed the literal "2d" for every class.
        print(f"{number:2d}. {cls}")

    # 7. Final output
    print("\n📤 7. 最终输出")
    print(rule)

    final_output = {
        'shape': [classifier_config['num_classes'], output_size, output_size],
        'resolution': output_resolution,
        'range': [out_lo, out_hi],
        'total_pixels': classifier_config['num_classes'] * output_size ** 2,
    }
    n_cls, out_h, out_w = final_output['shape']

    print("分割预测结果:")
    print(f"├── 张量形状: [{n_cls}, {out_h}, {out_w}]")
    print(f"├── 空间分辨率: {final_output['resolution']}m/像素")
    print(f"├── 覆盖范围: {final_output['range'][0]}m ~ {final_output['range'][1]}m")
    # Previously printed the literal ".1f" (the f-string body had been lost).
    print(f"├── 覆盖面积: {output_side:.1f}m × {output_side:.1f}m")
    print(f"└── 总像素数: {final_output['total_pixels']:,}")

    # 8. Summary of size changes
    print("\n📊 8. 尺寸变化总结")
    print(rule)

    grid = f"{output_size}×{output_size}"
    dimension_summary = [
        ("BEV Neck输出",
         f"{bev_neck_output['channels']}ch × "
         f"{bev_neck_output['height']}×{bev_neck_output['width']}"),
        ("Grid Transform", f"{bev_neck_output['channels']}ch × {grid}"),
        ("ASPP", f"{aspp_config['output_channels']}ch × {grid}"),
        ("Channel Attention", f"{aspp_config['output_channels']}ch × {grid}"),
        ("Spatial Attention", f"{aspp_config['output_channels']}ch × {grid}"),
        ("Decoder Layer 1", f"{decoder_channels[0]}ch × {grid}"),
        ("Decoder Layer 2", f"{decoder_channels[1]}ch × {grid}"),
        ("Decoder Layer 3", f"{decoder_channels[2]}ch × {grid}"),
        ("Decoder Layer 4", f"{decoder_channels[3]}ch × {grid}"),
        ("最终输出", f"{classifier_config['num_classes']}ch × {grid}"),
    ]

    print("特征尺寸演变:")
    last_index = len(dimension_summary) - 1
    for index, (stage_name, size) in enumerate(dimension_summary):
        marker = "├──" if index < last_index else "└──"
        print(f"{marker} {stage_name}: {size}")

    # 9. Key findings
    print("\n💡 9. 关键发现")
    print(rule)

    findings = [
        "🎯 空间尺寸保持不变: 整个分割头不进行空间上采样",
        "🔄 坐标系变换: 从144×144 (0.75m/px) → 598×598 (0.167m/px)",
        "📈 分辨率提升: 4.5倍高分辨率输出 (144→598像素)",
        "🏗️ 通道变换: 512→256→256→128→128 (4层渐进压缩)",
        "🎯 最终输出: 6类别 × 598×598 高分辨率分割图",
        "⚡ 计算特点: 空间保持,通道压缩,注意力增强",
    ]

    for finding in findings:
        print(f"├── {finding}")

    print("\n" + banner)
    print("🏁 BEV特征尺寸分析完成!")
    print(banner)


if __name__ == '__main__':
    calculate_bev_dimensions()