bev-project/ANALYZE_BEV_FEATURE_DIMENSI...

217 lines
9.2 KiB
Python
Raw Normal View History

2025-11-21 10:50:51 +08:00
#!/usr/bin/env python
"""
分析BEVFusion中BEV特征尺寸变化
重点分析EnhancedBEVSegmentationHead的4层渐进上采样过程
"""
import torch
import numpy as np
def _print_section(title):
    """Print a numbered section title followed by a 40-character dash rule."""
    print(f"\n{title}")
    print("-" * 40)


def calculate_bev_dimensions() -> None:
    """Print a stage-by-stage report of BEV feature sizes in the segmentation head.

    All numbers come from configuration values hard-coded in this function
    (BEV neck output, grid-transform scopes, ASPP settings, decoder channel
    list, class list); no model is built and no tensors are created.  The
    report is written to stdout and nothing is returned.
    """
    banner = "=" * 80
    print(banner)
    print("🎯 BEVFusion BEV特征尺寸分析")
    print(banner)

    # 1. Input BEV feature (output of the BEV neck).
    _print_section("📥 1. 输入BEV特征")
    # SECONDFPN output configuration.
    bev_neck_output = {
        'channels': 512,      # final SECONDFPN output channels
        'height': 144,        # BEV grid height
        'width': 144,         # BEV grid width
        'voxel_size': 0.75,   # metres per pixel
        'range': [-54, 54],   # BEV range in metres
    }
    neck_lo, neck_hi = bev_neck_output['range']
    print("BEV Neck (SECONDFPN) 输出:")
    print(f"├── 通道数: {bev_neck_output['channels']}")
    print(f"├── 空间尺寸: {bev_neck_output['height']} × {bev_neck_output['width']}")
    print(f"├── 分辨率: {bev_neck_output['voxel_size']}m/像素")
    print(f"├── 覆盖范围: {neck_lo}m ~ {neck_hi}m")
    # The original line here printed the bare text ".1f" (a format spec that
    # lost its f-string); reconstructed as the metric extent of the range.
    neck_extent = float(neck_hi - neck_lo)
    print(f"└── 覆盖尺寸: {neck_extent:.1f}m × {neck_extent:.1f}m")

    # 2. BEV grid transform: resample from the input scope to the output scope.
    _print_section("🔄 2. BEV Grid Transform")
    transform_config = {
        # each scope entry is [min, max, step] in metres
        'input_scope': [[-54.0, 54.0, 0.75], [-54.0, 54.0, 0.75]],
        'output_scope': [[-50, 50, 0.167], [-50, 50, 0.167]],
    }
    in_lo, in_hi, input_resolution = transform_config['input_scope'][0]
    out_lo, out_hi, output_resolution = transform_config['output_scope'][0]
    # Number of grid cells along one axis: 108 / 0.75 -> 144, 100 / 0.167 -> 598.
    in_px = int((in_hi - in_lo) / input_resolution)
    out_px = int((out_hi - out_lo) / output_resolution)
    print("坐标系变换:")
    print(f"├── 输入范围: {in_lo}m ~ {in_hi}m")
    print(f"├── 输入分辨率: {input_resolution}m/像素")
    print(f"├── 输入像素: {in_px} × {in_px}")
    print("")
    print(f"├── 输出范围: {out_lo}m ~ {out_hi}m")
    print(f"├── 输出分辨率: {output_resolution}m/像素")
    print(f"├── 输出像素: {out_px} × {out_px} (≈598×598)")
    print(f"└── 空间放大: {out_px / in_px:.1f}x")

    # 3. ASPP multi-scale features.
    _print_section("🎯 3. ASPP多尺度特征")
    aspp_config = {
        'input_channels': bev_neck_output['channels'],  # 512
        'output_channels': 256,                         # decoder_channels[0]
        'dilation_rates': [1, 3, 6, 12],                # ASPP dilation rates
    }
    print("ASPP (Atrous Spatial Pyramid Pooling):")
    print(f"├── 输入通道: {aspp_config['input_channels']}")
    print(f"├── 输出通道: {aspp_config['output_channels']}")
    print(f"├── 膨胀率: {aspp_config['dilation_rates']}")
    print(f"├── 空间尺寸: {out_px} × {out_px} (保持不变)")
    print("└── 作用: 捕获多尺度上下文信息")

    # 4. Attention stages; each is reported with the same channel count in and out.
    _print_section("🎯 4. 注意力机制")
    attn_channels = aspp_config['output_channels']
    attention_stages = [
        {
            'name': 'Channel Attention',
            'input_channels': attn_channels,
            'operation': '通道重要性加权',
        },
        {
            'name': 'Spatial Attention',
            'input_channels': attn_channels,
            'operation': '空间位置重要性加权',
        },
        {
            'name': 'Global Context Attention (GCA)',
            'input_channels': attn_channels,
            'operation': '全局上下文聚合',
            'reduction': 4,
        },
    ]
    for idx, attn in enumerate(attention_stages, start=1):
        print(f"{idx}. {attn['name']}:")
        print(f" ├── 输入通道: {attn['input_channels']}")
        print(f" ├── 操作: {attn['operation']}")
        if 'reduction' in attn:
            print(f" ├── 降维比例: {attn['reduction']}x")
        print(f" └── 输出通道: {attn['input_channels']}")

    # 5. Deep decoder: four conv layers.  As reported below, these change the
    # channel count only; the spatial size is not upsampled.
    _print_section("🔄 5. Deep Decoder (4层卷积网络)")
    decoder_channels = [256, 256, 128, 128]
    print("解码器架构 (4层卷积):")
    print(f"├── 输入特征: {aspp_config['output_channels']}ch × {out_px}×{out_px}")
    # Derived from the computed grid size instead of a hard-coded 598.
    print(f"├── 空间尺寸: 始终保持 {out_px}×{out_px} (无上采样!)")
    print("\n通道变换过程:")
    current_channels = aspp_config['output_channels']
    for idx, out_channels in enumerate(decoder_channels, start=1):
        print(f"├── Layer {idx}: {current_channels}ch → {out_channels}ch")
        print("│ ├── Conv2d(3×3, padding=1)")
        print("│ ├── GroupNorm")
        print("│ ├── ReLU")
        print("│ └── Dropout2d(0.1)")
        current_channels = out_channels
    print(f"└── 最终特征: {decoder_channels[-1]}ch × {out_px}×{out_px}")

    # 6. Per-class classification head.
    _print_section("🎯 6. 分类头 (Per-class Classification)")
    classifier_config = {
        'input_channels': decoder_channels[-1],        # 128
        'hidden_channels': decoder_channels[-1] // 2,  # 64
        'num_classes': 6,                              # number of BEV segmentation classes
        'classes': ['drivable_area', 'ped_crossing', 'walkway',
                    'stop_line', 'carpark_area', 'divider'],
    }
    print("每个类别的分类器:")
    print(f"├── 输入通道: {classifier_config['input_channels']}")
    print(f"├── 隐藏通道: {classifier_config['hidden_channels']}")
    print("├── 输出通道: 1 (每个类别独立预测)")
    print(f"└── 总类别数: {classifier_config['num_classes']}")
    print("\n类别列表:")
    for idx, cls in enumerate(classifier_config['classes'], start=1):
        # The original printed the bare text "2d" for every class; restored as
        # a right-aligned index followed by the class name.
        print(f"{idx:2d}. {cls}")

    # 7. Final output tensor.
    _print_section("📤 7. 最终输出")
    num_classes = classifier_config['num_classes']
    final_output = {
        'shape': [num_classes, out_px, out_px],
        'resolution': output_resolution,
        'range': [out_lo, out_hi],
        'total_pixels': num_classes * out_px ** 2,
    }
    print("分割预测结果:")
    print(f"├── 张量形状: [{final_output['shape'][0]}, {final_output['shape'][1]}, {final_output['shape'][2]}]")
    print(f"├── 空间分辨率: {final_output['resolution']}m/像素")
    print(f"├── 覆盖范围: {final_output['range'][0]}m ~ {final_output['range'][1]}m")
    # Same broken ".1f" line as in section 1; reconstructed the same way.
    out_extent = float(final_output['range'][1] - final_output['range'][0])
    print(f"├── 覆盖尺寸: {out_extent:.1f}m × {out_extent:.1f}m")
    print(f"└── 总像素数: {final_output['total_pixels']:,}")

    # 8. Summary of the size at every stage.
    _print_section("📊 8. 尺寸变化总结")
    grid = f"{out_px}×{out_px}"
    dimension_summary = [
        ("BEV Neck输出",
         f"{bev_neck_output['channels']}ch × {bev_neck_output['height']}×{bev_neck_output['width']}"),
        ("Grid Transform", f"{bev_neck_output['channels']}ch × {grid}"),
        ("ASPP", f"{aspp_config['output_channels']}ch × {grid}"),
        ("Channel Attention", f"{aspp_config['output_channels']}ch × {grid}"),
        ("Spatial Attention", f"{aspp_config['output_channels']}ch × {grid}"),
        *[(f"Decoder Layer {idx}", f"{channels}ch × {grid}")
          for idx, channels in enumerate(decoder_channels, start=1)],
        ("最终输出", f"{num_classes}ch × {grid}"),
    ]
    print("特征尺寸演变:")
    last = len(dimension_summary) - 1
    for idx, (stage, size) in enumerate(dimension_summary):
        marker = "├──" if idx < last else "└──"
        print(f"{marker} {stage}: {size}")

    # 9. Key findings (narrative text, printed verbatim).
    _print_section("💡 9. 关键发现")
    findings = [
        "🎯 空间尺寸保持不变: 整个分割头不进行空间上采样",
        "🔄 坐标系变换: 从144×144 (0.75m/px) → 598×598 (0.167m/px)",
        "📈 分辨率提升: 4.5倍高分辨率输出 (144→598像素)",
        "🏗️ 通道变换: 512→256→256→128→128 (4层渐进压缩)",
        "🎯 最终输出: 6类别 × 598×598 高分辨率分割图",
        "⚡ 计算特点: 空间保持,通道压缩,注意力增强",
    ]
    for finding in findings:
        print(f"├── {finding}")

    print("\n" + banner)
    print("🏁 BEV特征尺寸分析完成")
    print(banner)
# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    calculate_bev_dimensions()