#!/usr/bin/env python3
"""Phase 4A task-specific GCA configuration analysis.

Prints a human-readable report of the network architecture and feature-map
sizes described by ``multitask_BEV2X_phase4a_stage1_task_gca.yaml``.
"""

import math

try:
    import yaml
except ImportError:  # PyYAML is only needed when the config is read from disk.
    yaml = None

# Location of the analysed config, relative to the repository root.
DEFAULT_CONFIG_PATH = (
    "configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/"
    "multitask_BEV2X_phase4a_stage1_task_gca.yaml"
)

# Absorbs binary floating-point error in ``span / step`` (e.g. 0.3 / 0.1
# evaluates to 2.9999999999999996) without rounding a partial cell up.
_CELL_EPSILON = 1e-6


def _grid_cells(lower: float, upper: float, step: float) -> int:
    """Return how many whole cells of size *step* fit between the bounds."""
    return math.floor((upper - lower) / step + _CELL_EPSILON)


def _scope_grid(scope):
    """Return ``(resolution, width, height)`` for a grid-transform scope.

    NOTE(review): assumes *scope* is ``[[x_min, x_max, x_step],
    [y_min, y_max, y_step]]``, which is what the original ``# xbound
    resolution`` comment on ``scope[0][2]`` implies -- confirm against the
    grid-transform implementation. The reported resolution is the x step.
    """
    x_bound = scope[0]
    # With only one bound available the grid is treated as square.
    y_bound = scope[1] if len(scope) > 1 else x_bound
    resolution = x_bound[2]
    width = _grid_cells(x_bound[0], x_bound[1], resolution)
    height = _grid_cells(y_bound[0], y_bound[1], y_bound[2])
    return resolution, width, height


def _load_config(config_path):
    """Parse the YAML file at *config_path* and return the loaded object."""
    if yaml is None:
        raise ImportError(f"PyYAML is required to read {config_path}")
    with open(config_path, 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)


def _report_inputs(config):
    """Print section 1 and return ``(image_size, (width, height, depth))``."""
    print("📊 1. 输入规格分析")
    print("-" * 40)

    voxel_size = config['voxel_size']
    point_cloud_range = config['point_cloud_range']
    # The analysed file may not define image_size itself (the original note
    # says it is inherited from _base_), so fall back to a default.
    image_size = config.get('image_size', [256, 704])

    print(f"🔹 LiDAR体素尺寸: {voxel_size}")
    print(f"🔹 点云范围: {point_cloud_range}")
    print(f"🔹 图像尺寸: {image_size}")
    print()

    # point_cloud_range is indexed as [x_min, y_min, z_min, x_max, y_max, z_max].
    bev_width, bev_height, bev_depth = (
        _grid_cells(point_cloud_range[axis], point_cloud_range[axis + 3],
                    voxel_size[axis])
        for axis in range(3)
    )

    print(f"🔹 BEV网格尺寸: {bev_width}×{bev_height}×{bev_depth}")
    print(f" ├── 宽度: {bev_width} ({point_cloud_range[0]}m ~ {point_cloud_range[3]}m)")
    print(f" ├── 高度: {bev_height} ({point_cloud_range[1]}m ~ {point_cloud_range[4]}m)")
    print(f" └── 深度: {bev_depth} ({point_cloud_range[2]}m ~ {point_cloud_range[5]}m)")
    print()
    return image_size, (bev_width, bev_height, bev_depth)


def _report_encoders(config):
    """Print section 2 and return ``(model, vtransform)`` for later sections."""
    print("🏗️ 2. 编码器架构分析")
    print("-" * 40)

    model = config['model']
    encoders = model['encoders']

    # Camera encoder
    print("📷 Camera Encoder:")
    camera = encoders['camera']
    backbone = camera['backbone']
    print(f" ├── Backbone: {backbone['type']}")
    print(f" ├── embed_dims: {backbone['embed_dims']}")
    print(f" ├── depths: {backbone['depths']} (总层数: {sum(backbone['depths'])})")
    print(f" ├── num_heads: {backbone['num_heads']}")
    print(f" ├── out_indices: {backbone['out_indices']}")

    neck = camera['neck']
    print(f" ├── Neck: {neck['type']}")
    print(f" │ ├── in_channels: {neck['in_channels']}")
    print(f" │ ├── out_channels: {neck['out_channels']}")
    print(f" │ └── num_outs: {neck['num_outs']}")

    vtransform = camera['vtransform']
    print(f" └── VTransform: {vtransform['type']}")
    print(f" ├── in_channels: {vtransform['in_channels']}")
    print(f" ├── out_channels: {vtransform['out_channels']}")
    print(f" ├── image_size: {vtransform['image_size']}")
    print(f" ├── feature_size: {vtransform['feature_size']}")
    print(f" └── downsample: {vtransform['downsample']}")
    print()

    # LiDAR encoder
    print("🔍 LiDAR Encoder:")
    lidar = encoders['lidar']
    voxelize = lidar['voxelize']
    print(" ├── Voxelize:")
    print(f" │ ├── max_num_points: {voxelize['max_num_points']}")
    print(f" │ ├── voxel_size: {voxelize['voxel_size']}")
    print(f" │ ├── max_voxels: {voxelize['max_voxels']}")
    print(f" │ └── point_cloud_range: {voxelize['point_cloud_range']}")

    backbone_lidar = lidar['backbone']
    print(f" └── Backbone: {backbone_lidar['type']}")
    print(f" ├── in_channels: {backbone_lidar['in_channels']}")
    print(f" ├── output_channels: {backbone_lidar['output_channels']}")
    print(f" ├── sparse_shape: {backbone_lidar['sparse_shape']}")
    print(f" └── encoder_channels: {backbone_lidar['encoder_channels']}")
    print()

    # Fuser
    print("🔗 Fuser:")
    fuser = model['fuser']
    print(f" ├── Type: {fuser['type']}")
    print(f" ├── in_channels: {fuser['in_channels']} (Camera + LiDAR)")
    print(f" └── out_channels: {fuser['out_channels']}")
    print()
    return model, vtransform


def _report_decoder(model):
    """Print section 3: the decoder backbone and neck settings."""
    print("📈 3. 解码器架构分析")
    print("-" * 40)

    decoder = model['decoder']

    backbone_dec = decoder['backbone']
    print("🔧 Decoder Backbone:")
    print(f" ├── Type: {backbone_dec['type']}")
    print(f" ├── in_channels: {backbone_dec['in_channels']}")
    print(f" ├── out_channels: {backbone_dec['out_channels']}")
    print(f" └── layer_nums: {backbone_dec['layer_nums']}")

    neck_dec = decoder['neck']
    print("🔧 Decoder Neck:")
    print(f" ├── Type: {neck_dec['type']}")
    print(f" ├── in_channels: {neck_dec['in_channels']}")
    print(f" ├── out_channels: {neck_dec['out_channels']}")
    print(f" └── upsample_strides: {neck_dec['upsample_strides']}")
    print()


def _report_task_gca(model):
    """Print section 4: the task-specific GCA settings and a fixed explainer."""
    print("🎯 4. Task-specific GCA 机制")
    print("-" * 40)

    task_gca = model['task_specific_gca']
    print("✨ Task-specific GCA 配置:")
    print(f" ├── enabled: {task_gca['enabled']}")
    print(f" ├── in_channels: {task_gca['in_channels']} (原始BEV通道数)")
    print(f" ├── reduction: {task_gca['reduction']}")
    print(f" ├── use_max_pool: {task_gca['use_max_pool']}")
    print(f" ├── object_reduction: {task_gca['object_reduction']} (检测GCA)")
    print(f" └── map_reduction: {task_gca['map_reduction']} (分割GCA)")
    print()

    # Static explanatory text; it is not derived from the config.
    print("📋 GCA机制说明:")
    print(" • 检测GCA: 从512通道中选择对检测最有益的特征")
    print(" - 强化: 物体边界、中心点、空间关系")
    print(" - 抑制: 语义纹理、全局语义")
    print(" • 分割GCA: 从512通道中选择对分割最有益的特征")
    print(" - 强化: 语义纹理、连续性、全局语义")
    print(" - 抑制: 物体边界(精确)、中心点")
    print(" • 结果: 各取所需,性能最大化 ✅")
    print()


def _report_heads(config, model):
    """Print section 5 and return ``(map_head, output_width, output_height)``."""
    print("🎯 5. 任务头部分析")
    print("-" * 40)

    heads = model['heads']

    # Object head (3D detection)
    object_head = heads['object']
    print("🚗 3D检测头 (Object Head):")
    print(f" ├── in_channels: {object_head['in_channels']}")
    print(f" ├── train_cfg.grid_size: {object_head['train_cfg']['grid_size']}")
    print(f" └── test_cfg.grid_size: {object_head['test_cfg']['grid_size']}")
    print()

    # Map head (BEV segmentation)
    map_head = heads['map']
    print("🗺️ BEV分割头 (Map Head):")
    print(f" ├── Type: {map_head['type']}")
    print(f" ├── in_channels: {map_head['in_channels']}")
    print(f" ├── classes: {config.get('map_classes', '6 classes')}")
    print(f" ├── loss: {map_head['loss']}")
    print(f" ├── deep_supervision: {map_head['deep_supervision']}")
    print(f" ├── use_dice_loss: {map_head['use_dice_loss']}")
    print(f" ├── dice_weight: {map_head['dice_weight']}")
    print(f" ├── focal_alpha: {map_head['focal_alpha']}")
    print(f" ├── focal_gamma: {map_head['focal_gamma']}")
    print(f" ├── decoder_channels: {map_head['decoder_channels']}")
    print(f" ├── use_internal_gca: {map_head['use_internal_gca']}")
    print(f" ├── adaptive_multiscale: {map_head['adaptive_multiscale']}")
    print(f" └── adaptive_dilation_rates: {map_head['adaptive_dilation_rates']}")

    # Grid transform: the height comes from the y-bound instead of silently
    # copying the width, so non-square scopes are reported correctly.
    grid_transform = map_head['grid_transform']
    input_scope = grid_transform['input_scope']
    output_scope = grid_transform['output_scope']
    input_res, input_width, input_height = _scope_grid(input_scope)
    output_res, output_width, output_height = _scope_grid(output_scope)

    print(" └── Grid Transform:")
    print(f" ├── 输入范围: {input_scope}")
    print(f" │ ├── 分辨率: {input_res}m/pixel")
    print(f" │ └── 尺寸: {input_width}×{input_height}")
    print(f" ├── 输出范围: {output_scope}")
    print(f" │ ├── 分辨率: {output_res}m/pixel")
    print(f" │ └── 尺寸: {output_width}×{output_height}")
    print(f" └── 缩放倍数: {input_res/output_res:.1f}x 上采样")
    print()
    return map_head, output_width, output_height


def _report_feature_sizes(config, image_size, bev_dims, vtransform, map_head,
                          output_width, output_height):
    """Print section 6: feature-map sizes along the pipeline.

    Channel counts in this section are fixed text, not values read from the
    config.
    """
    bev_width, bev_height, bev_depth = bev_dims
    bev = f"{bev_width}×{bev_height}"

    print("📏 6. 特征尺寸计算")
    print("-" * 40)

    print("🌊 特征流尺寸变化:")
    print()

    # Camera feature flow
    print("📷 Camera特征流:")
    print(f" ├── 输入图像: {image_size[0]}×{image_size[1]}×3")
    print(f" ├── Swin输出特征: Stage1: {image_size[0]//4}×{image_size[1]//4}×192")
    print(f" │ Stage2: {image_size[0]//8}×{image_size[1]//8}×384")
    print(f" │ Stage3: {image_size[0]//16}×{image_size[1]//16}×768")
    print(f" │ Stage4: {image_size[0]//32}×{image_size[1]//32}×768")
    print(f" ├── LSS FPN输出: {vtransform['feature_size'][0]}×{vtransform['feature_size'][1]}×256")
    print(f" └── VTransform输出: {bev}×80")
    print()

    # LiDAR feature flow
    print("🔍 LiDAR特征流:")
    print(f" ├── 稀疏输入: {bev}×{bev_depth}")
    print(f" └── SparseEncoder输出: {bev}×128")
    print()

    # Fusion and decoding
    print("🔗 融合与解码:")
    print(" ├── ConvFuser输入: Camera(80ch) + LiDAR(128ch) = 208ch")
    print(f" ├── ConvFuser输出: {bev}×256")
    print(f" ├── SECOND Backbone: {bev}×256 → {bev_width//2}×{bev_height//2}×256")
    print(f" ├── SECONDFPN输出: {bev}×512 (融合多尺度)")
    print(f" └── Task-specific GCA: {bev}×512 → 任务特定特征")
    print()

    # Segmentation head, layer by layer
    print("🗺️ 分割头特征尺寸:")
    print(f" ├── 输入BEV: {bev}×512")
    print(" ├── 4层渐进上采样 (512→256→256→128→128):")
    layer_channels = [(512, 256), (256, 256), (256, 128), (128, 128)]
    for index, (c_in, c_out) in enumerate(layer_channels, start=1):
        branch = "└──" if index == len(layer_channels) else "├──"
        print(f" │ {branch} Layer{index}: {bev}×{c_in} → {bev}×{c_out}")
    print(f" ├── 自适应多尺度融合: {map_head['adaptive_multiscale']}")
    print(f" ├── 空洞率: {map_head['adaptive_dilation_rates']}")
    print(f" └── 最终输出: {output_width}×{output_height}×{len(config.get('map_classes', []))} (6类别)")
    print()


def _report_memory(bev_dims, output_width, output_height):
    """Print section 7: FP32 memory estimates for the main feature maps."""
    bev_width, bev_height, _ = bev_dims

    print("💾 7. 内存占用估计")
    print("-" * 40)

    bev_pixels = bev_width * bev_height
    seg_pixels = output_width * output_height

    # FP32 feature maps: 4 bytes per value, reported in GiB.
    bev_512ch = bev_pixels * 512 * 4 / (1024**3)
    bev_256ch = bev_pixels * 256 * 4 / (1024**3)
    seg_output = seg_pixels * 6 * 4 / (1024**3)

    # Labels use the computed grid sizes so they always match the numbers.
    print(f"🔹 BEV特征图 ({bev_width}×{bev_height}×512ch): {bev_512ch:.2f} GB")
    print(f"🔹 BEV特征图 ({bev_width}×{bev_height}×256ch): {bev_256ch:.2f} GB")
    print(f"🔹 分割输出 ({output_width}×{output_height}×6ch): {seg_output:.2f} GB")
    print(f"🔹 总计主要特征: {bev_512ch + bev_256ch + seg_output:.2f} GB")
    print()


def _report_innovations():
    """Print section 8: a fixed list of Phase 4A highlights."""
    print("🚀 8. Phase 4A 关键创新点")
    print("-" * 40)

    innovations = [
        "✨ Task-specific GCA: 检测和分割各自选择最优特征",
        "🎯 避免统一特征选择的折中问题",
        "🔧 自适应多尺度融合 (adaptive_multiscale)",
        "📏 渐进式4层解码器 (512→256→256→128→128)",
        "🎨 动态空洞率学习 (adaptive_dilation_rates: [1,3,6,12])",
        "📊 分割分辨率: 598×598 @ 0.167m/pixel",
        "🏗️ 检测使用完整1440×1440 BEV特征",
    ]

    for innovation in innovations:
        print(f" {innovation}")
    print()


def _report_training(config):
    """Print section 9: the training hyper-parameters read from the config."""
    print("⚙️ 9. 训练配置摘要")
    print("-" * 40)

    print(f"🔹 训练轮数: {config['max_epochs']}")
    print(f"🔹 学习率: {config['optimizer']['lr']}")
    print(f"🔹 权重衰减: {config['optimizer']['weight_decay']}")
    print(f"🔹 梯度裁剪: {config['optimizer_config']['grad_clip']['max_norm']}")
    print(f"🔹 FP16初始scale: {config['fp16']['loss_scale']['init_scale']}")
    print(f"🔹 验证间隔: {config['evaluation']['interval']}")
    print(f"🔹 数据采样: val.load_interval = {config['data']['val']['load_interval']}")
    print()


def analyze_phase4a_gca_config(config_path=DEFAULT_CONFIG_PATH, *, config=None):
    """Print an analysis report for the Phase 4A task-specific GCA config.

    Args:
        config_path: Path of the YAML config to load. Defaults to the Phase 4A
            stage-1 config, so calling with no arguments behaves as before.
            The path is always echoed in the report header.
        config: Optional already-parsed config mapping. When given, nothing is
            read from disk and PyYAML is not required.

    Returns:
        None. The report is written to standard output.

    Raises:
        ImportError: If the config must be loaded from disk but PyYAML is not
            installed.
        OSError: If *config_path* cannot be opened.
        KeyError: If the config lacks a key the report reads; sections before
            the missing key have already been printed by then.
    """
    print("=" * 80)
    print("🚀 BEVFusion Phase 4A - Task-specific GCA 架构分析")
    print("=" * 80)

    if config is None:
        config = _load_config(config_path)

    print(f"📁 配置文件: {config_path}")
    print()

    image_size, bev_dims = _report_inputs(config)
    model, vtransform = _report_encoders(config)
    _report_decoder(model)
    _report_task_gca(model)
    map_head, output_width, output_height = _report_heads(config, model)
    _report_feature_sizes(config, image_size, bev_dims, vtransform, map_head,
                          output_width, output_height)
    _report_memory(bev_dims, output_width, output_height)
    _report_innovations()
    _report_training(config)

    print("=" * 80)
    print("✅ Phase 4A Task-specific GCA 配置分析完成")
    print("=" * 80)


if __name__ == "__main__":
    analyze_phase4a_gca_config()