#!/usr/bin/env python3 """ RT-DETR架构分析工具 分析rtdetr-l.yaml和rtdetr-resnet50.yaml的网络结构和特征维度 """ def analyze_rtdetr_l(): """分析RT-DETR-l (HGBlock) 架构""" print("=" * 80) print("🚀 RT-DETR-l (HGBlock架构) 网络分析") print("=" * 80) # 假设输入尺寸为640×640(典型RT-DETR输入) input_size = 640 print(f"📊 输入尺寸: {input_size}×{input_size}×3") print() print("🏗️ Backbone架构分析:") print("-" * 50) # 特征尺寸跟踪 current_size = input_size layers = [ # [layer_idx, module, args, description] [0, "HGStem", "[32, 48]", "Stem层 - P2/4"], [1, "HGBlock×6", "[48, 128, 3]", "Stage 1 - 6个HGBlock"], [2, "DWConv", "[128, 3, 2, 1, False]", "下采样 - P3/8"], [3, "HGBlock×6", "[96, 512, 3]", "Stage 2 - 6个HGBlock"], [4, "DWConv", "[512, 3, 2, 1, False]", "下采样 - P4/16"], [5, "HGBlock×6", "[192, 1024, 5, True, False]", "Stage 3 - HGBlock 1"], [6, "HGBlock×6", "[192, 1024, 5, True, True]", "Stage 3 - HGBlock 2"], [7, "HGBlock×6", "[192, 1024, 5, True, True]", "Stage 3 - HGBlock 3"], [8, "DWConv", "[1024, 3, 2, 1, False]", "下采样 - P5/32"], [9, "HGBlock×6", "[384, 2048, 5, True, False]", "Stage 4 - 6个HGBlock"], ] feature_maps = {} for i, (idx, module, args, desc) in enumerate(layers): print(f" [{idx}] {module}: {desc}") if "HGStem" in module: # Stem层:输入 -> 32 -> 48, 尺寸/4 current_size = current_size // 4 channels = 48 print(f" ├── 输入: {input_size}×{input_size}×3") print(f" ├── 输出: {current_size}×{current_size}×{channels}") feature_maps[f"P2"] = (current_size, channels) elif "HGBlock" in module: # HGBlock保持尺寸和通道数 args_list = args.replace("[", "").replace("]", "").split(", ") if len(args_list) >= 2: channels = int(args_list[1]) print(f" ├── 尺寸: {current_size}×{current_size}×{channels}") elif "DWConv" in module: # 下采样:stride=2, 尺寸/2 current_size = current_size // 2 args_list = args.replace("[", "").replace("]", "").split(", ") channels = int(args_list[0]) level = f"P{5 - (current_size // (input_size // 32)) + 1}" # 计算P级别 if current_size == input_size // 8: level = "P3" elif current_size == input_size // 16: level = "P4" elif current_size == input_size // 32: level = "P5" print(f" ├── 输出: {current_size}×{current_size}×{channels}") feature_maps[level] = (current_size, channels) print() print("🎯 Backbone输出特征图:") for level, (size, ch) in feature_maps.items(): print(f" {level}: {size}×{size}×{ch} = {size*size*ch:,} 参数") print() # Head分析 print("🎯 Head架构分析 (FPN + Decoder):") print("-" * 50) # FPN特征流 fpn_features = { "P3": feature_maps.get("P3", (80, 256)), "P4": feature_maps.get("P4", (40, 512)), "P5": feature_maps.get("P5", (20, 1024)) } print("FPN特征流:") for level, (size, ch) in fpn_features.items(): print(f" {level}: {size}×{size}×{ch}") print() print("最终检测输出:") print(f" RTDETRDecoder输入: P3({fpn_features['P3'][0]}×{fpn_features['P3'][0]}×{fpn_features['P3'][1]}) + " + f"P4({fpn_features['P4'][0]}×{fpn_features['P4'][0]}×{fpn_features['P4'][1]}) + " + f"P5({fpn_features['P5'][0]}×{fpn_features['P5'][0]}×{fpn_features['P5'][1]})") print(f" 输出: {80}×{80}×(4+nc) + {40}×{40}×(4+nc) + {20}×{20}×(4+nc) # 4=bbox, nc=80类") print() def analyze_rtdetr_resnet50(): """分析RT-DETR-ResNet50架构""" print("=" * 80) print("🏗️ RT-DETR-ResNet50 (传统架构) 网络分析") print("=" * 80) input_size = 640 print(f"📊 输入尺寸: {input_size}×{input_size}×3") print() print("🏗️ Backbone架构分析:") print("-" * 50) current_size = input_size layers = [ [0, "ResNetLayer", "[3, 64, 1, True, 1]", "Stem - C2"], [1, "ResNetLayer", "[64, 64, 1, False, 3]", "Stage 1 - C3"], [2, "ResNetLayer", "[256, 128, 2, False, 4]", "Stage 2 - C4"], [3, "ResNetLayer", "[512, 256, 2, False, 6]", "Stage 3 - C5"], [4, "ResNetLayer", "[1024, 512, 2, False, 3]", "Stage 4 - C6"], ] feature_maps = {} channels = [64, 64, 128, 256, 512] for i, (idx, module, args, desc) in enumerate(layers): print(f" [{idx}] {module}: {desc}") args_list = args.replace("[", "").replace("]", "").split(", ") in_ch = int(args_list[0]) out_ch = int(args_list[1]) stride = int(args_list[2]) if stride == 2: current_size = current_size // 2 print(f" ├── 输入通道: {in_ch}, 输出通道: {out_ch}") print(f" ├── 步长: {stride}, 当前尺寸: {current_size}×{current_size}") # 记录输出特征图 level = f"C{i+2}" if i < 5 else f"C{i+2}" feature_maps[level] = (current_size, out_ch) print() print("🎯 Backbone输出特征图:") for level, (size, ch) in feature_maps.items(): print(f" {level}: {size}×{size}×{ch} = {size*size*ch:,} 参数") print() # Head分析 print("🎯 Head架构分析 (FPN + Decoder):") print("-" * 50) # 从C3-C6映射到P3-P6,然后FPN生成P3-P5 fpn_features = { "P3": (80, 256), # 从C3上采样得到 "P4": (40, 256), # 从C4得到 "P5": (20, 256) # 从C5下采样得到 } print("FPN特征流:") for level, (size, ch) in fpn_features.items(): print(f" {level}: {size}×{size}×{ch}") print() print("最终检测输出:") print(f" RTDETRDecoder输入: P3({fpn_features['P3'][0]}×{fpn_features['P3'][0]}×{fpn_features['P3'][1]}) + " + f"P4({fpn_features['P4'][0]}×{fpn_features['P4'][0]}×{fpn_features['P4'][1]}) + " + f"P5({fpn_features['P5'][0]}×{fpn_features['P5'][0]}×{fpn_features['P5'][1]})") print(f" 输出: {80}×{80}×(4+nc) + {40}×{40}×(4+nc) + {20}×{20}×(4+nc) # 4=bbox, nc=80类") print() def compare_architectures(): """对比两种架构""" print("=" * 80) print("🔄 架构对比分析") print("=" * 80) comparison = { "RT-DETR-l (HGBlock)": { "骨干网络": "HGBlock (Hybrid Guided)", "参数量级": "大 (多层HGBlock)", "创新点": "轻量级混合引导块", "优势": "参数效率高,性能好", "P3特征": "80×80×256", "P4特征": "40×40×512", "P5特征": "20×20×1024" }, "RT-DETR-ResNet50": { "骨干网络": "ResNetLayer (传统)", "参数量级": "中等", "创新点": "标准ResNet架构", "优势": "稳定,易于训练", "P3特征": "80×80×256", "P4特征": "40×40×256", "P5特征": "20×20×256" } } print("📊 架构对比:") print("-" * 60) for model, specs in comparison.items(): print(f"🏗️ {model}:") for key, value in specs.items(): print(f" {key}: {value}") print() print("🎯 关键差异:") print("1. HGBlock架构使用渐进式通道增长: 48→128→512→1024→2048") print("2. ResNet50使用标准通道: 64→256→512→1024→2048") print("3. HGBlock在深层使用更大的卷积核 (k=5) 和残差连接") print("4. 两种架构的Head部分都是统一的FPN + RTDETRDecoder") print() if __name__ == "__main__": analyze_rtdetr_l() analyze_rtdetr_resnet50() compare_architectures()