#!/bin/bash
# Launch the Enhanced multitask training run, continuing from epoch 19
# (improved version).
# Training options come from the YAML config file rather than from
# command-line arguments.
#
# Usage: run interactively; the script asks for a y/n confirmation before
# starting. All training output is shown on the terminal and also written
# to a timestamped log file in the working directory.
#
# Exit status: 0 on success or when the user cancels; otherwise the
# non-zero status of the failed training pipeline.

set -e
# Without pipefail the status of `train | tee` is tee's status, so a crashed
# training run would be reported as a success.
set -o pipefail

export PATH=/opt/conda/bin:$PATH
cd /workspace/bevfusion

echo "=========================================="
echo "BEVFusion Enhanced训练 - 从Epoch 19继续"
echo "任务: 3D检测 + BEV分割(Enhanced Head)"
echo "=========================================="
echo ""
echo "配置信息:"
echo " 配置文件: multitask_enhanced.yaml"
echo " GPU数量: 4x Tesla V100S"
echo " 基础模型: epoch_19.pth (515MB)"
echo " 分割头: EnhancedBEVSegmentationHead"
echo ""
echo "Enhanced功能:"
echo " ✅ ASPP - 多尺度特征"
echo " ✅ Channel Attention - 通道注意力"
echo " ✅ Spatial Attention - 空间注意力"
echo " ✅ Deep Supervision - 深度监督"
echo " ✅ Dice Loss - 区域相似度"
echo " ✅ Class Weights - 类别平衡"
echo ""
echo "优化措施:"
echo " - 使用4个GPU(避免8GPU同步死锁)"
echo " - workers_per_gpu=0(避免共享内存问题)"
echo " - samples_per_gpu=2(降低内存压力)"
echo " - 学习率=1e-4(微调模式)"
echo " - map loss权重=5.0(强化分割学习)"
echo ""
echo "预计训练时间: 6-8小时"
echo "预期性能提升:"
echo " - 分割mIoU: 36% → 50-55%"
echo " - 检测mAP: 保持65-68%"
echo ""
echo "⚠️ 注意: 当前有训练正在运行!"
echo " 如需停止,请运行: bash STOP_CURRENT_TRAINING.sh"
echo ""
echo "=========================================="
echo ""

# Build a timestamped log file name so repeated runs never overwrite
# each other's logs.
LOG_FILE="training_enhanced_from_epoch19_$(date +%Y%m%d_%H%M%S).log"

echo "训练日志: $LOG_FILE"
echo ""

# Ask for confirmation: -n 1 reads a single character, -r keeps
# backslashes literal. Anything other than y/Y cancels the run.
read -p "是否继续启动Enhanced训练?(y/n): " -n 1 -r
echo ""
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
  echo "已取消"
  exit 0
fi

echo "开始训练..."
echo ""

# Start training on 4 processes. stderr is merged into stdout so that tee
# captures both streams in the log file. The `|| train_status=$?` keeps
# `set -e` from aborting here, so the failure can be reported below.
train_status=0
torchpack dist-run -np 4 python tools/train.py \
  configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_enhanced.yaml \
  --model.encoders.camera.backbone.init_cfg.checkpoint pretrained/swint-nuimages-pretrained.pth \
  2>&1 | tee "$LOG_FILE" || train_status=$?

echo ""
echo "=========================================="
if [[ $train_status -ne 0 ]]; then
  # Report the failure on stderr and propagate the pipeline's status so
  # callers (schedulers, wrapper scripts) can detect it.
  echo "训练失败! 退出码: $train_status" >&2
  echo "日志文件: $LOG_FILE" >&2
  echo "==========================================" >&2
  exit "$train_status"
fi
echo "训练完成!"
echo "日志文件: $LOG_FILE"
echo "=========================================="