bev-project/archive_scripts/start_enhanced_from_epoch19...

81 lines
2.2 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Start training the Enhanced variant from epoch_19 (improved version).
# Uses a configuration file rather than command-line arguments.

# Stop at the first command that exits with a non-zero status.
set -o errexit

# Prepend /opt/conda/bin to the command search path.
export PATH="/opt/conda/bin:${PATH}"

# Every relative path used later in this script is resolved from here.
cd /workspace/bevfusion
# Print the startup banner: run configuration, enabled Enhanced-head features,
# tuning choices, expected duration/metrics, and a reminder that another
# training run may need to be stopped first.
# The delimiter is quoted so the text is emitted literally, with no expansion.
cat <<'BANNER'
==========================================
BEVFusion Enhanced训练 - 从Epoch 19继续
任务: 3D检测 + BEV分割Enhanced Head
==========================================

配置信息:
 配置文件: multitask_enhanced.yaml
 GPU数量: 4x Tesla V100S
 基础模型: epoch_19.pth (515MB)
 分割头: EnhancedBEVSegmentationHead

Enhanced功能
 ✅ ASPP - 多尺度特征
 ✅ Channel Attention - 通道注意力
 ✅ Spatial Attention - 空间注意力
 ✅ Deep Supervision - 深度监督
 ✅ Dice Loss - 区域相似度
 ✅ Class Weights - 类别平衡

优化措施:
 - 使用4个GPU避免8GPU同步死锁
 - workers_per_gpu=0避免共享内存问题
 - samples_per_gpu=2降低内存压力
 - 学习率=1e-4微调模式
 - map loss权重=5.0(强化分割学习)

预计训练时间: 6-8小时
预期性能提升:
 - 分割mIoU: 36% → 50-55%
 - 检测mAP: 保持65-68%

⚠️ 注意: 当前有训练正在运行!
 如需停止,请运行: bash STOP_CURRENT_TRAINING.sh

==========================================

BANNER
# Build the log file name from the current date and time, then show it.
run_stamp=$(date +%Y%m%d_%H%M%S)
LOG_FILE="training_enhanced_from_epoch19_${run_stamp}.log"
printf '%s\n' "训练日志: $LOG_FILE" ""

# Ask for confirmation. A single keystroke is read into REPLY (no Enter
# needed), so a newline is printed afterwards to end the prompt line.
read -r -n 1 -p "是否继续启动Enhanced训练(y/n): "
echo ""

# Only "y" or "Y" proceeds; any other answer cancels with a zero exit status.
case "$REPLY" in
  [Yy]) ;;
  *)
    echo "已取消"
    exit 0
    ;;
esac

printf '%s\n' "开始训练..." ""
# Launch training through torchpack with 4 processes, sending both stdout and
# stderr to the terminal and to $LOG_FILE via tee.
#
# The YAML config supplies the training setup; the only command-line override
# is the checkpoint path for the camera backbone's init_cfg.
#
# pipefail is enabled because, without it, the pipeline's exit status is
# tee's. A failed training run would then still reach the "completed" banner
# below and the script would exit 0, hiding the failure from callers.
set -o pipefail

# Capture the status with `||` so errexit does not abort before the failure
# can be reported together with the log file location.
train_status=0
torchpack dist-run -np 4 python tools/train.py \
  configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_enhanced.yaml \
  --model.encoders.camera.backbone.init_cfg.checkpoint pretrained/swint-nuimages-pretrained.pth \
  2>&1 | tee "$LOG_FILE" || train_status=$?

if [[ "$train_status" -ne 0 ]]; then
  # Report the failure on stderr and propagate the pipeline's exit status.
  {
    echo ""
    echo "=========================================="
    echo "训练失败 (退出码: ${train_status})"
    echo "日志文件: $LOG_FILE"
    echo "=========================================="
  } >&2
  exit "$train_status"
fi

echo ""
echo "=========================================="
echo "训练完成!"
echo "日志文件: $LOG_FILE"
echo "=========================================="