bev-project/scripts/training/start_phase4.sh

77 lines
1.8 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Launch Phase 4: the full "Enhanced" training run.
# Prerequisite: Phase 3 has finished and produced epoch_29.pth.

# Stop the script as soon as a command fails.
set -e

# Put /opt/conda/bin first on PATH, then move into the directory that the
# relative paths used later (runs/, tools/, configs/, pretrained/) resolve against.
PATH="/opt/conda/bin:${PATH}"
export PATH
cd /workspace/bevfusion
# Print the Phase 4 banner: which enhancements are enabled and the training plan.
# Each argument becomes one output line; an empty argument yields a blank line.
printf '%s\n' \
  '==========================================' \
  'Phase 4: 完整Enhanced版本' \
  '==========================================' \
  '' \
  '增强内容:' \
  ' ✅ ASPP (Phase 1)' \
  ' ✅ Dice Loss (Phase 2)' \
  ' ✅ Deep Supervision (Phase 3)' \
  ' ✅ 提高分割权重 (map loss×3.0)' \
  '' \
  '训练设置:' \
  ' 基础模型: epoch_29.pth' \
  ' 目标: epoch_34.pth' \
  ' 学习率: 8e-5 (稍微提高)' \
  ' 预期提升: mIoU +12-18% (累计)' \
  ' 最终目标: mIoU 50-55%' \
  '' \
  '==========================================' \
  ''
# Locate the Phase 3 checkpoint anywhere under runs/. If several epoch_29.pth
# files exist, the path that comes first in reverse sort order is chosen.
# NOTE(review): that is the lexicographically last path, which is not
# necessarily the most recently written checkpoint — confirm this is intended.
EPOCH_29_PATH=$(
  find runs/ -name "epoch_29.pth" -type f \
    | sort -r \
    | head -n 1
)

# No checkpoint found: report it and abort before any training is started.
if [[ -z "$EPOCH_29_PATH" ]]; then
  printf '%s\n' "错误: 找不到epoch_29.pth" "请先完成Phase 3训练"
  exit 1
fi

printf '%s\n' "找到Phase 3模型: $EPOCH_29_PATH" ""
echo "开始Phase 4训练最终版本..."
echo ""

# Timestamped log file, written relative to the current working directory.
LOG_FILE="training_phase4_final_$(date +%Y%m%d_%H%M%S).log"

# Launch training via torchpack with 8 processes (-np 8), starting from the
# Phase 3 checkpoint. stdout and stderr are merged and mirrored to both the
# terminal and $LOG_FILE through tee.
torchpack dist-run -np 8 python tools/train.py \
  configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_enhanced_phase4.yaml \
  --model.encoders.camera.backbone.init_cfg.checkpoint pretrained/swint-nuimages-pretrained.pth \
  --load_from "$EPOCH_29_PATH" \
  --data.workers_per_gpu 0 \
  2>&1 | tee "$LOG_FILE"

# A pipeline's exit status is that of its last command (tee), so a failed
# training run would go unnoticed even under `set -e`, and the script would
# carry on to announce success. PIPESTATUS must be read by the very next
# command, before any other command overwrites it.
train_status=${PIPESTATUS[0]}
if (( train_status != 0 )); then
  echo "错误: Phase 4训练失败 (退出码: ${train_status}), 日志: $LOG_FILE" >&2
  exit "$train_status"
fi
# Print the completion summary: where the log was written and the suggested
# follow-up steps. Each argument becomes one output line; an empty argument
# yields a blank line.
printf '%s\n' \
  "" \
  "==========================================" \
  "🎉 Phase 4训练完成" \
  "==========================================" \
  "" \
  "✅ 所有增强阶段已完成!" \
  "" \
  "日志: $LOG_FILE" \
  "" \
  "下一步:" \
  " 1. 评估最终性能" \
  " 2. 对比各Phase提升" \
  " 3. 准备部署" \
  "" \
  "=========================================="