bev-project/scripts/training/start_phase1.sh

61 lines
1.7 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Launch Phase 1: ASPP-enhanced training.
# Prerequisite: the Baseline run (epoch_20.pth) has finished.

# Abort as soon as any command exits non-zero (long form of `set -e`).
set -o errexit

# Prepend /opt/conda/bin to the command search path.
PATH="/opt/conda/bin:${PATH}"
export PATH

# Every relative path used below is resolved from this directory.
cd /workspace/bevfusion
# Print the Phase 1 banner: which enhancements are enabled in this phase and
# the training settings. Each argument below is emitted as one output line;
# an empty argument produces a blank line.
printf '%s\n' \
  "==========================================" \
  "Phase 1: ASPP多尺度特征增强" \
  "==========================================" \
  "" \
  "增强内容:" \
  " ✅ ASPP - 多尺度特征提取" \
  " ❌ Dice Loss (Phase 2)" \
  " ❌ Deep Supervision (Phase 3)" \
  " ❌ 高权重 (Phase 4)" \
  "" \
  "训练设置:" \
  " 基础模型: epoch_20.pth" \
  " 目标: epoch_23.pth" \
  " 学习率: 5e-5" \
  " 预期提升: mIoU +2-3%" \
  "" \
  "==========================================" \
  ""
# Locate the Baseline checkpoint that Phase 1 starts from.
# Either epoch_19.pth or epoch_20.pth under runs/ is accepted. epoch_19.pth is
# tried first (it is the only name this script used to search for, so existing
# setups resolve to the same file as before); epoch_20.pth is the fallback so
# that a finished Baseline which only kept epoch_20.pth is no longer rejected.
# When several files share the same name, the path that sorts last wins.
# NOTE(review): the banner names epoch_20.pth as the base model - confirm
# which of the two should take precedence when both exist.
EPOCH_BASE_PATH=""
for baseline_ckpt in epoch_19.pth epoch_20.pth; do
  EPOCH_BASE_PATH=$(find runs/ -name "$baseline_ckpt" -type f | sort -r | head -1)
  if [ -n "$EPOCH_BASE_PATH" ]; then
    break
  fi
done
if [ -z "$EPOCH_BASE_PATH" ]; then
  # Diagnostics go to stderr so they are not mixed into normal output.
  echo "错误: 找不到epoch_19.pth或epoch_20.pth" >&2
  echo "请先完成Baseline训练" >&2
  exit 1
fi
echo "找到Baseline模型: $EPOCH_BASE_PATH"
echo ""
echo "开始Phase 1训练..."
echo ""
# Build a timestamped log file name (one-second resolution) so each run gets
# its own log.
LOG_FILE="training_phase1_$(date +%Y%m%d_%H%M%S).log"
# Without pipefail the pipeline's exit status is tee's, so a crashed training
# run would look like a success and the script would carry on to print the
# completion message. With pipefail the training command's failure becomes the
# pipeline's failure, and errexit (`set -e`, enabled at the top of the script)
# stops the script right there with that status.
set -o pipefail
# Start training through torchpack with 8 processes (-np 8), passing the
# Baseline checkpoint via --load_from. stdout and stderr are merged, shown on
# the terminal and copied into $LOG_FILE by tee.
torchpack dist-run -np 8 python tools/train.py \
  configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_enhanced_phase1.yaml \
  --model.encoders.camera.backbone.init_cfg.checkpoint pretrained/swint-nuimages-pretrained.pth \
  --load_from "$EPOCH_BASE_PATH" \
  --data.workers_per_gpu 0 \
  2>&1 | tee "$LOG_FILE"
# Closing summary: report completion, where the log was written, and the
# suggested next step. Each argument below is emitted as one output line.
printf '%s\n' \
  "" \
  "==========================================" \
  "Phase 1训练完成" \
  "日志: $LOG_FILE" \
  "下一步: 运行 bash scripts/start_phase2.sh" \
  "=========================================="