bev-project/scripts/start_phase1.sh

61 lines
1.7 KiB
Bash
Raw Normal View History

#!/bin/bash
# Launch Phase 1: ASPP-enhanced training.
# Prerequisite: the baseline run (epoch_20.pth) has finished.

# Abort on the first command that fails.
set -o errexit

# Put the conda binaries ahead of everything else on PATH.
export PATH="/opt/conda/bin:${PATH}"

# All relative paths below (runs/, tools/, configs/, pretrained/) resolve from here.
cd /workspace/bevfusion
# Startup banner: the enhancements enabled in this phase and the training plan.
# Each argument is printed on its own line; "" produces a blank line.
printf '%s\n' \
  "==========================================" \
  "Phase 1: ASPP多尺度特征增强" \
  "==========================================" \
  "" \
  "增强内容:" \
  " ✅ ASPP - 多尺度特征提取" \
  " ❌ Dice Loss (Phase 2)" \
  " ❌ Deep Supervision (Phase 3)" \
  " ❌ 高权重 (Phase 4)" \
  "" \
  "训练设置:" \
  " 基础模型: epoch_20.pth" \
  " 目标: epoch_23.pth" \
  " 学习率: 5e-5" \
  " 预期提升: mIoU +2-3%" \
  "" \
  "==========================================" \
  ""
# Locate the baseline checkpoint that Phase 1 loads its weights from.
# epoch_20.pth is preferred; epoch_19.pth is accepted as a fallback. When a
# name matches in several places under runs/, the path that comes first in
# reverse sort order is used.
EPOCH_BASE_PATH=""
if [[ -d runs ]]; then
  for ckpt_name in epoch_20.pth epoch_19.pth; do
    # An empty result is handled below, so a non-zero pipeline status
    # (possible when pipefail is active) must not abort the script here.
    EPOCH_BASE_PATH=$(find runs/ -type f -name "$ckpt_name" | sort -r | head -n 1) || true
    if [[ -n "$EPOCH_BASE_PATH" ]]; then
      break
    fi
  done
fi

if [[ -z "$EPOCH_BASE_PATH" ]]; then
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo "错误: 找不到epoch_19.pth或epoch_20.pth" >&2
  echo "请先完成Baseline训练" >&2
  exit 1
fi
echo "找到Baseline模型: $EPOCH_BASE_PATH"
echo ""
echo "开始Phase 1训练..."
echo ""

# Timestamped log name so repeated launches do not overwrite earlier logs.
LOG_FILE="training_phase1_$(date +%Y%m%d_%H%M%S).log"

# Without pipefail the pipeline below reports tee's exit status, so a crashed
# training run would still be announced as finished and the script would
# exit 0. Make the pipeline fail when the trainer fails.
set -o pipefail

# Launch distributed training with 8 processes, starting from the baseline
# checkpoint; stdout and stderr are shown on the terminal and copied to the log.
# `|| train_status=$?` records the failure without letting errexit abort
# before the error report below is printed.
train_status=0
torchpack dist-run -np 8 python tools/train.py \
  configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_enhanced_phase1.yaml \
  --model.encoders.camera.backbone.init_cfg.checkpoint pretrained/swint-nuimages-pretrained.pth \
  --load_from "$EPOCH_BASE_PATH" \
  --data.workers_per_gpu 0 \
  2>&1 | tee "$LOG_FILE" || train_status=$?

if [[ "$train_status" -ne 0 ]]; then
  echo "" >&2
  echo "错误: Phase 1训练失败 (退出码: $train_status)" >&2
  echo "日志: $LOG_FILE" >&2
  exit "$train_status"
fi

echo ""
echo "=========================================="
echo "Phase 1训练完成"
echo "日志: $LOG_FILE"
echo "下一步: 运行 bash scripts/start_phase2.sh"
echo "=========================================="