bev-project/archive/scripts_old/START_PHASE4A_DIVIDER_ENHAN...

67 lines
2.4 KiB
Bash

#!/bin/bash
# Phase 4A Stage 1 - launch script for Divider-enhanced training.
# Features: Adaptive Multi-Scale Fusion + Divider boundary enhancement
# + increased Divider weight.
#
# Launches `torchpack dist-run` in the background (nohup), passing an existing
# checkpoint via --load_from, and redirects all trainer output to a
# timestamped log file.
#
# Optional environment overrides (the defaults reproduce the previously
# hard-coded values, so running the script with no variables set behaves as
# before):
#   BEVFUSION_ROOT  project checkout to cd into
#   CHECKPOINT      checkpoint file passed to --load_from
#   CONFIG_FILE     training config, relative to BEVFUSION_ROOT or absolute
#   OUTPUT_DIR      directory that receives the training log
#   NUM_PROCS       process count passed to `torchpack dist-run -np`
set -euo pipefail

readonly RULE="══════════════════════════════════════════════════════════"
readonly PYTHON_BIN="/opt/conda/bin/python"
readonly BEVFUSION_ROOT="${BEVFUSION_ROOT:-/workspace/bevfusion}"
readonly CHECKPOINT="${CHECKPOINT:-${BEVFUSION_ROOT}/runs/run-326653dc-b7d0a4a4/epoch_8.pth}"
readonly CONFIG_FILE="${CONFIG_FILE:-configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_BEV2X_phase4a_stage1_task_gca.yaml}"
readonly OUTPUT_DIR="${OUTPUT_DIR:-/data/runs/phase4a_divider_enhanced}"
readonly NUM_PROCS="${NUM_PROCS:-8}"

# Print an error message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

echo "$RULE"
echo "Phase 4A Stage 1 - Divider增强版训练"
echo "$RULE"
echo ""
echo "✨ 增强策略:"
echo " 1. Adaptive Multi-Scale Fusion (每个类别学习最优尺度)"
echo " 2. Divider边界增强模块 (专门针对线性特征)"
echo " 3. 增强Divider权重 (3.0 → 5.0)"
echo ""

# Environment check: everything below uses paths relative to the project root.
cd "$BEVFUSION_ROOT" || die "❌ 错误: 无法进入目录 ${BEVFUSION_ROOT}"

# Environment variables.
export PATH="/opt/conda/bin:${PATH}"
# ${PYTHONPATH:+...} avoids a trailing ':' (an empty path entry) when
# PYTHONPATH is unset, and keeps `set -u` from aborting on the expansion.
export PYTHONPATH="${BEVFUSION_ROOT}${PYTHONPATH:+:${PYTHONPATH}}"

# Verify the Python environment can import torch before doing anything else.
"$PYTHON_BIN" -c "import torch; print('✅ PyTorch:', torch.__version__)" || exit 1

# Fail early if the launcher is missing; otherwise the failure would only be
# visible inside the background log.
command -v torchpack >/dev/null || die "❌ 错误: 未找到torchpack命令"

[[ "$NUM_PROCS" =~ ^[1-9][0-9]*$ ]] || die "❌ 错误: NUM_PROCS 必须为正整数 (当前: ${NUM_PROCS})"

# Check the checkpoint.
if [[ ! -f "$CHECKPOINT" ]]; then
  die "❌ 错误: 未找到${CHECKPOINT##*/}"
fi
echo "✅ 使用checkpoint: ${CHECKPOINT##*/}"
ls -lh -- "$CHECKPOINT"

# Check the config (resolved relative to BEVFUSION_ROOT after the cd above).
[[ -f "$CONFIG_FILE" ]] || die "❌ 错误: 未找到配置文件 ${CONFIG_FILE}"

# Create the output directory and pick a timestamped log file inside it.
mkdir -p -- "$OUTPUT_DIR"
LOG_FILE="${OUTPUT_DIR}/train_$(date +%Y%m%d_%H%M%S).log"
readonly LOG_FILE

echo ""
echo "=== 启动Divider增强训练 ==="
echo "配置文件: ${CONFIG_FILE##*/} (已启用adaptive_multiscale)"
# NOTE(review): OUTPUT_DIR is only used for the log file; it is not passed to
# tools/train.py, so checkpoints presumably land in the trainer's default run
# directory. Upstream BEVFusion accepts --run-dir — confirm for this fork
# before wiring it in.
echo "输出目录: ${OUTPUT_DIR}"
echo "日志文件: $LOG_FILE"
echo ""

# Start training in the background; nohup keeps it alive after logout.
nohup torchpack dist-run \
  -np "$NUM_PROCS" \
  "$PYTHON_BIN" tools/train.py \
  "$CONFIG_FILE" \
  --load_from "$CHECKPOINT" \
  --data.samples_per_gpu 1 \
  --data.workers_per_gpu 0 \
  > "$LOG_FILE" 2>&1 &
TRAIN_PID=$!

# `set -e` does not cover background jobs, so confirm the process survived
# startup before reporting success.
sleep 2
if ! kill -0 "$TRAIN_PID" 2>/dev/null; then
  die "❌ 错误: 训练进程启动后立即退出, 请检查日志: $LOG_FILE"
fi

echo "✅ 训练已在后台启动 (PID: $TRAIN_PID)"
echo ""
echo "监控命令:"
echo " tail -f $LOG_FILE"
echo " tail -f $LOG_FILE | grep divider"
echo ""
echo "$RULE"