bev-project/EVAL_EPOCH1_SIMPLE.sh

97 lines
3.2 KiB
Bash
Raw Normal View History

2025-11-21 10:50:51 +08:00
#!/bin/bash
# Phase 4B RMT-PPAD epoch-1 quick evaluation script (10x downsampling).
# Evaluates 602 samples with the original config and no data augmentation.
# NOTE(review): the banner printed further down reports a full 6019-sample
# evaluation instead -- confirm which description is accurate.
#
# -e: abort on the first failing command.
# -u: treat expansion of an unset variable as an error.
set -eu

# Put the conda toolchain first on PATH. `${VAR:+:$VAR}` appends the previous
# value only when it is set and non-empty, so no dangling ':' is produced.
export PATH="/opt/conda/bin${PATH:+:$PATH}"

# Prepend the torch, conda and CUDA library directories. An unconditional
# ":$LD_LIBRARY_PATH" would leave an empty trailing entry when the variable is
# unset, and an empty entry makes the loader search the current directory.
export LD_LIBRARY_PATH="/opt/conda/lib/python3.8/site-packages/torch/lib:/opt/conda/lib:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Make the bevfusion checkout importable; same empty-entry guard as above.
export PYTHONPATH="/workspace/bevfusion${PYTHONPATH:+:$PYTHONPATH}"

# All relative paths used later (config, checkpoint, tools/test.py) are
# resolved from the project root.
cd /workspace/bevfusion
# Inputs and output location of this run. They are defined before the banner
# so the banner prints the values that are actually used instead of a second,
# hand-copied literal.
CONFIG="configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_BEV2X_phase4b_rmtppad_segmentation.yaml"
CHECKPOINT="runs/run-4c8ec7e5-fabdc997/epoch_1.pth"
# Timestamped directory so repeated runs never overwrite each other.
EVAL_DIR="/data/eval_fast/epoch1_fast_$(date +%Y%m%d_%H%M%S)"

echo "========================================================================"
echo "Phase 4B RMT-PPAD Epoch 1 快速评估 (全量评估)"
echo "========================================================================"
echo "使用: 原始val_pipeline配置"
echo "数据增强: 已关闭 (is_train=false)"
# NOTE(review): the sample count is not derived from anything in this script;
# confirm it against the dataset configuration.
echo "数据集: 6019样本 (全量评估,优化内存)"
# Matches the torchpack invocation in this script: one process, one sample per
# GPU. The previous text claimed "4 GPU × 8样本/GPU", which the command never
# used.
echo "GPU配置: 1 GPU × 1样本/GPU (深度优化内存)"
echo "Checkpoint: $CHECKPOINT"
echo "========================================================================"
echo ""

# Create the evaluation output directory.
mkdir -p "$EVAL_DIR"

echo "配置文件: $CONFIG"
echo "Checkpoint: $CHECKPOINT"
echo "输出目录: $EVAL_DIR"
echo ""
# Make sure both input files exist before launching the evaluation.
# Each guard prints its message and exits with status 1 when the path is not a
# regular file.
[[ -f "$CONFIG" ]] || {
  echo "❌ 配置文件不存在: $CONFIG"
  exit 1
}

[[ -f "$CHECKPOINT" ]] || {
  echo "❌ Checkpoint不存在: $CHECKPOINT"
  exit 1
}

echo "✓ 文件检查通过"
echo ""
# Run the evaluation through torchpack's distributed launcher, then decide
# whether it succeeded.
#
# Reads:  CONFIG, CHECKPOINT, EVAL_DIR (set earlier in this script).
# Writes: $EVAL_DIR/eval_fast.log (everything the evaluator prints); the path
#         $EVAL_DIR/fast_results.pkl is handed to tools/test.py via --out.
# Exits:  1 when the evaluator exits non-zero or the log does not contain
#         "Evaluation results".

# Launch settings. The status messages below are built from these values so
# they cannot drift from the command that is really executed.
NUM_PROCS=1        # passed to `torchpack dist-run -np`
SAMPLES_PER_GPU=1  # data.val.samples_per_gpu override
WORKERS_PER_GPU=0  # data.workers_per_gpu override
LOG_FILE="$EVAL_DIR/eval_fast.log"
RESULT_FILE="$EVAL_DIR/fast_results.pkl"

echo "开始评估..."
echo "测试模式: 使用val_pipeline (数据增强已关闭)"
echo "数据集: 6019样本 (全量评估,优化内存)"
echo "GPU配置: ${NUM_PROCS} GPU × ${SAMPLES_PER_GPU}样本/GPU (深度优化内存)"
# NOTE(review): the time estimate is not derived from the settings above;
# confirm it still holds for this launch configuration.
echo "预计时间: 25-35分钟"
echo "日志文件: $LOG_FILE"
echo ""

torchpack dist-run \
  -np "$NUM_PROCS" \
  /opt/conda/bin/python tools/test.py \
  "$CONFIG" \
  "$CHECKPOINT" \
  --eval bbox map \
  --out "$RESULT_FILE" \
  --cfg-options \
  "data.val.samples_per_gpu=$SAMPLES_PER_GPU" \
  "data.workers_per_gpu=$WORKERS_PER_GPU" \
  2>&1 | tee "$LOG_FILE"
# The pipeline's own status is tee's (pipefail is not enabled in this script),
# so the evaluator's exit code has to be read from PIPESTATUS immediately.
eval_status=${PIPESTATUS[0]}

echo ""
echo "========================================================================"
echo "评估完成!"
echo "========================================================================"
echo "结果文件: $RESULT_FILE"
echo "日志文件: $LOG_FILE"
echo ""

# Success requires both a clean exit of the evaluator and the results marker
# in the log; the marker alone does not prove the run finished cleanly.
if (( eval_status == 0 )) && grep -q "Evaluation results" "$LOG_FILE"; then
  echo "✅ 评估成功!配置正确"
  echo ""
  echo "========================================================================"
  echo "关键指标 (Epoch 1):"
  echo "========================================================================"
  # Reporting only: `|| true` keeps a no-match from aborting the script should
  # pipefail ever be enabled.
  grep -E "(NDS|mAP|mIoU|Car|Pedestrian|Divider Dice)" "$LOG_FILE" \
    | grep -v "UserWarning" \
    | tail -10 || true
else
  echo "❌ 评估失败,检查日志文件"
  if (( eval_status != 0 )); then
    echo "评估进程退出码: $eval_status"
  fi
  echo "关键错误信息:"
  grep -E "(ERROR|Error|Exception|KeyError|ImportError)" "$LOG_FILE" \
    | tail -10 || true
  exit 1
fi

echo ""
echo "✅ 验证通过!现在可以安全启动训练了"
echo "========================================================================"