bev-project/EVAL_EPOCH1_SIMPLE.sh

97 lines
3.2 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Phase 4B RMT-PPAD epoch-1 quick evaluation script.
#
# Runs tools/test.py through `torchpack dist-run` on the epoch-1 checkpoint,
# tees all output into a timestamped directory under /data/eval_fast, then
# greps the log for a metric summary (or for error lines on failure).
#
# Optional environment overrides (the defaults reproduce the original command):
#   EVAL_NUM_GPUS         process count passed to `torchpack dist-run -np` (default 1)
#   EVAL_SAMPLES_PER_GPU  value for data.val.samples_per_gpu               (default 1)
#   EVAL_WORKERS_PER_GPU  value for data.workers_per_gpu                   (default 0)
#
# Exit status: 0 when the evaluation command succeeded and the log contains
# "Evaluation results"; 1 otherwise (or earlier if a pre-flight check fails).
#
# NOTE(review): the original header described this as a 10x-downsampled run
# over 602 samples, while the banner below announces a full 6019-sample run.
# Nothing in this script selects a subset, so the actual sample count comes
# from the config file - confirm which is intended and align the banner text.
set -euo pipefail

# Prepend the conda / CUDA locations. `${VAR:+:$VAR}` avoids a trailing ':'
# when the variable is unset; an empty path entry would be interpreted as the
# current working directory by the dynamic loader and by Python.
export PATH="/opt/conda/bin:$PATH"
export LD_LIBRARY_PATH="/opt/conda/lib/python3.8/site-packages/torch/lib:/opt/conda/lib:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export PYTHONPATH="/workspace/bevfusion${PYTHONPATH:+:$PYTHONPATH}"

# CONFIG and CHECKPOINT below are relative to the project root.
cd /workspace/bevfusion

# Single source of truth for everything that is both printed and executed,
# so the banner cannot drift away from the command that actually runs.
CONFIG="configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_BEV2X_phase4b_rmtppad_segmentation.yaml"
CHECKPOINT="runs/run-4c8ec7e5-fabdc997/epoch_1.pth"
NUM_GPUS="${EVAL_NUM_GPUS:-1}"
SAMPLES_PER_GPU="${EVAL_SAMPLES_PER_GPU:-1}"
WORKERS_PER_GPU="${EVAL_WORKERS_PER_GPU:-0}"
SEPARATOR="========================================================================"

echo "$SEPARATOR"
echo "Phase 4B RMT-PPAD Epoch 1 快速评估 (全量评估)"
echo "$SEPARATOR"
echo "使用: 原始val_pipeline配置"
echo "数据增强: 已关闭 (is_train=false)"
echo "数据集: 6019样本 (全量评估,优化内存)"
echo "GPU配置: ${NUM_GPUS} GPU × ${SAMPLES_PER_GPU}样本/GPU (深度优化内存)"
echo "Checkpoint: $CHECKPOINT"
echo "$SEPARATOR"
echo ""

# Create a fresh, timestamped output directory for this run.
EVAL_DIR="/data/eval_fast/epoch1_fast_$(date +%Y%m%d_%H%M%S)"
mkdir -p "$EVAL_DIR"
RESULT_FILE="$EVAL_DIR/fast_results.pkl"
LOG_FILE="$EVAL_DIR/eval_fast.log"

echo "配置文件: $CONFIG"
echo "Checkpoint: $CHECKPOINT"
echo "输出目录: $EVAL_DIR"
echo ""

# Pre-flight: both inputs must exist before the evaluation is launched.
if [[ ! -f "$CONFIG" ]]; then
  echo "❌ 配置文件不存在: $CONFIG" >&2
  exit 1
fi
if [[ ! -f "$CHECKPOINT" ]]; then
  echo "❌ Checkpoint不存在: $CHECKPOINT" >&2
  exit 1
fi
echo "✓ 文件检查通过"
echo ""

# Launch the evaluation through torchpack's distributed runner.
echo "开始评估..."
echo "测试模式: 使用val_pipeline (数据增强已关闭)"
echo "数据集: 6019样本 (全量评估,优化内存)"
echo "GPU配置: ${NUM_GPUS} GPU × ${SAMPLES_PER_GPU}样本/GPU (深度优化内存)"
echo "预计时间: 25-35分钟"
echo "日志文件: $LOG_FILE"
echo ""

# With pipefail the pipeline reports the evaluation's failure instead of
# tee's success. `|| eval_status=$?` records that status without letting
# `set -e` abort here, so the error summary further down can still be printed.
eval_status=0
torchpack dist-run \
  -np "$NUM_GPUS" \
  /opt/conda/bin/python tools/test.py \
  "$CONFIG" \
  "$CHECKPOINT" \
  --eval bbox map \
  --out "$RESULT_FILE" \
  --cfg-options "data.val.samples_per_gpu=$SAMPLES_PER_GPU" "data.workers_per_gpu=$WORKERS_PER_GPU" \
  2>&1 | tee "$LOG_FILE" || eval_status=$?

echo ""
echo "$SEPARATOR"
echo "评估完成!"
echo "$SEPARATOR"
echo "结果文件: $RESULT_FILE"
echo "日志文件: $LOG_FILE"
echo ""

# Success requires BOTH a zero exit status from the pipeline and the
# "Evaluation results" marker in the log.
if (( eval_status == 0 )) && grep -q "Evaluation results" "$LOG_FILE"; then
  echo "✅ 评估成功!配置正确"
  echo ""
  echo "$SEPARATOR"
  echo "关键指标 (Epoch 1):"
  echo "$SEPARATOR"
  # grep exits 1 when nothing matches; that must not abort the script under
  # `set -e` + pipefail, hence the explicit `|| true`.
  grep -E "(NDS|mAP|mIoU|Car|Pedestrian|Divider Dice)" "$LOG_FILE" \
    | grep -v "UserWarning" \
    | tail -10 || true
else
  echo "❌ 评估失败,检查日志文件" >&2
  echo "退出码: $eval_status" >&2
  echo "关键错误信息:" >&2
  # Best-effort error excerpt; an empty match set is not itself an error.
  grep -E "(ERROR|Error|Exception|KeyError|ImportError)" "$LOG_FILE" \
    | tail -10 >&2 || true
  exit 1
fi

echo ""
echo "✅ 验证通过!现在可以安全启动训练了"
echo "$SEPARATOR"