#!/bin/bash
#
# Phase 4B RMT-PPAD epoch-1 fast evaluation script.
#
# Evaluates the epoch-1 checkpoint with the original validation pipeline and
# data augmentation disabled.
#
# NOTE(review): this header previously described a 10x-subsampled run
# (602 samples), while the banner printed below reports the full 6019-sample
# set. Which one is true depends on the config file -- confirm and keep the
# banner text in sync.

# Abort on the first failing command.
set -e

# Put the conda toolchain, the torch/CUDA shared libraries and the project
# sources on the relevant search paths. The ${VAR:+:$VAR} form appends the
# previous value only when it is non-empty, so an unset variable does not
# leave a trailing empty entry (which is interpreted as the current directory).
export PATH="/opt/conda/bin:$PATH"
export LD_LIBRARY_PATH="/opt/conda/lib/python3.8/site-packages/torch/lib:/opt/conda/lib:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export PYTHONPATH="/workspace/bevfusion${PYTHONPATH:+:$PYTHONPATH}"

# CONFIG and CHECKPOINT below are relative to the project root.
cd /workspace/bevfusion

CONFIG="configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_BEV2X_phase4b_rmtppad_segmentation.yaml"
CHECKPOINT="runs/run-4c8ec7e5-fabdc997/epoch_1.pth"

echo "========================================================================"
echo "Phase 4B RMT-PPAD Epoch 1 快速评估 (全量评估)"
echo "========================================================================"
echo "使用: 原始val_pipeline配置"
echo "数据增强: 已关闭 (is_train=false)"
echo "数据集: 6019样本 (全量评估,优化内存)"
# Matches the torchpack invocation later in this script
# (-np 1, data.val.samples_per_gpu=1); update both together.
echo "GPU配置: 1 GPU × 1样本/GPU (深度优化内存)"
echo "Checkpoint: $CHECKPOINT"
echo "========================================================================"
echo ""

# Create a timestamped output directory so repeated runs never overwrite
# each other's results.
EVAL_DIR="/data/eval_fast/epoch1_fast_$(date +%Y%m%d_%H%M%S)"
mkdir -p "$EVAL_DIR"

echo "配置文件: $CONFIG"
echo "Checkpoint: $CHECKPOINT"
echo "输出目录: $EVAL_DIR"
echo ""

# Pre-flight check: make sure the required input files exist before starting
# the evaluation.

# Exit with status 1 if the given path is not a regular file.
#   $1 - label used in the error message
#   $2 - path to check
require_file() {
  local label=$1
  local path=$2
  if [[ ! -f "$path" ]]; then
    echo "❌ ${label}不存在: $path"
    exit 1
  fi
}

require_file "配置文件" "$CONFIG"
require_file "Checkpoint" "$CHECKPOINT"

echo "✓ 文件检查通过"
echo ""

# Run the evaluation under torchpack (distributed launcher), then verify the
# outcome and print the key metrics.
#
# Reads CONFIG, CHECKPOINT and EVAL_DIR, which are set earlier in this script.

# Launch parameters. The status message and the command line are both built
# from these values so they cannot drift apart.
NUM_GPUS=1          # value for `torchpack dist-run -np`
SAMPLES_PER_GPU=1   # value for data.val.samples_per_gpu
LOG_FILE="$EVAL_DIR/eval_fast.log"
RESULT_FILE="$EVAL_DIR/fast_results.pkl"

echo "开始评估..."
echo "测试模式: 使用val_pipeline (数据增强已关闭)"
echo "数据集: 6019样本 (全量评估,优化内存)"
echo "GPU配置: ${NUM_GPUS} GPU × ${SAMPLES_PER_GPU}样本/GPU (深度优化内存)"
echo "预计时间: 25-35分钟"
echo "日志文件: $LOG_FILE"
echo ""

# A pipeline's exit status is that of its last command (tee), which would hide
# a failing evaluation. Suspend errexit for the pipeline, snapshot PIPESTATUS
# immediately, then restore errexit (enabled at the top of this script).
set +e
torchpack dist-run \
  -np "$NUM_GPUS" \
  /opt/conda/bin/python tools/test.py \
  "$CONFIG" \
  "$CHECKPOINT" \
  --eval bbox map \
  --out "$RESULT_FILE" \
  --cfg-options "data.val.samples_per_gpu=${SAMPLES_PER_GPU}" data.workers_per_gpu=0 \
  2>&1 | tee "$LOG_FILE"
pipe_status=("${PIPESTATUS[@]}")
set -e

eval_status=${pipe_status[0]}
tee_status=${pipe_status[1]}

# A failing tee means the log could not be written; stop here, as the
# script did before when errexit caught this case.
if (( tee_status != 0 )); then
  exit "$tee_status"
fi

echo ""
echo "========================================================================"
echo "评估完成!"
echo "========================================================================"
echo "结果文件: $RESULT_FILE"
echo "日志文件: $LOG_FILE"
echo ""

# The run counts as successful only if the launcher exited cleanly AND the
# log contains the evaluation summary marker.
if (( eval_status != 0 )) || ! grep -q "Evaluation results" "$LOG_FILE"; then
  echo "❌ 评估失败,检查日志文件"
  echo "torchpack退出码: $eval_status"
  echo "关键错误信息:"
  grep -E "(ERROR|Error|Exception|KeyError|ImportError)" "$LOG_FILE" | tail -10
  exit 1
fi

echo "✅ 评估成功!配置正确"
echo ""

echo "========================================================================"
echo "关键指标 (Epoch 1):"
echo "========================================================================"
# Show the last few metric lines from the log, skipping Python warnings that
# happen to mention the same keywords.
grep -E "(NDS|mAP|mIoU|Car|Pedestrian|Divider Dice)" "$LOG_FILE" | grep -v "UserWarning" | tail -10

echo ""
echo "✅ 验证通过!现在可以安全启动训练了"
echo "========================================================================"