#!/bin/bash
# Phase 4B RMT-PPAD epoch-1 quick evaluation script.
#
# Runs tools/test.py through `torchpack dist-run` on the epoch-1 checkpoint
# with the unmodified config, tees all output into a timestamped directory
# under /data/eval_fast, then scans the log for the evaluation summary.
#
# Optional environment overrides (defaults reproduce the previously
# hard-coded launch parameters):
#   NUM_GPUS         process count for `torchpack dist-run -np`   (default 1)
#   SAMPLES_PER_GPU  value for data.val.samples_per_gpu           (default 1)
#   WORKERS_PER_GPU  value for data.workers_per_gpu               (default 0)
#
# Exit status: 0 when the evaluation command succeeds and the log contains
# "Evaluation results"; 1 otherwise.
#
# NOTE(review): the original header mentioned "10x downsampling / 602
# samples" while the banner announces 6019 samples (full evaluation). This
# script passes no subsampling option itself, so the real sample count is
# determined by the config file - confirm and align the banner text.

# -e: stop on unhandled errors; -u: reject unset variables;
# pipefail: make `cmd | tee` report cmd's failure instead of tee's success.
set -euo pipefail

# ${VAR:+:$VAR} appends the previous value only when it is non-empty, which
# avoids a trailing ':' (an empty path entry means "current directory") and
# keeps the expansion valid under `set -u`.
export PATH="/opt/conda/bin:$PATH"
export LD_LIBRARY_PATH="/opt/conda/lib/python3.8/site-packages/torch/lib:/opt/conda/lib:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export PYTHONPATH="/workspace/bevfusion${PYTHONPATH:+:$PYTHONPATH}"

cd /workspace/bevfusion

readonly RULE="========================================================================"
readonly NUM_GPUS="${NUM_GPUS:-1}"
readonly SAMPLES_PER_GPU="${SAMPLES_PER_GPU:-1}"
readonly WORKERS_PER_GPU="${WORKERS_PER_GPU:-0}"

readonly CONFIG="configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_BEV2X_phase4b_rmtppad_segmentation.yaml"
readonly CHECKPOINT="runs/run-4c8ec7e5-fabdc997/epoch_1.pth"

echo "$RULE"
echo "Phase 4B RMT-PPAD Epoch 1 快速评估 (全量评估)"
echo "$RULE"
echo "使用: 原始val_pipeline配置"
echo "数据增强: 已关闭 (is_train=false)"
echo "数据集: 6019样本 (全量评估,优化内存)"
# Print the values that are actually passed to torchpack below, so the banner
# cannot drift from the real launch configuration.
echo "GPU配置: ${NUM_GPUS} GPU × ${SAMPLES_PER_GPU}样本/GPU (深度优化内存)"
echo "Checkpoint: $CHECKPOINT"
echo "$RULE"
echo ""

# Create the evaluation output directory (timestamped, one per run).
EVAL_DIR="/data/eval_fast/epoch1_fast_$(date +%Y%m%d_%H%M%S)"
readonly EVAL_DIR
readonly LOG_FILE="$EVAL_DIR/eval_fast.log"
readonly RESULT_FILE="$EVAL_DIR/fast_results.pkl"
mkdir -p "$EVAL_DIR"

echo "配置文件: $CONFIG"
echo "Checkpoint: $CHECKPOINT"
echo "输出目录: $EVAL_DIR"
echo ""

# Verify that the required input files exist before launching anything.
if [[ ! -f "$CONFIG" ]]; then
  echo "❌ 配置文件不存在: $CONFIG" >&2
  exit 1
fi

if [[ ! -f "$CHECKPOINT" ]]; then
  echo "❌ Checkpoint不存在: $CHECKPOINT" >&2
  exit 1
fi

echo "✓ 文件检查通过"
echo ""

# Run the evaluation through torchpack's distributed launcher.
echo "开始评估..."
echo "测试模式: 使用val_pipeline (数据增强已关闭)"
echo "数据集: 6019样本 (全量评估,优化内存)"
echo "GPU配置: ${NUM_GPUS} GPU × ${SAMPLES_PER_GPU}样本/GPU (深度优化内存)"
echo "预计时间: 25-35分钟"
echo "日志文件: $LOG_FILE"
echo ""

# `|| eval_status=$?` records the pipeline status (non-zero if either
# torchpack or tee failed, thanks to pipefail) without letting `set -e`
# abort before the diagnostics below are printed.
eval_status=0
torchpack dist-run \
  -np "$NUM_GPUS" \
  /opt/conda/bin/python tools/test.py \
  "$CONFIG" \
  "$CHECKPOINT" \
  --eval bbox map \
  --out "$RESULT_FILE" \
  --cfg-options \
  "data.val.samples_per_gpu=$SAMPLES_PER_GPU" \
  "data.workers_per_gpu=$WORKERS_PER_GPU" \
  2>&1 | tee "$LOG_FILE" || eval_status=$?

echo ""
echo "$RULE"
echo "评估完成!"
echo "$RULE"
echo "结果文件: $RESULT_FILE"
echo "日志文件: $LOG_FILE"
echo ""

# Success requires both a zero exit status from the evaluation pipeline and
# the "Evaluation results" marker in the log.
if (( eval_status == 0 )) && grep -q "Evaluation results" "$LOG_FILE"; then
  echo "✅ 评估成功!配置正确"
  echo ""
  echo "$RULE"
  echo "关键指标 (Epoch 1):"
  echo "$RULE"
  # `|| true`: a summary with no matching lines must not abort the script
  # now that pipefail is enabled.
  grep -E "(NDS|mAP|mIoU|Car|Pedestrian|Divider Dice)" "$LOG_FILE" \
    | grep -v "UserWarning" \
    | tail -10 || true
else
  {
    echo "❌ 评估失败,检查日志文件"
    echo "退出码: $eval_status"
    echo "关键错误信息:"
    # `|| true`: the log may contain none of these keywords (or may not exist
    # at all); still fall through to the explicit exit below.
    grep -E "(ERROR|Error|Exception|KeyError|ImportError)" "$LOG_FILE" \
      | tail -10 || true
  } >&2
  exit 1
fi

echo ""
echo "✅ 验证通过!现在可以安全启动训练了"
echo "$RULE"