bev-project/eval_in_new_docker.sh

86 lines
2.6 KiB
Bash
Raw Normal View History

#!/bin/bash
# Evaluation script for the Phase 3 epoch-23 checkpoint, meant to be run
# inside the new Docker container.
#
# What it does:
#   1. Puts the conda toolchain and the bevfusion checkout on the search paths.
#   2. Verifies that the config file and the checkpoint exist.
#   3. Asks torch how many CUDA devices are visible and launches
#      tools/test.py through `torchpack dist-run` with one process per device.
#   4. Tees all output to <EVAL_DIR>/eval.log and prints a short metric summary.
#
# Exit status: 0 on success; non-zero if a precondition fails or if the
# evaluation command itself fails (its exit code is propagated).

# -e: stop on unhandled errors; -u: reject unset variables;
# pipefail: a failure of the evaluation must not be masked by `tee`.
set -euo pipefail

# Print an error message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

export PATH=/opt/conda/bin:$PATH
# Append the previous value only when it is non-empty: a trailing ':' would add
# an empty entry (interpreted as the current directory) to the search path.
export LD_LIBRARY_PATH="/opt/conda/lib/python3.8/site-packages/torch/lib:/opt/conda/lib:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export PYTHONPATH="/workspace/bevfusion${PYTHONPATH:+:${PYTHONPATH}}"

cd /workspace/bevfusion || die "❌ 无法进入工作目录: /workspace/bevfusion"

echo "========================================================================"
echo "Phase 3 Epoch 23 评估 (新Docker容器)"
echo "========================================================================"
echo "Checkpoint: epoch_23.pth"
echo "配置: Phase 3 Enhanced BEV Segmentation"
echo "========================================================================"
echo ""

# Create a timestamped output directory so repeated runs never overwrite
# each other.
EVAL_DIR="eval_results/epoch23_new_docker_$(date +%Y%m%d_%H%M%S)"
mkdir -p "$EVAL_DIR"

CONFIG="configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_enhanced_phase1_HIGHRES.yaml"
CHECKPOINT="runs/enhanced_from_epoch19/epoch_23.pth"

echo "配置文件: $CONFIG"
echo "Checkpoint: $CHECKPOINT"
echo "输出目录: $EVAL_DIR"
echo ""

# Fail early if the required input files are missing.
[[ -f "$CONFIG" ]] || die "❌ 配置文件不存在: $CONFIG"
[[ -f "$CHECKPOINT" ]] || die "❌ Checkpoint不存在: $CHECKPOINT"
echo "✓ 文件检查通过"
echo ""

# Detect the number of visible GPUs. The value is validated before being
# used as the process count: `-np 0` or a non-numeric value (e.g. stray
# output from the interpreter) would otherwise fail in an obscure way.
GPU_COUNT=$(python -c "import torch; print(torch.cuda.device_count())")
echo "可用GPU数量: $GPU_COUNT"
if ! [[ "$GPU_COUNT" =~ ^[0-9]+$ ]] || (( GPU_COUNT < 1 )); then
  die "❌ 未检测到可用GPU (torch.cuda.device_count() 输出: $GPU_COUNT)"
fi
echo ""

echo "开始评估..."
echo "预计时间: 2-3小时"
echo "日志文件: $EVAL_DIR/eval.log"
echo ""

# Run distributed evaluation on all available GPUs. The exit status is
# captured explicitly (instead of letting `set -e` abort silently) so that a
# clear failure message pointing at the log can be printed.
eval_status=0
/opt/conda/bin/torchpack dist-run -np "$GPU_COUNT" /opt/conda/bin/python tools/test.py \
  "$CONFIG" \
  "$CHECKPOINT" \
  --eval bbox \
  --out "$EVAL_DIR/results.pkl" \
  --cfg-options data.workers_per_gpu=4 data.samples_per_gpu=1 \
  2>&1 | tee "$EVAL_DIR/eval.log" || eval_status=$?

if (( eval_status != 0 )); then
  printf '%s\n' "❌ 评估失败 (退出码: $eval_status)，详见日志: $EVAL_DIR/eval.log" >&2
  exit "$eval_status"
fi

echo ""
echo "========================================================================"
echo "评估完成!"
echo "========================================================================"
echo "结果文件: $EVAL_DIR/results.pkl"
echo "日志文件: $EVAL_DIR/eval.log"
echo ""
echo "========================================================================"
echo "性能指标摘要:"
echo "========================================================================"
# Best-effort summary: grep exits 1 when nothing matches, which under
# `pipefail` + `set -e` would abort the script, so that status is
# deliberately ignored here.
grep -E "(NDS|mAP|mIoU|Car|Pedestrian)" "$EVAL_DIR/eval.log" \
  | grep -v "UserWarning" \
  | tail -50 || true
echo ""
echo "详细结果请查看: $EVAL_DIR/eval.log"
echo ""
echo "结果已保存在共享目录训练Docker也可以访问"
echo "========================================================================"