bev-project/archive_scripts/EVAL_EPOCH23_COMPLETE.sh

176 lines
5.4 KiB
Bash
Raw Normal View History

#!/bin/bash
# Epoch 23 full evaluation script - 3D detection + BEV segmentation.
# Uses GPUs 4-7 so that the Stage 1 training job on GPUs 0-3 is not affected.
#
# Strict mode:
#   -e           abort on the first command that fails
#   -u           expanding an unset variable is an error
#   -o pipefail  a pipeline fails when ANY of its stages fails. Without it,
#                the `torchpack ... | tee <log>` pipelines in this script
#                report tee's exit status, so a crashed evaluation would go
#                unnoticed and the script would carry on.
set -euo pipefail

# Put the conda toolchain first on PATH.
export PATH="/opt/conda/bin:${PATH}"

# Prepend the torch / conda / CUDA library directories. The `${VAR:+:...}`
# form appends the previous value only when it is non-empty, which avoids a
# trailing ':' (an empty entry is treated as the current directory) and keeps
# the expansion valid under `set -u` when the variable was never set.
export LD_LIBRARY_PATH="/opt/conda/lib/python3.8/site-packages/torch/lib:/opt/conda/lib:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export PYTHONPATH="/workspace/bevfusion${PYTHONPATH:+:${PYTHONPATH}}"

# All relative paths used later (configs/, runs/, eval_results/, tools/) are
# resolved from the project root; under `set -e` a failed cd aborts the script.
cd /workspace/bevfusion
# Start-up banner with the launch time.
cat <<EOF
========================================================================
Epoch 23 完整评估 (GPU 4-7, 不影响训练)
========================================================================

启动时间: $(date)

EOF

# Inputs of the evaluation run (paths are relative to the current directory).
CONFIG="configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_enhanced_phase1_HIGHRES.yaml"
CHECKPOINT="runs/enhanced_from_epoch19/epoch_23.pth"

# Every run writes into its own timestamped output directory.
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
EVAL_DIR="eval_results/epoch23_complete_${TIMESTAMP}"
mkdir -p -- "${EVAL_DIR}"

# Echo the effective settings so they end up in any captured console log.
printf '%s\n' \
  "配置文件: ${CONFIG}" \
  "Checkpoint: ${CHECKPOINT} (516MB)" \
  "输出目录: ${EVAL_DIR}" \
  "使用GPU: 4-7 (避开训练GPU 0-3)" \
  ""
# Pre-flight check: both input files must exist before any GPU work starts.
# Each guard prints its message and terminates the script with status 1.
[[ -f "${CHECKPOINT}" ]] || {
  echo "错误: Checkpoint文件不存在: $CHECKPOINT"
  exit 1
}
[[ -f "${CONFIG}" ]] || {
  echo "错误: 配置文件不存在: $CONFIG"
  exit 1
}
#######################################
# Run tools/test.py through `torchpack dist-run` with 4 processes on
# GPUs 4-7, mirroring the combined stdout/stderr to the terminal and to a
# log file.
# Globals:
#   CONFIG, CHECKPOINT     (read) config file and checkpoint to evaluate
#   LD_LIBRARY_PATH, PATH  (read) extended for the launched command only
# Arguments:
#   $1   path of the result file passed to --out
#   $2   path of the log file written by tee
#   $3+  metric name(s) passed to --eval (e.g. bbox, map)
# Outputs:
#   Evaluation output on stdout; one error line on stderr on failure.
# Returns:
#   0 on success, 2 on wrong usage, otherwise the failing pipeline status.
#######################################
run_eval_stage() {
  if (( $# < 3 )); then
    echo "usage: run_eval_stage <out_file> <log_file> <metric>..." >&2
    return 2
  fi
  local out_file=$1
  local log_file=$2
  shift 2

  local status=0
  # The pipeline runs in a subshell with pipefail enabled so that a failure
  # of the evaluation is not hidden behind tee's (successful) exit status,
  # without changing the shell options of the rest of the script.
  (
    set -o pipefail
    CUDA_VISIBLE_DEVICES=4,5,6,7 \
    LD_LIBRARY_PATH=/opt/conda/lib/python3.8/site-packages/torch/lib:/opt/conda/lib:/usr/local/cuda/lib64:$LD_LIBRARY_PATH \
    PATH=/opt/conda/bin:$PATH \
    /opt/conda/bin/torchpack dist-run -np 4 /opt/conda/bin/python tools/test.py \
      "$CONFIG" \
      "$CHECKPOINT" \
      --eval "$@" \
      --out "$out_file" \
      --cfg-options data.workers_per_gpu=0 \
      2>&1 | tee "$log_file"
  ) || status=$?

  if (( status != 0 )); then
    echo "错误: 评估失败 (--eval $*), 退出码: $status, 日志: $log_file" >&2
  fi
  return "$status"
}

# Stage 1: 3D object detection evaluation
echo "========== 阶段1: 3D目标检测评估 =========="
echo "预计时间: 45-60分钟"
echo ""
run_eval_stage "$EVAL_DIR/detection_results.pkl" "$EVAL_DIR/detection_eval.log" \
  bbox || exit "$?"
echo ""
echo "检测评估完成!"
echo ""

# Stage 2: BEV segmentation evaluation
echo "========== 阶段2: BEV分割评估 =========="
echo "预计时间: 30-45分钟"
echo ""
run_eval_stage "$EVAL_DIR/segmentation_results.pkl" "$EVAL_DIR/segmentation_eval.log" \
  map || exit "$?"
echo ""
echo "分割评估完成!"
echo ""

# Stage 3: combined evaluation (detection + segmentation in one pass)
echo "========== 阶段3: 综合评估(检测+分割) =========="
echo "预计时间: 60-90分钟"
echo ""
run_eval_stage "$EVAL_DIR/complete_results.pkl" "$EVAL_DIR/complete_eval.log" \
  bbox map || exit "$?"
echo ""
echo "综合评估完成!"
echo ""
#######################################
# Print the last matching metric lines of one evaluation log, or the
# placeholder " (待提取)" when the log is unreadable or has no match.
# Arguments:
#   $1  log file to scan
#   $2  extended regular expression selecting metric lines
#   $3  maximum number of (trailing) matching lines to print
# Outputs:
#   Metric lines or the placeholder on stdout.
#######################################
print_metrics() {
  local log_file=$1
  local pattern=$2
  local max_lines=$3
  local matches=""
  if [[ -r "$log_file" ]]; then
    # grep exits with 1 when nothing matches; that is an expected outcome
    # here and must not abort the script under `set -e` / pipefail.
    matches=$(grep -E -- "$pattern" "$log_file" | tail -n "$max_lines") || true
  fi
  if [[ -n "$matches" ]]; then
    printf '%s\n' "$matches"
  else
    echo " (待提取)"
  fi
}

#######################################
# Write SUMMARY.txt for one evaluation output directory.
# Arguments:
#   $1  directory holding detection_eval.log, segmentation_eval.log and
#       complete_eval.log; SUMMARY.txt is (re)created inside it
# Outputs:
#   An error line on stderr when the directory does not exist.
# Returns:
#   0 on success, 1 when the directory is missing.
#######################################
write_summary() {
  local eval_dir=$1
  local summary_file="$eval_dir/SUMMARY.txt"
  if [[ ! -d "$eval_dir" ]]; then
    echo "错误: 输出目录不存在: $eval_dir" >&2
    return 1
  fi
  {
    # The generation time is expanded here; a quoted here-doc would have
    # written the literal text "$(date)" into the report.
    printf '%s\n' \
      "======================================================================" \
      "Epoch 23 评估报告摘要" \
      "======================================================================" \
      "生成时间: $(date)" \
      "Checkpoint: epoch_23.pth" \
      "配置: multitask_enhanced_phase1_HIGHRES.yaml" \
      "--- 3D检测结果 ---"
    print_metrics "$eval_dir/detection_eval.log" '(NDS|mAP):' 20
    printf '%s\n' "--- BEV分割结果 ---"
    print_metrics "$eval_dir/segmentation_eval.log" '(mIoU|IoU):' 20
    printf '%s\n' "--- 综合评估结果 ---"
    print_metrics "$eval_dir/complete_eval.log" '(NDS|mAP|mIoU):' 30
    printf '%s\n' \
      "======================================================================" \
      "完整日志位置:" \
      "- 检测: detection_eval.log" \
      "- 分割: segmentation_eval.log" \
      "- 综合: complete_eval.log" \
      "结果文件:" \
      "- detection_results.pkl" \
      "- segmentation_results.pkl" \
      "- complete_results.pkl" \
      "======================================================================"
  } > "$summary_file"
}

# Generate the report summary
echo "========================================================================"
echo "生成评估报告摘要..."
echo "========================================================================"
write_summary "${EVAL_DIR:-}"
# Show the generated summary, followed by the closing banner: completion
# time, output directory, example commands for inspecting the logs, and
# suggested next steps.
printf '\n'
cat "$EVAL_DIR/SUMMARY.txt"
cat <<EOF

========================================================================
全部评估完成!
========================================================================
完成时间: $(date)
输出目录: $EVAL_DIR

查看详细结果:
 cat $EVAL_DIR/SUMMARY.txt
 cat $EVAL_DIR/detection_eval.log | grep -A 20 'Evaluation'
 cat $EVAL_DIR/segmentation_eval.log | grep -A 20 'Evaluation'

下一步:
 1. 查看详细性能指标
 2. 对比Stage 1训练进度
 3. 识别改进空间
========================================================================
EOF