bev-project/INFER_ONE_BATCH.sh

109 lines
3.4 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Phase 4B single-batch inference script: runs inference on one small slice of data.
# Intended as a quick check that the model works.
#
# -e aborts on the first unhandled command failure; -u turns a reference to an
# unset variable into an error instead of silently expanding to "".
set -eu
# ${VAR:+:$VAR} appends the previous value only when it is set and non-empty.
# A bare "...:$VAR" leaves a trailing ':' when VAR is empty, and the dynamic
# loader treats an empty LD_LIBRARY_PATH entry as the current working directory.
export PATH="/opt/conda/bin${PATH:+:$PATH}"
export LD_LIBRARY_PATH="/opt/conda/lib/python3.8/site-packages/torch/lib:/opt/conda/lib:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export PYTHONPATH="/workspace/bevfusion${PYTHONPATH:+:$PYTHONPATH}"
# Relative paths used later in the script resolve against this directory.
cd /workspace/bevfusion
# Opening banner. One printf call emits every line (the trailing "" produces
# the blank separator line).
printf '%s\n' \
  "========================================================================" \
  "Phase 4B 单Batch推理测试" \
  "========================================================================" \
  "只推理1个batch的数据 (2个样本)" \
  "用于快速验证模型和配置是否正常" \
  "========================================================================" \
  ""
# Resolve the inputs and the output location.
#
# Usage: INFER_ONE_BATCH.sh [CONFIG [CHECKPOINT]]
#   CONFIG      model config file (default: the phase4b segmentation config)
#   CHECKPOINT  weights to load    (default: epoch_1 of run-4c8ec7e5-fabdc997)
# With no arguments the script behaves exactly as before. Relative paths are
# resolved against the directory the script has already changed into.
CONFIG="${1:-configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_BEV2X_phase4b_rmtppad_segmentation.yaml}"
CHECKPOINT="${2:-runs/run-4c8ec7e5-fabdc997/epoch_1.pth}"
# Second-resolution timestamp gives each run its own output directory.
INFER_DIR="/data/infer_test/$(date +%Y%m%d_%H%M%S)"
echo "配置文件: $CONFIG"
echo "Checkpoint: $CHECKPOINT"
echo "输出目录: $INFER_DIR"
echo ""
# Verify that both input files exist before creating anything on disk.
if [[ ! -f "$CONFIG" ]]; then
  echo "❌ 配置文件不存在: $CONFIG"
  exit 1
fi
if [[ ! -f "$CHECKPOINT" ]]; then
  echo "❌ Checkpoint不存在: $CHECKPOINT"
  exit 1
fi
echo "✓ 文件检查通过"
echo ""
# Create the output directory only after validation succeeded, so a failed
# check does not leave an empty timestamped directory behind.
mkdir -p "$INFER_DIR"
# Single-GPU, single-batch inference.
echo "开始单Batch推理..."
echo "只推理1个batch (2个样本)"
echo "预计时间: 10-30秒"
echo "日志文件: $INFER_DIR/infer_test.log"
echo ""
# Launch tools/test.py through torchpack with one process (-np 1). stdout and
# stderr are merged and copied to both the terminal and the log file.
# NOTE(review): the --cfg-options overrides presumably shrink the test set to a
# couple of samples; the banner says "2 samples" while samples_per_gpu=1 —
# confirm the wording matches the intended batch size.
torchpack dist-run \
  -np 1 \
  /opt/conda/bin/python tools/test.py \
  "$CONFIG" \
  "$CHECKPOINT" \
  --out "$INFER_DIR/one_batch_results.pkl" \
  --cfg-options data.test.load_interval=6018 data.test.samples_per_gpu=1 data.workers_per_gpu=0 \
  2>&1 | tee "$INFER_DIR/infer_test.log"
# A pipeline's exit status is that of its last command (tee), so a crash in
# the inference process would otherwise go unnoticed. PIPESTATUS[0] holds the
# status of the first stage and must be read immediately after the pipeline.
infer_status=${PIPESTATUS[0]}
if (( infer_status != 0 )); then
  echo "❌ 推理进程异常退出 (exit code: $infer_status)"
  echo "关键错误信息:"
  # '|| true': finding no matching lines must not change the exit path.
  grep -E "(ERROR|Error|Exception|KeyError|ImportError|RuntimeError)" "$INFER_DIR/infer_test.log" | tail -5 || true
  exit "$infer_status"
fi
echo ""
echo "========================================================================"
echo "单Batch推理完成"
echo "========================================================================"
echo "结果文件: $INFER_DIR/one_batch_results.pkl"
echo "日志文件: $INFER_DIR/infer_test.log"
echo ""
# Check the results file: it must exist and be loadable before the run is
# reported as successful.
if [[ -f "$INFER_DIR/one_batch_results.pkl" ]]; then
  echo "✅ 推理成功!结果文件已生成"
  # Show the size of the results file.
  FILE_SIZE=$(du -h "$INFER_DIR/one_batch_results.pkl" | cut -f1)
  echo "结果文件大小: $FILE_SIZE"
  # Inspect the pickled results. The path is passed as argv[1] rather than
  # spliced into the Python source, so quotes in the path cannot break the
  # snippet; the quoted heredoc delimiter keeps the shell from expanding
  # anything inside it. The snippet exits non-zero when loading fails so the
  # failure reaches this script instead of being printed and then ignored.
  # NOTE(review): the snippet's indentation was reconstructed; the final
  # "format OK" line is assumed to sit at try-level, outside the len() check.
  if ! /opt/conda/bin/python - "$INFER_DIR/one_batch_results.pkl" <<'PYEOF'
import pickle
import sys
import torch  # kept from the original snippet; not referenced by name below

try:
    with open(sys.argv[1], 'rb') as f:
        results = pickle.load(f)
    print(f'推理结果数量: {len(results)}')
    if len(results) > 0:
        sample = results[0]
        print(f'第一个样本的keys: {list(sample.keys())}')
        if 'masks_bev' in sample:
            print(f'BEV分割形状: {sample["masks_bev"].shape}')
        if 'boxes_3d' in sample:
            print(f'3D检测框数量: {len(sample["boxes_3d"])}')
    print('✅ 结果格式正确!')
except Exception as e:
    print(f'❌ 结果文件读取失败: {e}')
    sys.exit(1)
PYEOF
  then
    # The results file exists but could not be read back: not a success.
    exit 1
  fi
else
  echo "❌ 推理失败,未生成结果文件"
  echo "关键错误信息:"
  # '|| true': a log without matching lines must not mask the exit below.
  grep -E "(ERROR|Error|Exception|KeyError|ImportError|RuntimeError)" "$INFER_DIR/infer_test.log" | tail -5 || true
  exit 1
fi
echo ""
echo "✅ 单Batch推理测试完成模型配置正确"
echo "========================================================================"