#!/bin/bash
#
# Phase 4B single-batch inference smoke test.
#
# Runs tools/test.py through `torchpack dist-run` on a single process with a
# heavily sub-sampled test split, writes the predictions to a timestamped
# directory under /data/infer_test, and then sanity-checks the pickle that
# was produced. Intended as a quick check that the model and config load and
# run end to end.
#
# Usage:
#   <this script> [CONFIG [CHECKPOINT]]
#
#   CONFIG      optional; config YAML (default: the Phase 4B config below)
#   CHECKPOINT  optional; checkpoint file (default: epoch_1.pth of the run below)
#
# Relative paths are resolved against /workspace/bevfusion, because the
# script changes into that directory before using them.
#
# Exit status: 0 when inference succeeded and the result file could be read
# back; 1 otherwise.

# -e: abort on unhandled errors; -u: abort on unset variables;
# pipefail: a failing torchpack run must not be hidden by `tee`.
set -euo pipefail

export PATH="/opt/conda/bin:${PATH}"
# ${VAR:+:$VAR} appends the previous value only when there is one, so an
# unset variable does not leave a trailing empty entry (which the loader and
# Python would interpret as the current directory).
export LD_LIBRARY_PATH="/opt/conda/lib/python3.8/site-packages/torch/lib:/opt/conda/lib:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export PYTHONPATH="/workspace/bevfusion${PYTHONPATH:+:${PYTHONPATH}}"

cd /workspace/bevfusion || {
  echo "❌ 无法进入工作目录: /workspace/bevfusion" >&2
  exit 1
}

echo "========================================================================"
echo "Phase 4B 单Batch推理测试"
echo "========================================================================"
# NOTE(review): the banner announces one batch of two samples, while the
# overrides passed to test.py below set data.test.samples_per_gpu=1 - confirm
# which of the two is intended.
echo "只推理1个batch的数据 (2个样本)"
echo "用于快速验证模型和配置是否正常"
echo "========================================================================"
echo ""

# Create a fresh, timestamped output directory for this run.
INFER_DIR="/data/infer_test/$(date +%Y%m%d_%H%M%S)"
readonly INFER_DIR
mkdir -p "$INFER_DIR"

readonly RESULT_FILE="$INFER_DIR/one_batch_results.pkl"
readonly LOG_FILE="$INFER_DIR/infer_test.log"

readonly CONFIG="${1:-configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_BEV2X_phase4b_rmtppad_segmentation.yaml}"
readonly CHECKPOINT="${2:-runs/run-4c8ec7e5-fabdc997/epoch_1.pth}"

echo "配置文件: $CONFIG"
echo "Checkpoint: $CHECKPOINT"
echo "输出目录: $INFER_DIR"
echo ""

# Fail early if either input is missing.
if [[ ! -f "$CONFIG" ]]; then
  echo "❌ 配置文件不存在: $CONFIG" >&2
  exit 1
fi

if [[ ! -f "$CHECKPOINT" ]]; then
  echo "❌ Checkpoint不存在: $CHECKPOINT" >&2
  exit 1
fi

echo "✓ 文件检查通过"
echo ""

# Single-process inference on the sub-sampled split.
echo "开始单Batch推理..."
echo "只推理1个batch (2个样本)"
echo "预计时间: 10-30秒"
echo "日志文件: $LOG_FILE"
echo ""

infer_cmd=(
  torchpack dist-run
  -np 1
  /opt/conda/bin/python tools/test.py
  "$CONFIG"
  "$CHECKPOINT"
  --out "$RESULT_FILE"
  --cfg-options
  data.test.load_interval=6018
  data.test.samples_per_gpu=1
  data.workers_per_gpu=0
)

# Capture the pipeline status instead of letting `set -e` abort here, so the
# failure branch below can still print the relevant log lines.
infer_status=0
"${infer_cmd[@]}" 2>&1 | tee "$LOG_FILE" || infer_status=$?

echo ""
echo "========================================================================"
echo "单Batch推理完成!"
echo "========================================================================"
echo "结果文件: $RESULT_FILE"
echo "日志文件: $LOG_FILE"
echo ""

# The run counts as failed when the process reported an error or when no
# result file was written.
if (( infer_status != 0 )) || [[ ! -f "$RESULT_FILE" ]]; then
  if [[ ! -f "$RESULT_FILE" ]]; then
    echo "❌ 推理失败,未生成结果文件" >&2
  else
    echo "❌ 推理进程异常退出 (退出码: $infer_status)" >&2
  fi
  echo "关键错误信息:" >&2
  # Best effort: a log without matching lines (or no log at all) must not
  # change the exit path, hence the `|| true`.
  grep -E "(ERROR|Error|Exception|KeyError|ImportError|RuntimeError)" "$LOG_FILE" \
    | tail -5 >&2 || true
  exit 1
fi

echo "✅ 推理成功!结果文件已生成"

# Show the size of the result file.
FILE_SIZE=$(du -h "$RESULT_FILE" | cut -f1)
echo "结果文件大小: $FILE_SIZE"

# Read the pickle back and summarise its contents. The path is passed as an
# argument (not spliced into the Python source) and the heredoc delimiter is
# quoted, so the snippet is taken literally by the shell.
# The file was written by this very run; pickle must not be used like this on
# files from an untrusted source.
if ! /opt/conda/bin/python - "$RESULT_FILE" <<'PY'
import pickle
import sys

# Kept from the original script; presumably makes sure torch is importable
# before tensors inside the pickle are restored - TODO confirm it is needed.
import torch

try:
    with open(sys.argv[1], 'rb') as f:
        results = pickle.load(f)
    print(f'推理结果数量: {len(results)}')
    if len(results) > 0:
        sample = results[0]
        print(f'第一个样本的keys: {list(sample.keys())}')
        if 'masks_bev' in sample:
            print(f'BEV分割形状: {sample["masks_bev"].shape}')
        if 'boxes_3d' in sample:
            print(f'3D检测框数量: {len(sample["boxes_3d"])}')
    print('✅ 结果格式正确!')
except Exception as e:
    print(f'❌ 结果文件读取失败: {e}', file=sys.stderr)
    # Report the failure to the shell so the script does not claim success.
    sys.exit(1)
PY
then
  exit 1
fi

echo ""
echo "✅ 单Batch推理测试完成!模型配置正确"
echo "========================================================================"