#!/bin/bash
# Phase 4B single-batch inference script.
#
# Runs tools/test.py once through `torchpack dist-run -np 1` with a very large
# data.test.load_interval so that only a handful of samples are evaluated, then
# sanity-checks the pickled results. Intended as a quick smoke test that the
# model, config and checkpoint load and run.
#
# Exit status: 0 when inference ran and the result file could be read back,
# 1 on any failure (missing input file, inference failure, unreadable result).

# -e: stop on unhandled errors; -u: reject unset variables;
# pipefail: a failing torchpack is not hidden by the `tee` it is piped into.
set -euo pipefail

export PATH="/opt/conda/bin:$PATH"
# ${VAR:+:$VAR} appends the previous value only when it is set and non-empty,
# which keeps `set -u` happy and avoids a trailing ':' (an empty path entry).
export LD_LIBRARY_PATH="/opt/conda/lib/python3.8/site-packages/torch/lib:/opt/conda/lib:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export PYTHONPATH="/workspace/bevfusion${PYTHONPATH:+:$PYTHONPATH}"

cd /workspace/bevfusion

echo "========================================================================"
echo "Phase 4B 单Batch推理测试"
echo "========================================================================"
echo "只推理1个batch的数据 (2个样本)"
echo "用于快速验证模型和配置是否正常"
echo "========================================================================"
echo ""

# Create a timestamped output directory for this run.
INFER_DIR="/data/infer_test/$(date +%Y%m%d_%H%M%S)"
mkdir -p "$INFER_DIR"

CONFIG="configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_BEV2X_phase4b_rmtppad_segmentation.yaml"
CHECKPOINT="runs/run-4c8ec7e5-fabdc997/epoch_1.pth"
RESULT_FILE="$INFER_DIR/one_batch_results.pkl"
LOG_FILE="$INFER_DIR/infer_test.log"

echo "配置文件: $CONFIG"
echo "Checkpoint: $CHECKPOINT"
echo "输出目录: $INFER_DIR"
echo ""

# Verify that the required input files exist before launching anything.
if [[ ! -f "$CONFIG" ]]; then
  echo "❌ 配置文件不存在: $CONFIG"
  exit 1
fi

if [[ ! -f "$CHECKPOINT" ]]; then
  echo "❌ Checkpoint不存在: $CHECKPOINT"
  exit 1
fi

echo "✓ 文件检查通过"
echo ""

# Single-GPU, single-batch inference.
echo "开始单Batch推理..."
echo "只推理1个batch (2个样本)"
echo "预计时间: 10-30秒"
echo "日志文件: $LOG_FILE"
echo ""

# Capture the pipeline status instead of letting `set -e` abort here, so the
# failure branch below can still print the relevant log lines.
infer_status=0
torchpack dist-run \
  -np 1 \
  /opt/conda/bin/python tools/test.py \
  "$CONFIG" \
  "$CHECKPOINT" \
  --out "$RESULT_FILE" \
  --cfg-options data.test.load_interval=6018 data.test.samples_per_gpu=1 data.workers_per_gpu=0 \
  2>&1 | tee "$LOG_FILE" || infer_status=$?

echo ""
echo "========================================================================"
echo "单Batch推理完成!"
echo "========================================================================"
echo "结果文件: $RESULT_FILE"
echo "日志文件: $LOG_FILE"
echo ""

# Inspect the result file. Success requires both a zero exit status from the
# inference pipeline and an existing result file.
if [[ "$infer_status" -eq 0 && -f "$RESULT_FILE" ]]; then
  echo "✅ 推理成功!结果文件已生成"

  # Show the size of the result file.
  FILE_SIZE=$(du -h "$RESULT_FILE" | cut -f1)
  echo "结果文件大小: $FILE_SIZE"

  # Load the pickle and print a short summary. The path is handed over as an
  # argument (not spliced into the Python source), and the quoted heredoc
  # delimiter keeps the shell from expanding anything inside the Python code.
  # The check exits non-zero when the file cannot be read, so the script does
  # not go on to report overall success.
  if ! /opt/conda/bin/python - "$RESULT_FILE" <<'PY'
import pickle
import sys

# Kept from the original check.
# NOTE(review): presumably imported so torch types resolve while unpickling - confirm.
import torch

result_path = sys.argv[1]

try:
    # The pickle is the file this script just produced via --out.
    with open(result_path, 'rb') as f:
        results = pickle.load(f)

    print(f'推理结果数量: {len(results)}')

    if len(results) > 0:
        sample = results[0]
        print(f'第一个样本的keys: {list(sample.keys())}')

        if 'masks_bev' in sample:
            print(f'BEV分割形状: {sample["masks_bev"].shape}')
        if 'boxes_3d' in sample:
            print(f'3D检测框数量: {len(sample["boxes_3d"])}')

    print('✅ 结果格式正确!')
except Exception as e:
    print(f'❌ 结果文件读取失败: {e}')
    sys.exit(1)
PY
  then
    exit 1
  fi
else
  echo "❌ 推理失败,未生成结果文件"
  echo "torchpack 退出码: $infer_status"
  echo "关键错误信息:"
  # grep exits non-zero when nothing matches; that must not cut the failure
  # report short under `set -e`/pipefail, hence the deliberate `|| true`.
  grep -E "(ERROR|Error|Exception|KeyError|ImportError|RuntimeError)" "$LOG_FILE" | tail -5 || true
  exit 1
fi

echo ""
echo "✅ 单Batch推理测试完成!模型配置正确"
echo "========================================================================"