#!/bin/bash
# Phase 4A: BEV 2x resolution training launcher.
# Modeled on the known-good configuration in start_enhanced_training_fixed.sh.
#
# Usage: run from the repository root (tools/train.py, configs/, pretrained/
# and runs/ are referenced with relative paths).
#
# Exit status: 0 when training succeeds; otherwise the non-zero status of the
# failed pipeline stage.

# -e: stop on errors; -u: reject unset variables; pipefail: make the
# "train | tee" pipeline report the trainer's failure instead of tee's success.
set -euo pipefail

export PATH="/opt/conda/bin:$PATH"
# Append the inherited LD_LIBRARY_PATH only when it is non-empty: a trailing
# ':' would add an empty entry, which the dynamic loader treats as the current
# directory.
export LD_LIBRARY_PATH="/opt/conda/lib:/opt/conda/lib/python3.8/site-packages/torch/lib:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# Banner describing the run (quoted delimiter: printed literally, no expansion).
cat <<'EOF'
========================================================================
Phase 4A: BEV 2x分辨率提升训练
========================================================================
配置: BEV 0.15m分辨率 (2倍提升)
Decoder: 4层完整版 [256, 256, 128, 128]
从epoch_23.pth加载所有权重
========================================================================

关键配置:
 - BEV分辨率: 0.3m → 0.15m (720×720)
 - GT标签: 0.25m → 0.125m (800×800)
 - Decoder: 2层 → 4层
 - Deep Supervision: 启用
 - Dice Loss: 启用
 - GPU数量: 6
 - Batch: 1/GPU (显存限制)
 - Workers: 4 (参考Phase 3成功配置)

预期性能提升:
 - Stop Line IoU: 0.27 → 0.42+ (+55%)
 - Divider IoU: 0.19 → 0.35+ (+84%)
 - 整体mIoU: 0.41 → 0.54+ (+32%)

预计训练时间: 12.5天
========================================================================

EOF

# Create a timestamped log file name for this run.
LOG_FILE="phase4a_bev2x_$(date +%Y%m%d_%H%M%S).log"

echo "开始训练..."
echo "日志文件: $LOG_FILE"
echo ""

# Launch training, using the same command format as
# start_enhanced_training_fixed.sh. The status is captured with "|| status=$?"
# so that set -e does not abort before the failure can be reported below.
status=0
/opt/conda/bin/torchpack dist-run -np 6 /opt/conda/bin/python tools/train.py \
  configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_BEV2X_phase4a.yaml \
  --model.encoders.camera.backbone.init_cfg.checkpoint pretrained/swint-nuimages-pretrained.pth \
  --load_from runs/enhanced_from_epoch19/epoch_23.pth \
  --data.samples_per_gpu 1 \
  --data.workers_per_gpu 4 \
  2>&1 | tee "$LOG_FILE" || status=$?

if (( status != 0 )); then
  # Do not print the success banner for a failed run; propagate the status.
  echo "" >&2
  echo "训练失败 (退出码: $status),日志保存在: $LOG_FILE" >&2
  exit "$status"
fi

echo ""
echo "========================================================================"
echo "训练完成!日志保存在: $LOG_FILE"
echo "========================================================================"