bev-project/archive_scripts/start_enhanced_training_fix...

31 lines
1.1 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Launch the fixed "enhanced" training run.
# The fix replaces BatchNorm with GroupNorm to resolve a distributed-training
# deadlock (per the original author's note).
#
# Usage: run from the project root -- tools/train.py, configs/, pretrained/
# and runs/ are all referenced with paths relative to the current directory.
# The log file is also written to the current directory.
#
# Exit status: 0 when training succeeds; otherwise the non-zero status of the
# failing pipeline stage (training command or tee).

# -e: abort on unhandled errors.
# pipefail: without it the status of `train | tee` is tee's status, so a
# crashed training run would still be reported as "finished".
set -eo pipefail

export PATH="/opt/conda/bin:$PATH"

echo "=================================================="
echo "启动修复后的EnhancedBEVSegmentationHead训练"
echo "修复内容BatchNorm2d → GroupNorm (15处)"
echo "配置Phase1 高分辨率 + ASPP + 注意力机制"
echo "从epoch_19.pth加载backbone和encoder权重"
echo "=================================================="

# Timestamped log file so repeated launches never overwrite each other.
LOG_FILE="training_enhanced_fixed_$(date +%Y%m%d_%H%M%S).log"

# Training command, kept as an array so every argument stays a single word.
train_cmd=(
  /opt/conda/bin/torchpack dist-run -np 8   # -np 8: process count handed to torchpack
  /opt/conda/bin/python tools/train.py
  configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_enhanced_phase1_HIGHRES.yaml
  --model.encoders.camera.backbone.init_cfg.checkpoint pretrained/swint-nuimages-pretrained.pth
  --load_from runs/run-326653dc-74184412/epoch_19.pth
  --data.samples_per_gpu 2
  --data.workers_per_gpu 4
)

# Start training; mirror stdout+stderr to the terminal and the log file.
# `|| train_status=$?` captures the failure without tripping `set -e`, so the
# log location can still be reported before exiting.
train_status=0
"${train_cmd[@]}" 2>&1 | tee "$LOG_FILE" || train_status=$?

echo ""
if (( train_status != 0 )); then
  echo "训练失败 (退出码: $train_status)!日志保存在: $LOG_FILE" >&2
  exit "$train_status"
fi
echo "训练完成!日志保存在: $LOG_FILE"