#!/bin/bash
# Launch the fixed, enhanced training run.
# BatchNorm was replaced with GroupNorm to resolve a distributed deadlock.
#
# NOTE(review): every path below is relative, so this presumably has to be
# started from the project root — confirm.

# -e: abort on unhandled errors; -u: reject unset variables;
# pipefail: make the `train | tee` pipeline report the trainer's exit status
# rather than tee's, so a failed run is not reported as a success.
set -euo pipefail

export PATH="/opt/conda/bin:${PATH}"

echo "=================================================="
echo "启动修复后的EnhancedBEVSegmentationHead训练"
echo "修复内容:BatchNorm2d → GroupNorm (15处)"
echo "配置:Phase1 高分辨率 + ASPP + 注意力机制"
echo "从epoch_19.pth加载backbone和encoder权重"
echo "=================================================="

# Timestamped log file so repeated runs do not overwrite each other.
LOG_FILE="training_enhanced_fixed_$(date +%Y%m%d_%H%M%S).log"

# Training command, kept as an array so each argument stays a single word.
train_cmd=(
  /opt/conda/bin/torchpack dist-run -np 8
  /opt/conda/bin/python tools/train.py
  configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_enhanced_phase1_HIGHRES.yaml
  --model.encoders.camera.backbone.init_cfg.checkpoint pretrained/swint-nuimages-pretrained.pth
  --load_from runs/run-326653dc-74184412/epoch_19.pth
  --data.samples_per_gpu 2
  --data.workers_per_gpu 4
)

# Start training; stdout and stderr are shown on the console and copied to
# the log. The status is captured explicitly so a failure can be reported
# with the log location before exiting.
status=0
"${train_cmd[@]}" 2>&1 | tee "$LOG_FILE" || status=$?

if (( status != 0 )); then
  echo "" >&2
  echo "训练失败 (退出码: ${status})!日志保存在: $LOG_FILE" >&2
  exit "$status"
fi

echo ""
echo "训练完成!日志保存在: $LOG_FILE"