#!/bin/bash # ๐Ÿš€๐Ÿš€๐Ÿš€ BEVFusion Phase 4B: RMT-PPAD Segmentation Training (Resume) ๐Ÿš€๐Ÿš€๐Ÿš€ # ไปŽๆœ€ๆ–ฐ็š„checkpointๆขๅค่ฎญ็ปƒ - ๅŒ…ๅซๆ•ฐๅ€ผ็จณๅฎšๆ€งไฟฎๅค #!/bin/bash # BEVFusion Phase 4B: RMT-PPAD Segmentation Integration # ้›†ๆˆRMT-PPAD็š„Transformerๅˆ†ๅ‰ฒ่งฃ็ ๅ™จ # ่„šๆœฌ้…็ฝฎ CONFIG_FILE="configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_BEV2X_phase4b_rmtppad_segmentation.yaml" WORK_DIR="runs/phase4b_rmtppad_segmentation" GPUS=8 PORT=29501 echo "๐Ÿš€ Starting BEVFusion Phase 4B: RMT-PPAD Segmentation Integration" echo "๐Ÿ“ Config: $CONFIG_FILE" echo "๐Ÿ’พ Work Dir: $WORK_DIR" echo "๐ŸŽฎ GPUs: $GPUS" echo "๐Ÿ”Œ Port: $PORT" echo "" # ๅˆ›ๅปบๅทฅไฝœ็›ฎๅฝ• mkdir -p $WORK_DIR # โœ… ๅ…ณ้”ฎ: ่ฎพ็ฝฎ็Žฏๅขƒๅ˜้‡ (ๅ‚่€ƒๆˆๅŠŸ่„šๆœฌ) export PATH=/opt/conda/bin:$PATH export LD_LIBRARY_PATH=/opt/conda/lib/python3.8/site-packages/torch/lib:/opt/conda/lib:/usr/local/cuda/lib64:$LD_LIBRARY_PATH export PYTHONPATH=/workspace/bevfusion:$PYTHONPATH #export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:64 # ่ฎพ็ฝฎGPU export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 # ้ชŒ่ฏ็Žฏๅขƒ echo "" echo "=== ็Žฏๅขƒ้ชŒ่ฏ ===" /opt/conda/bin/python -c "import torch; print('โœ… PyTorch:', torch.__version__)" || { echo "โŒ PyTorchๅฏผๅ…ฅๅคฑ่ดฅ" exit 1 } /opt/conda/bin/python -c "import mmcv; print('โœ… mmcv:', mmcv.__version__)" || { echo "โŒ mmcvๅฏผๅ…ฅๅคฑ่ดฅ" exit 1 } which torchpack || { echo "โŒ torchpackๆœชๆ‰พๅˆฐ" exit 1 } echo "โœ… torchpack: $(which torchpack)" # ๐Ÿ”„๐Ÿ”„๐Ÿ”„ Resume่ฎญ็ปƒ๏ผšๅŠ ่ฝฝๆœ€ๆ–ฐ็š„ๅฎŒๆ•ดcheckpoint ๐Ÿ”„๐Ÿ”„๐Ÿ”„ echo "" echo "=== Checkpointๆฃ€ๆŸฅ (Resume่ฎญ็ปƒ) ===" LOAD_CKPT="/workspace/bevfusion/runs/run-4c8ec7e5-f3215f6d/epoch_1.pth" if [ -f "$LOAD_CKPT" ]; then # Resume่ฎญ็ปƒ๏ผšๅŠ ่ฝฝๅฎŒๆ•ดcheckpoint๏ผŒๅŒ…ๆ‹ฌๆ‰€ๆœ‰ๆจกๅ— LOAD_FROM="--load-from $LOAD_CKPT" echo "๐Ÿ”„ Resume่ฎญ็ปƒ๏ผšๅŠ ่ฝฝๅฎŒๆ•ดcheckpoint" echo "๐Ÿ“„ ๅŠ ่ฝฝ: ๆ‰€ๆœ‰ๆจกๅ— (้ชจๅนฒ็ฝ‘็ปœ + BEV็‰นๅพ + ๆฃ€ๆต‹ๅคด + ๅˆ†ๅ‰ฒๅคด)" echo "๐Ÿ“„ ็›ฎ็š„: ไปŽEpoch 3็ปง็ปญ่ฎญ็ปƒ" echo "๐Ÿ“„ Checkpoint: $LOAD_CKPT" echo "๐Ÿ“Š Checkpoint่ฏฆๆƒ…:" ls -lh "$LOAD_CKPT" # ๆฃ€ๆŸฅcheckpointๆ—ถ้—ดๆˆณ if [[ "$LOAD_CKPT" =~ epoch_([0-9]+) ]]; then EPOCH_NUM="${BASH_REMATCH[1]}" echo "๐Ÿ“ˆ Checkpointๆฅ่‡ชEpoch: $EPOCH_NUM" fi echo "๐Ÿ•’ ไฟฎๆ”นๆ—ถ้—ด: $(stat -c '%y' "$LOAD_CKPT" | cut -d'.' -f1)" else LOAD_FROM="" LOAD_CKPT="" echo "โŒ ๆœชๆ‰พๅˆฐcheckpointๆ–‡ไปถ: $LOAD_CKPT" echo "โš ๏ธ ๅฐ†ไปŽๅคดๅผ€ๅง‹่ฎญ็ปƒ" fi # ๅฏๅŠจ่ฎญ็ปƒ echo "" echo "๐ŸŽฏ Starting training..." nohup torchpack dist-run \ -np 8 \ /opt/conda/bin/python tools/train.py \ configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_BEV2X_phase4b_rmtppad_segmentation.yaml \ --load_from "$LOAD_CKPT" \ --data.samples_per_gpu 1 \ --data.workers_per_gpu 0 \ > "$WORK_DIR/train.log" 2>&1 & ่Žทๅ–่ฟ›็จ‹ID TRAIN_PID=$! echo "" echo "โœ… Phase 4B training completed!" echo "๐Ÿ“Š Check results in: $WORK_DIR" echo "๐Ÿ“ˆ Monitor progress: tail -f $WORK_DIR/train.log" echo "" echo "โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•" echo "โœ… ่ฎญ็ปƒๅทฒๅœจๅŽๅฐๅฏๅŠจ๏ผ" echo "โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•" echo "" echo "่ฟ›็จ‹ID: $TRAIN_PID" echo "ๆ—ฅๅฟ—ๆ–‡ไปถ: $WORK_DIR/train.log" echo "" echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" echo "็›‘ๆŽงๅ‘ฝไปค:" echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”" echo "" echo "1. ๆŸฅ็œ‹ๅฎžๆ—ถๆ—ฅๅฟ—:" echo " tail -f $WORK_DIR/train.log" echo "" echo "2. ๆŸฅ็œ‹ๅ…ณ้”ฎๆŒ‡ๆ ‡:" echo " tail -f $WORK_DIR/train.log | grep -E 'Epoch|loss/map/divider|loss/object'" echo "" echo "3. ๆฃ€ๆŸฅ่ฟ›็จ‹็Šถๆ€:" echo " ps aux | grep $TRAIN_PID" echo "" echo "4. GPU็›‘ๆŽง:" echo " nvidia-smi -l 5" echo "" echo "5. ๅœๆญข่ฎญ็ปƒ:" echo " kill $TRAIN_PID" echo "" echo "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”"