#!/bin/bash
# One-shot pruning + fine-tuning pipeline for BEVFusion.
#
# Stage 1 runs tools/pruning/prune_bevfusion_builtin.py on a fixed baseline
# checkpoint and tees its output into a timestamped log under
# pruning_results/.
# Stage 2 asks interactively whether to (1) launch fine-tuning in the
# background, (2) print the manual launch command, or (3) print the
# checkpoint-analysis command.
#
# Usage: run interactively; the script prompts on stdin for 1/2/3.
# All relative paths are resolved from /workspace/bevfusion.

# -e: abort on unhandled failures; -u: reject unset variables;
# pipefail: a pipeline fails when ANY stage fails, so a crashing pruner is
# not masked by the exit status of the trailing `tee`.
set -euo pipefail

cd /workspace/bevfusion

echo "========================================================================"
echo "BEVFusion 模型剪枝+微调 完整流程"
echo "========================================================================"
echo ""
echo "Baseline: Epoch 23"
echo " - 参数量: 45.72M"
echo " - NDS: 0.6941, mAP: 0.6446, mIoU: 0.4130"
echo ""
echo "目标: 剪枝30% → 32M参数"
echo "预期: 精度损失<2%"
echo ""

# Configuration
CHECKPOINT="runs/enhanced_from_epoch19/epoch_23.pth"
PRUNED_OUTPUT="pruning_results/bevfusion_pruned_32M.pth"
TARGET_RATIO=0.70  # keep 70% of the parameters (prune 30%)
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
FINETUNE_DIR="runs/pruned_finetune_${TIMESTAMP}"
PRUNING_LOG="pruning_results/pruning_log_${TIMESTAMP}.txt"

# Refuse to start without the baseline checkpoint.
if [[ ! -f "$CHECKPOINT" ]]; then
  echo "错误: Checkpoint不存在: $CHECKPOINT" >&2
  exit 1
fi

# Create output directories.
mkdir -p pruning_results
mkdir -p "$FINETUNE_DIR"

echo "========================================================================"
echo "阶段1: 模型剪枝(预计15分钟)"
echo "========================================================================"
echo ""

# Run pruning. The pipeline is tested directly in the `if` so that, together
# with pipefail, a non-zero exit from the Python process is reported here
# instead of being replaced by tee's (almost always zero) status.
if ! /opt/conda/bin/python tools/pruning/prune_bevfusion_builtin.py \
  --checkpoint "$CHECKPOINT" \
  --output "$PRUNED_OUTPUT" \
  --target-ratio "$TARGET_RATIO" \
  2>&1 | tee "$PRUNING_LOG"; then
  echo "错误: 剪枝失败" >&2
  exit 1
fi

echo ""
echo "✅ 剪枝完成!"
echo " 输出: $PRUNED_OUTPUT"
echo ""

# Ask whether to continue with fine-tuning.
echo "========================================================================"
echo "阶段2: 微调训练(预计12-15小时)"
echo "========================================================================"
echo ""
echo "是否立即开始微调训练?"
echo ""
echo "微调配置:"
echo " - Epochs: 3"
echo " - 学习率: 5e-6 (很小)"
echo " - GPU: 8张"
echo " - 预计时间: 12-15小时"
echo ""
echo "选项:"
echo " [1] 立即开始微调(后台运行)"
echo " [2] 稍后手动启动"
echo " [3] 查看剪枝结果后再决定"
echo ""

# -r keeps backslashes literal. On EOF (e.g. stdin is not a terminal) fall
# through to the "invalid choice" arm instead of dying silently under set -e.
choice=""
if ! read -r -p "请选择 [1/2/3]: " choice; then
  choice=""
fi

case "$choice" in
  1)
    echo ""
    echo "启动微调训练..."
    CONFIG="configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_enhanced_phase1_HIGHRES.yaml"
    FINETUNE_LOG="${FINETUNE_DIR}/finetune.log"

    # Launch fine-tuning in the background. Output goes straight to the log
    # file rather than through `| tee`: with a pipeline, $! would be the PID
    # of tee (not the trainer), and tee itself is not covered by nohup, so
    # closing the terminal could take the training job down with it.
    nohup /opt/conda/bin/torchpack dist-run -np 8 /opt/conda/bin/python tools/train.py \
      "$CONFIG" \
      --load_from "$PRUNED_OUTPUT" \
      --run-dir "$FINETUNE_DIR" \
      --cfg-options \
      max_epochs=3 \
      optimizer.lr=5.0e-6 \
      data.samples_per_gpu=2 \
      data.workers_per_gpu=0 \
      > "$FINETUNE_LOG" 2>&1 &
    FINETUNE_PID=$!
    # Persist the launcher PID so the job can be inspected or stopped later.
    echo "$FINETUNE_PID" > pruning_results/finetune.pid

    echo ""
    echo "✅ 微调训练已启动(后台运行)"
    echo " PID: $FINETUNE_PID"
    echo " 日志: ${FINETUNE_DIR}/finetune.log"
    echo ""
    echo "监控命令:"
    echo " tail -f ${FINETUNE_DIR}/finetune.log | grep 'Epoch'"
    echo ""
    ;;
  2)
    echo ""
    echo "稍后手动启动微调。"
    echo ""
    echo "启动命令:"
    echo " torchpack dist-run -np 8 python tools/train.py \\"
    echo " configs/.../multitask_enhanced_phase1_HIGHRES.yaml \\"
    echo " --load_from $PRUNED_OUTPUT \\"
    echo " --cfg-options max_epochs=3 optimizer.lr=5.0e-6"
    echo ""
    ;;
  3)
    echo ""
    echo "查看剪枝结果:"
    echo " python tools/analysis/analyze_checkpoint.py $PRUNED_OUTPUT"
    echo ""
    echo "如果满意,再启动微调。"
    echo ""
    ;;
  *)
    echo "无效选择,退出。" >&2
    exit 1
    ;;
esac

echo "========================================================================"
echo "剪枝流程完成"
echo "========================================================================"
echo ""
echo "已生成文件:"
echo " - $PRUNED_OUTPUT"
echo " - pruning_results/pruning_log_${TIMESTAMP}.txt"
echo ""
echo "下一步:"
echo " 1. 等待微调完成(如已启动)"
echo " 2. 评估剪枝+微调后的模型"
echo " 3. 进行INT8量化"
echo ""