bev-project/archive_scripts/一键剪枝和微调.sh

151 lines
4.3 KiB
Bash
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# One-shot driver for the prune-then-fine-tune workflow.
set -e
cd /workspace/bevfusion

# Opening banner: baseline figures and the pruning goal.
printf '%s\n' \
  "========================================================================" \
  "BEVFusion 模型剪枝+微调 完整流程" \
  "========================================================================" \
  "" \
  "Baseline: Epoch 23" \
  " - 参数量: 45.72M" \
  " - NDS: 0.6941, mAP: 0.6446, mIoU: 0.4130" \
  "" \
  "目标: 剪枝30% → 32M参数" \
  "预期: 精度损失<2%" \
  ""

# Settings used by the stages below.
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
CHECKPOINT="runs/enhanced_from_epoch19/epoch_23.pth"
PRUNED_OUTPUT="pruning_results/bevfusion_pruned_32M.pth"
TARGET_RATIO=0.70 # keep 70% of the parameters (prune 30%)
FINETUNE_DIR="runs/pruned_finetune_${TIMESTAMP}"

# Bail out early when the source checkpoint is missing.
[ -f "$CHECKPOINT" ] || {
  echo "错误: Checkpoint不存在: $CHECKPOINT"
  exit 1
}

# Make sure the output directories exist before anything writes to them.
for out_dir in pruning_results "$FINETUNE_DIR"; do
  mkdir -p "$out_dir"
done
# Stage 1: prune the checkpoint and keep a timestamped copy of the tool output.
echo "========================================================================"
echo "阶段1: 模型剪枝预计15分钟"
echo "========================================================================"
echo ""

# Run the pruning tool; stdout and stderr are shown live and mirrored to the log.
/opt/conda/bin/python tools/pruning/prune_bevfusion_builtin.py \
  --checkpoint "$CHECKPOINT" \
  --output "$PRUNED_OUTPUT" \
  --target-ratio "$TARGET_RATIO" \
  2>&1 | tee "pruning_results/pruning_log_${TIMESTAMP}.txt"

# $? after a pipeline is the status of its last stage (tee), so a failing
# pruning run would go unnoticed. PIPESTATUS[0] is the pruning tool's own
# exit code; it must be read immediately, before any other command runs.
prune_status=${PIPESTATUS[0]}
if [ "$prune_status" -ne 0 ]; then
  echo "错误: 剪枝失败"
  exit 1
fi

echo ""
echo "✅ 剪枝完成!"
echo " 输出: $PRUNED_OUTPUT"
echo ""
# Stage 2: ask whether to start fine-tuning now, and act on the answer.
echo "========================================================================"
echo "阶段2: 微调训练预计12-15小时"
echo "========================================================================"
echo ""
echo "是否立即开始微调训练?"
echo ""
echo "微调配置:"
echo " - Epochs: 3"
echo " - 学习率: 5e-6 (很小)"
echo " - GPU: 8张"
echo " - 预计时间: 12-15小时"
echo ""
echo "选项:"
echo " [1] 立即开始微调(后台运行)"
echo " [2] 稍后手动启动"
echo " [3] 查看剪枝结果后再决定"
echo ""

# -r keeps backslashes literal. On EOF / read failure fall through to the
# invalid-choice branch instead of dying silently under `set -e`.
read -r -p "请选择 [1/2/3]: " choice || choice=""

case "$choice" in
  1)
    echo ""
    echo "启动微调训练..."
    CONFIG="configs/nuscenes/det/transfusion/secfpn/camera+lidar/swint_v0p075/multitask_enhanced_phase1_HIGHRES.yaml"
    finetune_log="${FINETUNE_DIR}/finetune.log"

    # Build the command as an array so every argument stays a single word.
    finetune_cmd=(
      /opt/conda/bin/torchpack dist-run -np 8
      /opt/conda/bin/python tools/train.py
      "$CONFIG"
      --load_from "$PRUNED_OUTPUT"
      --run-dir "$FINETUNE_DIR"
      --cfg-options
      max_epochs=3
      optimizer.lr=5.0e-6
      data.samples_per_gpu=2
      data.workers_per_gpu=0
    )

    # Launch in the background, writing straight to the log file.
    # Piping into `tee` here would make $! the PID of tee (the last pipeline
    # stage) rather than the training job, and tee itself would not be
    # protected by nohup. With a plain redirect, $! is the nohup'd launcher.
    nohup "${finetune_cmd[@]}" > "$finetune_log" 2>&1 &
    FINETUNE_PID=$!
    echo "$FINETUNE_PID" > pruning_results/finetune.pid

    echo ""
    echo "✅ 微调训练已启动(后台运行)"
    echo " PID: $FINETUNE_PID"
    echo " 日志: ${FINETUNE_DIR}/finetune.log"
    echo ""
    echo "监控命令:"
    echo " tail -f ${FINETUNE_DIR}/finetune.log | grep 'Epoch'"
    echo ""
    ;;
  2)
    # Only print the command for a later manual launch.
    echo ""
    echo "稍后手动启动微调。"
    echo ""
    echo "启动命令:"
    echo " torchpack dist-run -np 8 python tools/train.py \\"
    echo " configs/.../multitask_enhanced_phase1_HIGHRES.yaml \\"
    echo " --load_from $PRUNED_OUTPUT \\"
    echo " --cfg-options max_epochs=3 optimizer.lr=5.0e-6"
    echo ""
    ;;
  3)
    # Point the user at the checkpoint analysis tool before deciding.
    echo ""
    echo "查看剪枝结果:"
    echo " python tools/analysis/analyze_checkpoint.py $PRUNED_OUTPUT"
    echo ""
    echo "如果满意,再启动微调。"
    echo ""
    ;;
  *)
    echo "无效选择,退出。"
    exit 1
    ;;
esac
# Closing summary: files produced by this run and suggested next steps.
printf '%s\n' \
  "========================================================================" \
  "剪枝流程完成" \
  "========================================================================" \
  "" \
  "已生成文件:" \
  " - $PRUNED_OUTPUT" \
  " - pruning_results/pruning_log_${TIMESTAMP}.txt" \
  "" \
  "下一步:" \
  " 1. 等待微调完成(如已启动)" \
  " 2. 评估剪枝+微调后的模型" \
  " 3. 进行INT8量化" \
  ""