58 lines
1.3 KiB
Bash
Executable File
58 lines
1.3 KiB
Bash
Executable File
#!/bin/bash
#
# Stop the currently running multitask training job.
#
# Steps:
#   1. List the processes whose command line matches the training pattern.
#   2. Look up the first mpirun process that launched train.py.
#   3. Send it SIGTERM and poll until it exits or the grace period elapses.
#   4. Escalate to SIGKILL if it is still alive after the grace period.
#   5. Force-kill any leftover train.py workers and report what remains.
#
# Usage: run without arguments.
# Exit status:
#   0  nothing was running, or every matching training process is gone
#   1  processes matching the training pattern are still alive afterwards

# No `set -e`: pgrep/pkill/kill legitimately return non-zero ("no match",
# "already gone"), so every status that matters is checked explicitly.
set -uo pipefail

# Extended regexes matched against full command lines (pgrep/pkill -f).
readonly TRAIN_PATTERN='train.py.*multitask.yaml'
readonly MPIRUN_PATTERN='mpirun.*train.py'
# Seconds to wait after SIGTERM before escalating, and after SIGKILL.
readonly GRACE_SECONDS=10
readonly KILL_SETTLE_SECONDS=2

#######################################
# Print a three-line banner around a title.
# Arguments: $1 - title text
# Outputs:   banner on stdout
#######################################
banner() {
  echo "=========================================="
  echo "$1"
  echo "=========================================="
}

#######################################
# List PID and command line of every process matching a pattern.
# Arguments: $1 - extended regex matched against the full command line
# Outputs:   one "PID command line" row per match on stdout
# Returns:   pgrep's status (non-zero when nothing matches)
#######################################
list_procs() {
  pgrep -af -- "$1"
}

#######################################
# Print the PID of the first mpirun process that launched train.py.
# Outputs: a single PID on stdout, or nothing when there is no match
#######################################
find_launcher_pid() {
  pgrep -f -- "${MPIRUN_PATTERN}" | head -n 1
}

#######################################
# Tell whether a process still exists.
# Arguments: $1 - PID
# Returns:   0 if the process exists, non-zero otherwise
#######################################
is_running() {
  # `kill -0` is a builtin probe; it also fails when we lack permission to
  # signal the process, so fall back to `ps -p` before declaring it gone.
  kill -0 "$1" 2>/dev/null || ps -p "$1" >/dev/null 2>&1
}

#######################################
# Poll once per second until a process is gone.
# Arguments: $1 - PID, $2 - maximum number of seconds to wait
# Returns:   0 as soon as the process is gone, 1 if it outlives the timeout
#######################################
wait_for_exit() {
  local pid=$1
  local timeout=$2
  local waited=0

  while is_running "${pid}"; do
    if (( waited >= timeout )); then
      return 1
    fi
    sleep 1
    waited=$(( waited + 1 ))
  done
  return 0
}

#######################################
# Poll once per second until no process matches a pattern.
# Arguments: $1 - extended regex, $2 - maximum number of seconds to wait
# Returns:   0 once nothing matches, 1 if matches outlive the timeout
#######################################
wait_for_no_match() {
  local pattern=$1
  local timeout=$2
  local waited=0

  while pgrep -f -- "${pattern}" >/dev/null 2>&1; do
    if (( waited >= timeout )); then
      return 1
    fi
    sleep 1
    waited=$(( waited + 1 ))
  done
  return 0
}

#######################################
# Entry point: stop the mpirun launcher, clean up workers, report the result.
# Globals:   TRAIN_PATTERN, MPIRUN_PATTERN, GRACE_SECONDS,
#            KILL_SETTLE_SECONDS (all read)
# Outputs:   progress on stdout, warnings on stderr
# Returns:   0 on success or when nothing was running, 1 if training
#            processes are still alive after the cleanup
#######################################
main() {
  local launcher_pid
  local rc=0

  banner "停止当前训练进程"
  echo ""

  # Informational listing only; "no match" is not an error here.
  echo "当前运行的训练进程:"
  list_procs "${TRAIN_PATTERN}"

  echo ""
  echo "准备停止进程..."
  echo ""

  launcher_pid=$(find_launcher_pid)
  if [[ -z "${launcher_pid}" ]]; then
    echo "未找到运行中的训练进程"
    return 0
  fi

  echo "找到主进程 PID: ${launcher_pid}"
  echo ""
  echo "发送SIGTERM信号(优雅停止)..."
  if ! kill -TERM "${launcher_pid}"; then
    # Either the launcher exited in the meantime or we may not signal it;
    # keep going, the final check below reports whatever is left.
    echo "警告: 无法向进程 ${launcher_pid} 发送SIGTERM" >&2
  fi

  # Poll instead of sleeping a fixed time so a prompt exit is not delayed.
  echo "等待进程退出..."
  if ! wait_for_exit "${launcher_pid}" "${GRACE_SECONDS}"; then
    echo "进程仍在运行,发送SIGKILL信号(强制停止)..."
    kill -KILL "${launcher_pid}"
    wait_for_exit "${launcher_pid}" "${KILL_SETTLE_SECONDS}"
  fi

  # Workers can outlive the launcher; force-kill whatever still matches.
  echo "清理所有Python训练进程..."
  pkill -9 -f -- "${TRAIN_PATTERN}"

  echo ""
  # SIGKILL delivery is asynchronous, so allow a short settle time before
  # deciding whether the job is really gone.
  if wait_for_no_match "${TRAIN_PATTERN}" "${KILL_SETTLE_SECONDS}"; then
    echo "训练进程已停止"
  else
    echo "警告: 仍有训练进程未能停止" >&2
    rc=1
  fi

  echo ""
  echo "剩余进程检查:"
  # Deliberately broader than TRAIN_PATTERN: shows any train.py still alive.
  list_procs "train.py" || echo "无训练进程"

  echo ""
  banner "完成!"
  return "${rc}"
}

main "$@"