-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathnode-auto-restart.sh
159 lines (138 loc) · 5.97 KB
/
node-auto-restart.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#!/bin/bash
#restart commands
# Announce that the node is stuck, then stop and start it again.
# Outputs: one status line on stdout (swap it for any alerting command).
# Returns: the exit status of the final `phala start`.
restartNode() {
  printf '%s\n' "检测到卡顿超时!重启节点!"
  # The stop/start commands depend on how the node was deployed.
  phala stop node
  phala start
}
#update commands
# Announce that restarts did not help, then replace the node image:
# stop the node, remove the local image, pull it again, start the node.
# Outputs: one status line on stdout (swap it for any alerting command).
# Returns: the exit status of the final `phala start`.
updateNode() {
  local node_image="phalanetwork/khala-node"

  printf '%s\n' "重启多次无效!更新节点!"
  # The stop/start commands depend on how the node was deployed.
  phala stop node
  docker image rm "${node_image}"   # drop the old node image
  docker pull "${node_image}"       # fetch the new node image
  phala start
}
#check if synced
# Translate a node's `isSyncing` flag into the status label shown in the table.
# Arguments: $1 - the isSyncing value as printed by jq; empty when the node
#                 did not answer.
# Outputs:   empty     -> "未启动" (not started)
#            "false"   -> "已同步" (synced), wrapped in green colour escapes
#            otherwise -> "同步中" (syncing)
# The colour escapes are emitted literally (backslash + E); the caller splices
# the label into a printf format string, which is what interprets them.
function isSynced(){
  local is_syncing=${1-}
  if [ -z "$is_syncing" ]; then
    printf '%s\n' "未启动"
  elif [ "$is_syncing" = "false" ]; then
    # Only an explicit "false" means the node has caught up.
    printf '%s\n' '\E[1;32m已同步\E[0m'
  else
    printf '%s\n' "同步中"
  fi
}
#need sudo
# Refuse to run unless the effective user id is 0.
# NOTE(review): presumably because the phala/docker/apt-get calls need root.
[[ $(id -u) -eq 0 ]] || {
  echo "请使用sudo运行!"
  exit 1
}
#need jq
# Every RPC reply below is parsed with jq. If jq is missing, all parsed values
# come back empty and the loop treats that as "not stuck", so the watchdog
# would silently never act. Install jq, and stop if that fails.
if ! command -v jq > /dev/null 2>&1; then
  if ! apt-get install -y jq; then
    echo "jq 安装失败,请手动安装后重试!" >&2
    exit 1
  fi
fi
#var
# Watchdog state shared with the main loop.
node_ip="127.0.0.1"          # host whose RPC ports are polled
khala_block_last_check=0     # khala block height seen on the previous poll
kusama_block_last_check=0    # kusama block height seen on the previous poll
node_stuck_count=0           # consecutive polls without block progress
restart_count=0              # restarts since the last observed progress

# Print $1 as a normalised decimal if it is a positive integer, else print $2.
# Zero is rejected: a threshold of 0 would restart/update on every poll.
function positiveIntOr(){
  local value=$1 fallback=$2
  if [[ "$value" =~ ^[0-9]+$ ]] && (( 10#$value > 0 )); then
    printf '%s\n' "$(( 10#$value ))"
  else
    printf '%s\n' "$fallback"
  fi
}

#reads var
# Ask for both thresholds; Enter, EOF or an invalid answer keeps the default.
# stuck_times:   polls without progress before a restart (default 5)
# restart_times: restarts without progress before an image update (default 3)
read -r -p "检测区块未增加几分钟后重启? (直接回车默认5分)" stuck_times
stuck_times=$(positiveIntOr "$stuck_times" 5)
read -r -p "重启几次后未解决,更新节点? (直接回车默认3次)" restart_times
restart_times=$(positiveIntOr "$restart_times" 3)
# ---------------------------------------------------------------------------
# Watchdog main loop.
# Each pass polls the khala RPC (port 9933) and the kusama RPC (port 9934),
# prints a status table, and compares block heights with the previous pass.
# After `stuck_times` consecutive passes without progress on either chain the
# node is restarted; once `restart_count` reaches `restart_times` the node
# image is updated. Helpers used only by the loop are defined first.
# ---------------------------------------------------------------------------

# Call one JSON-RPC method on the node and print the reply's `.result` as JSON.
# Arguments: $1 - RPC method name, $2 - HTTP RPC port
# Outputs:   the JSON result; nothing if the node did not answer.
function rpcResult(){
  local method=$1 port=$2
  # --max-time keeps a hung RPC endpoint from blocking the watchdog forever.
  curl -s --max-time 10 -H "Content-Type: application/json" \
    -d "{\"id\":1, \"jsonrpc\":\"2.0\", \"method\": \"${method}\", \"params\":[]}" \
    "http://${node_ip}:${port}" | jq '.result'
}

# Print one field of a JSON document.
# Arguments: $1 - JSON text (may be empty), $2 - jq filter
function jsonField(){
  jq "$2" <<<"$1"
}

# Show a one-line countdown, rewriting the same terminal line every second.
# Arguments: $1 - seconds to wait, $2 - label shown after the remaining time
function countdown(){
  local remaining label=$2
  for (( remaining = $1; remaining >= 1; remaining-- )); do
    printf '%s\r' "--- ${remaining}s ${label} ---"
    sleep 1
  done
}

#get node ip length
# The rule above the IP row is one dash longer than the IP; node_ip never
# changes inside the loop, so build the rule once.
node_ip_length=${#node_ip}
printf -v hyphen '%*s' "$(( node_ip_length + 1 ))" ''
hyphen=${hyphen// /-}

while true; do
  #get_node_version
  # Keep only the part of the version string before the first '-'.
  node_system_version=$(rpcResult system_version 9933 | tr -d '"' | cut -d'-' -f1)
  if [ -z "$node_system_version" ]; then node_system_version="节点未响应"; fi

  #get_khala_info
  node_khala_system_health=$(rpcResult system_health 9933)
  node_khala_system_health_isSyncing=$(jsonField "$node_khala_system_health" '.isSyncing')
  node_khala_system_health_peers=$(jsonField "$node_khala_system_health" '.peers')
  node_khala_system_syncState=$(rpcResult system_syncState 9933)
  node_khala_system_syncState_currentBlock=$(jsonField "$node_khala_system_syncState" '.currentBlock')
  node_khala_system_syncState_highestBlock=$(jsonField "$node_khala_system_syncState" '.highestBlock')
  node_khala_synced=$(isSynced "$node_khala_system_health_isSyncing")

  #get_kusama_info
  node_kusama_system_health=$(rpcResult system_health 9934)
  node_kusama_system_health_isSyncing=$(jsonField "$node_kusama_system_health" '.isSyncing')
  node_kusama_system_health_peers=$(jsonField "$node_kusama_system_health" '.peers')
  node_kusama_system_syncState=$(rpcResult system_syncState 9934)
  node_kusama_system_syncState_currentBlock=$(jsonField "$node_kusama_system_syncState" '.currentBlock')
  node_kusama_system_syncState_highestBlock=$(jsonField "$node_kusama_system_syncState" '.highestBlock')
  node_kusama_synced=$(isSynced "$node_kusama_system_health_isSyncing")

  #print info
  # The sync labels are spliced into the format string on purpose: they carry
  # literal \E colour escapes that printf must interpret. Every data argument
  # is quoted so an empty value keeps its column instead of shifting the row.
  printf "
--$hyphen--
$node_ip |
----------------------------------------------------------------------
节点版本 | khala节点 | 当前高度 | 最高高度 | 对等点数量 |
----------------------------------------------------------------------
%-8s | $node_khala_synced | %-10s | %-10s | %-10s |
----------------------------------------------------------------------
| ksm节点 | 当前高度 | 最高高度 | 对等点数量 |
----------------------------------------------------------------------
| $node_kusama_synced | %-10s | %-10s | %-10s |
----------------------------------------------------------------------" \
    "$node_system_version" \
    "$node_khala_system_syncState_currentBlock" \
    "$node_khala_system_syncState_highestBlock" \
    "$node_khala_system_health_peers" \
    "$node_kusama_system_syncState_currentBlock" \
    "$node_kusama_system_syncState_highestBlock" \
    "$node_kusama_system_health_peers"

  #if getting info fails
  # An empty or non-numeric height (e.g. "null") means the RPC gave no usable
  # answer. Force a difference of 1 so a node that is merely unreachable
  # (for instance still starting up) is not counted as stuck.
  if [[ ! "$node_khala_system_syncState_currentBlock" =~ ^[0-9]+$ ]]; then
    node_khala_system_syncState_currentBlock=1
    khala_block_last_check=0
  fi
  if [[ ! "$node_kusama_system_syncState_currentBlock" =~ ^[0-9]+$ ]]; then
    node_kusama_system_syncState_currentBlock=1
    kusama_block_last_check=0
  fi

  #compare block value
  khala_diff=$(( node_khala_system_syncState_currentBlock - khala_block_last_check ))
  kusama_diff=$(( node_kusama_system_syncState_currentBlock - kusama_block_last_check ))

  #save last check value
  khala_block_last_check=$node_khala_system_syncState_currentBlock
  kusama_block_last_check=$node_kusama_system_syncState_currentBlock

  #if stuck, increase node_stuck_count
  # Progress on both chains clears both counters.
  if (( khala_diff < 1 || kusama_diff < 1 )); then
    node_stuck_count=$(( node_stuck_count + 1 ))
  else
    node_stuck_count=0
    restart_count=0
  fi
  printf '\n---------------------------------\n卡顿计数 | %s | 重启计数 | %s |\n' \
    "$node_stuck_count" "$restart_count"

  #if stuck too long, restart node
  # `[ ... ]` is used deliberately: a non-integer threshold makes the test
  # fail (no restart) instead of being evaluated as 0 by (( )) or [[ ]].
  if [ "$node_stuck_count" -ge "$stuck_times" ]; then
    restartNode
    restart_count=$(( restart_count + 1 ))
    node_stuck_count=0
    #waiting 5 mins for node fully restarted
    countdown 300 "等待重启完成"
  fi

  #if restart not work, try update node
  # NOTE(review): when the restart above brings restart_count up to
  # restart_times, the update runs right after the 5-minute wait, without
  # polling first to see whether that restart helped. Confirm this is intended.
  if [ "$restart_count" -ge "$restart_times" ]; then
    updateNode
    restart_count=0
    #waiting 5 mins for node fully restarted
    countdown 300 "等待重启完成"
  fi

  #check every 60s
  countdown 60 "刷新"
done