diff --git a/replication/files/mongo-trib.py b/replication/files/mongo-trib.py
index 97bbaa7..e7071d4 100755
--- a/replication/files/mongo-trib.py
+++ b/replication/files/mongo-trib.py
@@ -486,7 +486,7 @@ def get_ssh_cmd(self, ip, cmd):
         -o ConnectTimeout=5 -o ConnectionAttempts=3 root@%s \"%s\"" % (ip, cmd)
 
     def check_local_mongod(self):
-        cmd = 'lsof -i:%s' % self.port
+        cmd = 'nc -z -v -w10 127.0.0.1 %s' % self.port
         ret_code, _ = self.exec_cmd(cmd)
         if ret_code != 0:
             self.logger.error('no process listen at %s', self.port)
diff --git a/replication/files/upgrade.sh b/replication/files/upgrade.sh
new file mode 100755
index 0000000..dec175c
--- /dev/null
+++ b/replication/files/upgrade.sh
@@ -0,0 +1,155 @@
+#!/usr/bin/env bash
+#
+# Upgrade (default) or roll back the MongoDB binaries on this node by
+# repointing the /opt/mongodb/bin symlink, then restart mongod unless the
+# node currently reports itself as master.
+#
+# Usage: upgrade.sh [rollback]
+
+set -eo pipefail
+
+readonly appctlLogDir=/data/appctl/logs
+readonly appctlLogFile=$appctlLogDir/appctl.log
+
+# Create the directory that log() writes into.
+initNode() {
+  mkdir -p "$appctlLogDir"
+}
+
+# Append the arguments as one line to the appctl log file.
+log() {
+  echo "$@" >> "$appctlLogFile"
+}
+
+# retry MAX_ATTEMPTS INTERVAL STOP_CODES CMD [ARGS...]
+# Run CMD until it succeeds, sleeping INTERVAL seconds between attempts.
+# STOP_CODES is a comma-separated list of exit codes that end the loop
+# immediately. Returns 0 on success, otherwise the last exit code of CMD.
+retry() {
+  local tried=0
+  local maxAttempts=$1
+  local interval=$2
+  local stopCodes=$3
+  # CMD and its arguments are flattened into one string and re-split on
+  # whitespace when executed, so arguments containing spaces are not preserved.
+  local cmd="${@:4}"
+  local retCode=0
+  while [ "$tried" -lt "$maxAttempts" ]; do
+    $cmd && return 0 || {
+      retCode=$?
+      # Surrounding commas make the match exact (so "1" does not match "128").
+      if [[ ",$stopCodes," == *",$retCode,"* ]]; then
+        log "'$cmd' returned with stop code '$retCode'. Stopping ..."
+        return $retCode
+      fi
+    }
+    sleep "$interval"
+    tried=$((tried+1))
+  done
+
+  log "'$cmd' still returned errors after $tried attempts. Stopping ..."
+  return $retCode
+}
+
+# toggleHealthCheck true|false
+# "true" removes the /usr/local/etc/ignore_agent marker file; any other
+# value creates it.
+toggleHealthCheck() {
+  # `local -r` rather than `local readonly path=...`: the latter declares an
+  # extra local variable named "readonly" and leaves path writable.
+  local -r path=/usr/local/etc/ignore_agent
+  if [ "$1" == "true" ]; then
+    rm -rf "$path"
+  else
+    touch "$path"
+  fi
+}
+
+# Print the value of the "port:" entry in /etc/mongod.conf.
+getMongoPort() {
+  awk '$1=="port:" {print $2}' /etc/mongod.conf
+}
+
+# runMongoCmd [--local] JS
+# Evaluate JS in the mongo shell against the admin database on 127.0.0.1 as
+# qc_master. Without --local the URI carries replicaSet=foobar. The shell is
+# run under `timeout` with a 3 second limit.
+runMongoCmd() {
+  local passwd="$(cat /data/pitrix.pwd)"
+  local port=$(getMongoPort)
+  local uri=mongodb://qc_master:$passwd@127.0.0.1:$port/admin
+  if [ "$1" = "--local" ]; then
+    shift
+  else
+    uri="$uri?replicaSet=foobar"
+  fi
+  # Quote the URI: unquoted, the "?" is a glob character and the password
+  # would be subject to word splitting.
+  timeout --preserve-status 3 /opt/mongodb/bin/mongo --quiet "$uri" --eval "$@"
+}
+
+readonly EC_NOT_READY=128
+
+# Succeed when rs.status() lists this node (<ip>:<port>) as PRIMARY or
+# SECONDARY; otherwise the mongo shell quits with EC_NOT_READY.
+checkFullyStarted() {
+  local myIp=$(hostname -I | xargs)
+  local port=$(getMongoPort)
+  runMongoCmd "rs.status().members.filter(m => m.name == '$myIp:$port' && /(PRIMARY|SECONDARY)/.test(m.stateStr)).length == 1 || quit($EC_NOT_READY)"
+}
+
+# Succeed when the local mongod reports itself as master.
+isMaster() {
+  runMongoCmd --local "db.isMaster().ismaster == true || quit(1)"
+}
+
+readonly oldMongoVersion=3.4.5
+readonly newMongoVersion=3.4.17
+
+# Copy the new files in and point /opt/mongodb/bin at the new version.
+proceed() {
+  initNode
+  if [ ! -d /opt/mongodb/$oldMongoVersion ]; then
+    # Old-version directory missing: relocate the existing install into it.
+    log "backup old files ..."
+    mv /opt/mongodb /opt/$oldMongoVersion
+    mkdir /opt/mongodb
+    mv /opt/$oldMongoVersion /opt/mongodb/
+  fi
+  log "copying new files ..."
+  rsync -aAX /upgrade/opt/mongodb/ /opt/mongodb/
+  log "upgrading to $newMongoVersion ..."
+  ln -snf $newMongoVersion/bin /opt/mongodb/bin
+}
+
+# Point /opt/mongodb/bin back at the old version.
+rollback() {
+  log "rolling back to $oldMongoVersion ..."
+  ln -snf $oldMongoVersion/bin /opt/mongodb/bin
+}
+
+# main [ACTION]
+# Run ACTION (default: proceed); then, unless this node reports itself as
+# master, restart mongod and wait until it is PRIMARY or SECONDARY.
+main() {
+  toggleHealthCheck false
+
+  # Quoted so the action name and any arguments are passed through intact.
+  "${@:-proceed}"
+
+  if isMaster; then
+    log "leaving primary node as is old version, please manually restart it later."
+  else
+    log "restarting mongodb ..."
+    /opt/mongodb/bin/restart-mongod-server.sh
+
+    log "waiting mongodb to be ready ..."
+    # Stop code 0 never triggers (success returns first), so this polls
+    # until ready or until all 1200 attempts are used.
+    retry 1200 3 0 checkFullyStarted
+  fi
+
+  toggleHealthCheck true
+}
+
+main "$@"
diff --git a/replication/packages/cluster.json.mustache b/replication/packages/cluster.json.mustache
index 9860381..c92951d 100644
--- a/replication/packages/cluster.json.mustache
+++ b/replication/packages/cluster.json.mustache
@@ -12,7 +12,7 @@
             "container":{
                 "type":"kvm",
                 "zone":"pek3a",
-                "image":"img-pwg90sjm"
+                "image":"img-i2o7whhf"
             },
             "instance_class":{{cluster.replica.instance_class}},
             "count":{{cluster.replica.count}},
diff --git a/replication/packages/locale/zh-cn.json b/replication/packages/locale/zh-cn.json
index 1812aee..57c630a 100644
--- a/replication/packages/locale/zh-cn.json
+++ b/replication/packages/locale/zh-cn.json
@@ -1,4 +1,5 @@
 {
+    "err_code128": "检测到节点未恢复到正常状态,为避免影响业务,我们已经中断了升级动作,如需协助请工单联系",
     "Name": "集群名称",
     "Description": "描述",
     "Nodes": "节点",
diff --git a/replication/packages/patch.json b/replication/packages/patch.json
new file mode 100644
index 0000000..2a63285
--- /dev/null
+++ b/replication/packages/patch.json
@@ -0,0 +1,23 @@
+{
+  "patch_policy": [""],
+  "patch_nodes": [{
+    "container": {
+      "snapshot": "ss-epbgrs13",
+      "zone": "pek3a"
+    },
+    "patch": [{
+      "mount_role": "replica",
+      "mount_point": "/upgrade",
+      "mount_options": "defaults,noatime",
+      "filesystem": "ext4",
+      "cmd": "/upgrade/opt/mongodb/bin/upgrade.sh"
+    }],
+    "rollback": [{
+      "mount_role": "replica",
+      "mount_point": "/upgrade",
+      "mount_options": "defaults,noatime",
+      "filesystem": "ext4",
+      "cmd": "/upgrade/opt/mongodb/bin/upgrade.sh rollback"
+    }]
+  }]
+}