Skip to content

Commit

Permalink
Merge pull request #135 from luck3y/CLOUD-3348
Browse files Browse the repository at this point in the history
[CLOUD-3349] - openshift-migrate.sh terminates with Max retries exceeded with url: /management
  • Loading branch information
luck3y authored Aug 20, 2019
2 parents d2715aa + f0fff5f commit ca79935
Showing 1 changed file with 39 additions and 23 deletions.
62 changes: 39 additions & 23 deletions os-eap-migration/added/launch/openshift-migrate-common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ source /opt/partition/partitionPV.sh

function runMigration() {
local instanceDir=$1

local COUNT=30
local SLEEP=5
# if a count is provided, NODE_NAME is constructed by appending it (defaulting the base name to "node")
local count=$2
[ "x$count" != "x" ] && export NODE_NAME="${NODE_NAME:-node}-${count}"
Expand All @@ -30,31 +31,46 @@ function runMigration() {
trap "echo Received TERM ; touch \"${terminatingFile}\" ; kill -TERM $PID ; " TERM
local success=false
local message="Finished, migration pod has been terminated"
${JBOSS_HOME}/bin/readinessProbe.sh
local probeStatus=$?
local probeStatus=0

if [ $probeStatus -eq 0 ] ; then
echo "$(date): Server started, checking for transactions"

local startTime=$(date +'%s')
local endTime=$((startTime + ${RECOVERY_TIMEOUT} + 1))

local socketBinding=$(run_cli_cmd '/subsystem=transactions/:read-attribute(name="socket-binding")' | grep -w result | sed -e 's+^.*=> "++' -e 's+".*$++')
local recoveryPort=$(run_cli_cmd '/socket-binding-group=standard-sockets/socket-binding='"${socketBinding}"'/:read-attribute(name="bound-port")' | grep -w result | sed -e 's+^.*=> ++')
local recoveryHost=$(run_cli_cmd '/socket-binding-group=standard-sockets/socket-binding='"${socketBinding}"'/:read-attribute(name="bound-address")' | grep -w result | sed -e 's+^.*=> "++' -e 's+".*$++')

if [ "${recoveryPort}" != "undefined" ] ; then
local recoveryClass="com.arjuna.ats.arjuna.tools.RecoveryMonitor"
recoveryJar=$(find "${JBOSS_HOME}" -name \*.jar | xargs grep -l "${recoveryClass}")
if [ -n "${recoveryJar}" ] ; then
echo "$(date): Executing synchronous recovery scan for a first time"
java -cp "${recoveryJar}" "${recoveryClass}" -host "${recoveryHost}" -port "${recoveryPort}" -timeout 1800000
echo "$(date): Executing synchronous recovery scan for a second time"
java -cp "${recoveryJar}" "${recoveryClass}" -host "${recoveryHost}" -port "${recoveryPort}" -timeout 1800000
echo "$(date): Synchronous recovery scans finished for the first and the second time"
# this sleeps for 10s before the first probe, to emulate the old behavior
sleep 10

for i in `seq ${COUNT}`
do
echo "Checking readiness probe status for server start."
${JBOSS_HOME}/bin/readinessProbe.sh
probeStatus=$?
if [ $probeStatus -eq 0 ] ; then
echo "$(date): Server started, checking for transactions"

local startTime=$(date +'%s')
local endTime=$((startTime + ${RECOVERY_TIMEOUT} + 1))

local socketBinding=$(run_cli_cmd '/subsystem=transactions/:read-attribute(name="socket-binding")' | grep -w result | sed -e 's+^.*=> "++' -e 's+".*$++')
local recoveryPort=$(run_cli_cmd '/socket-binding-group=standard-sockets/socket-binding='"${socketBinding}"'/:read-attribute(name="bound-port")' | grep -w result | sed -e 's+^.*=> ++')
local recoveryHost=$(run_cli_cmd '/socket-binding-group=standard-sockets/socket-binding='"${socketBinding}"'/:read-attribute(name="bound-address")' | grep -w result | sed -e 's+^.*=> "++' -e 's+".*$++')

if [ "${recoveryPort}" != "undefined" ] ; then
local recoveryClass="com.arjuna.ats.arjuna.tools.RecoveryMonitor"
# more than one jar may contain the class; if so, use the most recently modified one
recoveryJars=$(find "${JBOSS_HOME}" -name \*.jar | xargs grep -l "${recoveryClass}")
recoveryJar=$(ls -Art $recoveryJars | tail -n 1)
if [ -n "${recoveryJar}" ] ; then
echo "$(date): Executing synchronous recovery scan for a first time"
java -cp "${recoveryJar}" "${recoveryClass}" -host "${recoveryHost}" -port "${recoveryPort}" -timeout 1800000
echo "$(date): Executing synchronous recovery scan for a second time"
java -cp "${recoveryJar}" "${recoveryClass}" -host "${recoveryHost}" -port "${recoveryPort}" -timeout 1800000
echo "$(date): Synchronous recovery scans finished for the first and the second time"
fi
fi
# probe was successful, exit loop
break
else
echo "Sleeping ${SLEEP} seconds before retrying readiness probe."
sleep ${SLEEP}
fi
fi
done

# -- check that the pod log is free of errors (only if the probePodLogForRecoveryErrors function exists; it is provided by the os-partition module)
if [ $probeStatus -eq 0 ] && [ "$(type -t probePodLogForRecoveryErrors)" = 'function' ]; then
Expand Down

0 comments on commit ca79935

Please sign in to comment.