Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: improve osmosis probes #825

Merged
merged 3 commits into from
Aug 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go/coinstacks/osmosis/daemon/init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ set -e
start() {
MONIKER=unchained \
CHAIN_JSON=https://raw.githubusercontent.com/cosmos/chain-registry/master/osmosis/chain.json \
SNAPSHOT_QUICKSYNC=https://dl2.quicksync.io/json/osmosis.json \
P2P_POLKACHU=true \
run.sh osmosisd start \
--rpc.laddr tcp://0.0.0.0:26657 \
Expand Down
23 changes: 23 additions & 0 deletions go/coinstacks/osmosis/daemon/liveness.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash
#
# Liveness probe: succeeds only when the local node's latest block height has
# advanced since the previous invocation. The height seen on each run is
# persisted to FILE so the next run has a baseline to compare against.

FILE=/root/.osmosisd/.latest_block_height

# a plain decimal integer without leading zeros (avoids octal interpretation)
HEIGHT_PATTERN='^(0|[1-9][0-9]*)$'

STATUS=$(curl -sf http://localhost:26657/status) || exit 1

# quote the payload so the JSON reaches jq without word splitting or globbing
LATEST_BLOCK_HEIGHT=$(printf '%s\n' "$STATUS" | jq -r '.result.sync_info.latest_block_height')

# never persist or compare something that is not a block number
# (jq prints "null" when the field is missing and nothing when parsing fails)
if [[ ! $LATEST_BLOCK_HEIGHT =~ $HEIGHT_PATTERN ]]; then
  echo "unable to determine latest block height"
  exit 1
fi

# first run: record a baseline and fail, there is nothing to compare with yet
if [[ ! -f "$FILE" ]]; then
  echo "$LATEST_BLOCK_HEIGHT" > "$FILE"
  exit 1
fi

PREV_LATEST_BLOCK_HEIGHT=$(cat "$FILE")
echo "$LATEST_BLOCK_HEIGHT" > "$FILE"

# an unreadable baseline is treated like a missing one; the file now holds a
# valid height for the next run
if [[ ! $PREV_LATEST_BLOCK_HEIGHT =~ $HEIGHT_PATTERN ]]; then
  exit 1
fi

if (( LATEST_BLOCK_HEIGHT > PREV_LATEST_BLOCK_HEIGHT )); then
  exit 0
fi

echo "node is stalled..."

exit 1
59 changes: 55 additions & 4 deletions go/coinstacks/osmosis/daemon/readiness.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,61 @@
#!/bin/bash

# NOTE(review): this listing appears to interleave lines from two revisions of
# the script; SYNCING is assigned again further down from the raw response and
# the `if` below has no matching `fi` of its own here - confirm which lines
# belong to the current file.
SYNCING=$(curl -sf http://localhost:1317/syncing | jq -r .syncing)
# Number of blocks the local node may trail the best reference height and
# still pass the check performed in reference_validation.
BLOCK_HEIGHT_BUFFER=5

if [[ $SYNCING == "false" ]]; then
echo "node is synced"
exit 0
# Query the local node; the probe fails immediately if any endpoint cannot be
# fetched (curl -f turns HTTP errors into a non-zero exit status).
SYNCING=$(curl -sf http://localhost:1317/syncing) || exit 1
NET_INFO=$(curl -sf http://localhost:26657/net_info) || exit 1
STATUS=$(curl -sf http://localhost:26657/status) || exit 1

# Feed each payload to jq quoted, so the JSON is not word-split or
# glob-expanded by the shell on its way in.
IS_SYNCING=$(jq -r '.syncing' <<<"$SYNCING")
CATCHING_UP=$(jq -r '.result.sync_info.catching_up' <<<"$STATUS")
NUM_PEERS=$(jq -r '.result.n_peers' <<<"$NET_INFO")

#######################################
# Print the highest block height reported by a set of reference nodes.
# Arguments: $@ - status endpoint URLs to query
# Outputs:   the largest valid latest_block_height seen on stdout, or 0 when
#            no reference returned a usable value
#######################################
get_best_block_height() {
  local best_block_height=0
  local reference_url status latest_block_height
  # plain decimal integer without leading zeros (avoids octal interpretation)
  local -r height_pattern='^(0|[1-9][0-9]*)$'

  for reference_url in "$@"; do
    # an unreachable reference is skipped rather than failing the whole check
    status=$(curl -sf "$reference_url") || continue
    [[ -n $status ]] || continue

    latest_block_height=$(printf '%s\n' "$status" | jq -r '.result.sync_info.latest_block_height')

    # this value comes from a remote host and is about to be used in an
    # arithmetic context, where bash evaluates arbitrary expressions;
    # accept nothing but a literal block number
    [[ $latest_block_height =~ $height_pattern ]] || continue

    if (( latest_block_height > best_block_height )); then
      best_block_height=$latest_block_height
    fi
  done

  echo "$best_block_height"
}

#######################################
# Cross-check the local node's height against public reference nodes.
# Globals:   STATUS, BLOCK_HEIGHT_BUFFER, NUM_PEERS (read)
#            BEST_BLOCK_HEIGHT, LATEST_BLOCK_HEIGHT,
#            BEST_BLOCK_HEIGHT_WITH_BUFFER (written)
# Outputs:   a one-line verdict on stdout when a reference height is available
# Returns:   exits the script with 0 when the local height is within
#            BLOCK_HEIGHT_BUFFER blocks of the best reference height and with
#            1 when it is not; returns normally, printing nothing, when no
#            reference reported a height above 0
#######################################
reference_validation() {
  BEST_BLOCK_HEIGHT=$(get_best_block_height https://rpc.osmosis.zone/status https://rpc-osmosis.keplr.app/status)
  # quote the payload so the JSON reaches jq without word splitting or globbing
  LATEST_BLOCK_HEIGHT=$(printf '%s\n' "$STATUS" | jq -r '.result.sync_info.latest_block_height')

  if (( BEST_BLOCK_HEIGHT > 0 )); then
    BEST_BLOCK_HEIGHT_WITH_BUFFER=$(( BEST_BLOCK_HEIGHT - BLOCK_HEIGHT_BUFFER ))

    if (( LATEST_BLOCK_HEIGHT >= BEST_BLOCK_HEIGHT_WITH_BUFFER )); then
      echo "node is synced with $NUM_PEERS peers and within safe buffer of reference node"
      exit 0
    fi

    echo "node is synced with $NUM_PEERS peers, but not within safe buffer of reference node"
    exit 1
  fi
}

# Final verdict: the node must report that it is neither syncing nor catching
# up, and must have at least one peer, before it can be considered ready.
if [[ $IS_SYNCING == false ]] && [[ $CATCHING_UP == false ]]; then
  # guard: a node that claims to be synced but has no peers is not ready
  if ! (( $NUM_PEERS > 0 )); then
    echo "node is synced, but has no peers"
    exit 1
  fi

  # if node is reporting synced, double check against reference nodes;
  # reference_validation exits the script itself whenever a reference height
  # is available, so the lines below only run when none could be obtained
  reference_validation

  echo "node is synced with $NUM_PEERS peers"
  exit 0
fi

echo "node is still syncing"
Expand Down
7 changes: 7 additions & 0 deletions go/coinstacks/osmosis/pulumi/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ export = async (): Promise<Outputs> => {
'daemon-api': { port: 1317, pathPrefix: '/lcd', stripPathPrefix: true },
'daemon-rpc': { port: 26657, pathPrefix: '/rpc', stripPathPrefix: true },
},
startupProbe: {
httpGet: { path: '/status', port: 26657 },
periodSeconds: 30,
failureThreshold: 60,
timeoutSeconds: 10,
},
livenessProbe: { periodSeconds: 30, timeoutSeconds: 10 },
readinessProbe: { periodSeconds: 30, failureThreshold: 10 },
}
default:
Expand Down