Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

imp: healthchecks #398

Open
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

Tardo
Copy link
Contributor

@Tardo Tardo commented Jul 28, 2023

useful when using monitors

@Tardo Tardo marked this pull request as ready for review July 28, 2023 22:58
@Tardo Tardo force-pushed the imp-healthcheck branch from 44e159e to edfd8c2 Compare March 4, 2024 17:59
@pedrobaeza
Copy link
Member

Please check CI

@Tardo Tardo force-pushed the imp-healthcheck branch from edfd8c2 to 4a1df42 Compare May 3, 2024 15:18
@Tardo Tardo force-pushed the imp-healthcheck branch from 4a1df42 to 743fb80 Compare May 3, 2024 15:23
@josep-tecnativa
Copy link
Contributor

@yajo could you please check why is test_routring failing?

@yajo
Copy link
Contributor

yajo commented May 8, 2024

Probably because Traefik is refusing to route to a container that isn't healthy, which could mean that the healthcheck is wrong.

@ap-wtioit
Copy link
Contributor

ap-wtioit commented May 8, 2024

If you want, we have a healthcheck script that has been improved over the years on our systems:

#!/usr/bin/env bash
# check that odoo
# * bin can be found
# * unless only a click-odoo-* script is running
#   or only tests are running
#   * has process(es) running
#   *  if http_enabled
#     * has http ports open
#     * if multithreaded
#       * check that longpolling port is open
#       * check that cron threads are running
#       * check that http workers are available
#       * check that not all threads are occupied
# * db connection is working

# Verbosity: with -v every command is traced and probe output is shown;
# otherwise OUTPUT holds a redirection snippet that the probes append to the
# command strings they hand to `bash -c`, silencing them.
if [[ $1 == '-v' ]]; then
    OUTPUT=""
    set -x
else
    OUTPUT=" >/dev/null"
fi

# Shared state used by all checks:
#   errors  - running sum of non-zero exit statuses
#   skipped - names of checks that decided not to run
errors=0
skipped=()
# `command -v` is a shell builtin, so the lookup keeps working on minimal
# images that do not ship the external `which` tool. Empty when odoo is not
# on PATH.
ODOO_CMD=$(command -v odoo)
# store cookies in a file that is persisted through restarts
CURL_COOKIE_JAR="/var/lib/odoo/${PGDATABASE}-healthcheck-cookies.txt"
if [[ ! -e "$CURL_COOKIE_JAR" ]]; then
    touch "$CURL_COOKIE_JAR"
fi
# Markers flipped to "true" by odoo_run_detection when only helper
# processes (click-odoo scripts / test runners) are running.
ONLY_CLICK_ODOO_RUNNING="false"
ONLY_TEST_RUNNING="false"

# Tell whether a check registered itself in the global `skipped` array.
# Arguments: the check name (all arguments are joined into one string)
# Returns:   0 if an entry of `skipped` equals that string, 1 otherwise
function is_skipped() {
    local wanted="$*"
    local entry
    for entry in "${skipped[@]}"; do
        [[ $entry == "$wanted" ]] && return 0
    done
    return 1
}

# Run a command (or check function) and account for its result.
# Globals:   errors (read and incremented by the command's exit status)
# Arguments: the command and its arguments
# Outputs:   when called with exactly one argument, prints
#            "<name> FAILED", "<name> SKIPPED" or "<name> OK"
# Returns:   the command's exit status, or - if the command itself succeeded
#            but `errors` grew while it ran - the number of new errors
function check() {
    local rc baseline delta verdict
    baseline=$errors
    "$@"
    rc=$?
    errors=$((errors + rc))
    delta=$((errors - baseline))
    # failures recorded by nested checks make this check fail as well
    if [[ $rc == 0 && $delta != 0 ]]; then
        rc=$delta
    fi
    # only named checks (single word) are reported; raw commands stay quiet
    if [[ $# == 1 ]]; then
        if [[ $rc != 0 ]]; then
            verdict="FAILED"
        elif is_skipped "$*" ; then
            verdict="SKIPPED"
        else
            verdict="OK"
        fi
        echo "$* $verdict"
    fi

    return $rc
}

# Look up an Odoo setting, preferring the running command line over the
# configuration file.
# Globals:   ODOO_CMD (read) - used to pick odoo lines out of `ps fax`
# Arguments: $1 - command line option name without the leading "--"
#            $2 - key in /opt/odoo/auto/odoo.conf (defaults to $1)
#            $3 - value to print when neither source yields one (optional)
# Outputs:   the numeric value of "--<option>=N" / "--<option> N", or the
#            literal "--<option>" for a bare flag; otherwise the number or
#            true/false found in the config file; otherwise $3
function get_odoo_value() {
    local flag=$1
    local conf_key=${2:-$flag}
    local fallback=${3:-}
    local from_cmdline from_config

    # first sed extracts a numeric argument, second sed reduces a bare flag
    # to just the flag; sort | uniq collapses identical results
    from_cmdline=$(
        ps fax 2>/dev/null \
            | grep -v grep \
            | grep "$ODOO_CMD" \
            | grep -- '--'"$flag"'\([= ][0-9]\|$\| --\)' \
            | sed 's/.*--'"$flag"'[= ]\([0-9]\+\).*/\1/' \
            | sed 's/.*\(--'"$flag"'\)\($\| --.*\)/\1/' \
            | sort \
            | uniq
    )
    from_config=$(
        grep -R '^'"$conf_key"' = \([0-9]\|true\|false\)' /opt/odoo/auto/odoo.conf \
            | sed 's/'"$conf_key"' = \([0-9]\+\|true\|false\).*/\1/'
    )

    echo "${from_cmdline:-${from_config:-$fallback}}"
}

# Print the configured number of Odoo workers as reported by get_odoo_value
# for "workers" (no default is supplied, so the output may be empty).
function get_odoo_workers() {
    get_odoo_value "workers"
}

# Print the number of cron threads: option "--max-cron-threads", config key
# "max_cron_threads", falling back to 2 when neither is found.
function get_odoo_cron_threads() {
    get_odoo_value "max-cron-threads" "max_cron_threads" "2"
}

# Report whether Odoo is expected to serve HTTP.
# Outputs: "false" when odoo runs with --no-xmlrpc or --no-http; otherwise
#          the value get_odoo_value finds for the "http_enabled" config key,
#          which defaults to "true".
function get_odoo_http_enabled() {
    # http_enabled is declared local too, so calling this function outside a
    # command substitution does not leak a global variable
    local no_xmlrpc no_http http_enabled
    no_xmlrpc=$(get_odoo_value no-xmlrpc)
    no_http=$(get_odoo_value no-http)
    # "-" as option name: effectively only the config file key is consulted
    http_enabled=$(get_odoo_value - http_enabled true)
    if [[ $no_xmlrpc || $no_http ]] ; then
        echo "false"
    else
        echo "$http_enabled"
    fi
}

# Verify that the odoo executable was located and can be executed.
# Globals: ODOO_CMD (read)
# Returns: the status of the executable-bit check
function odoo_bin_exists() {
    # a path was found at all ...
    check [ -n "$ODOO_CMD" ]
    # ... and it points at something executable
    check [ -x "$ODOO_CMD" ]
}

# Detect containers in which no regular Odoo server is running, only a
# click-odoo script or a test run, and record that in the global markers.
# Globals: ODOO_CMD (read), ONLY_CLICK_ODOO_RUNNING, ONLY_TEST_RUNNING (written)
# Outputs: a one-line notice for every marker that gets set
function odoo_run_detection() {
    # a click-odoo-* script is present while no odoo process is running
    # (an interactive "odoo shell" is not counted as odoo)
    if ps fax 2>/dev/null | grep -v grep | grep "[ /]click-odoo-" >/dev/null \
        && ! ps fax 2>/dev/null | grep -v grep | grep "$ODOO_CMD" | grep -v "odoo.* shell\( \|$\)" >/dev/null ; then
        ONLY_CLICK_ODOO_RUNNING="true"
        echo "Only click-odoo-.* seems to be running"
    fi
    # a test runner is present while no odoo process is running
    # (odoo started with --test-enable is not counted as odoo)
    if ps fax 2>/dev/null | grep -v grep | grep -E '[ /](run_tests|_jb_pytest_runner.py|pytest)' >/dev/null \
        && ! ps fax 2>/dev/null | grep -v grep | grep "$ODOO_CMD" | grep -v "odoo.* --test-enable\( \|$\)" >/dev/null ; then
        ONLY_TEST_RUNNING="true"
        echo "Only test seems to be running"
    fi
}

# Check that odoo processes are running (and, with workers configured, that
# the expected number of them exists). Registers itself as skipped when only
# a click-odoo script or only tests are running.
# Globals: ODOO_CMD, OUTPUT, ONLY_CLICK_ODOO_RUNNING, ONLY_TEST_RUNNING (read),
#          skipped (appended)
function odoo_processes() {
    local main_process_probe

    if [[ $ONLY_CLICK_ODOO_RUNNING != "false" || $ONLY_TEST_RUNNING != "false" ]]; then
        skipped+=("odoo_processes")
        return 0
    fi

    # main odoo process, ignoring interactive "odoo shell" sessions
    main_process_probe='ps fax 2>/dev/null | grep '"$ODOO_CMD"' | grep -v grep | grep -v "odoo.* shell\( \|$\)"'"$OUTPUT"
    check bash -c "$main_process_probe"
    # with workers configured, also verify the worker processes
    if [[ $(get_odoo_workers) -gt 0 ]]; then
        check odoo_workers
    fi
}

# Check that odoo is listening on its TCP ports: 8069 always, 8072
# (longpolling) additionally when workers are configured. Registers itself as
# skipped when HTTP is disabled or only a click-odoo script / only tests are
# running.
# Globals: OUTPUT, ONLY_CLICK_ODOO_RUNNING, ONLY_TEST_RUNNING (read),
#          skipped (appended)
function odoo_listening_ports() {
    if [[ $(get_odoo_http_enabled) != "true" || $ONLY_CLICK_ODOO_RUNNING != "false" || $ONLY_TEST_RUNNING != "false" ]] ; then
        skipped+=("odoo_listening_ports")
        return 0
    fi

    # main HTTP port
    check bash -c "netstat -lnp --tcp | grep :8069${OUTPUT}"
    if [[ $(get_odoo_workers) -gt 0 ]]; then
        # longpolling port, only served in multi-worker mode
        check bash -c "netstat -lnp --tcp | grep :8072${OUTPUT}"
    fi
}

# Check that exactly the expected number of odoo processes is running:
# 1 main process + HTTP workers + cron workers + 1 gevent process.
# Globals: ODOO_CMD (read)
function odoo_workers() {
    local http_workers cron_workers gevent_threads expected_processes count_pipeline

    gevent_threads=1
    http_workers=$(get_odoo_workers)
    cron_workers=$(get_odoo_cron_threads)
    expected_processes=$((1 + http_workers + cron_workers + gevent_threads))

    # count odoo lines in the process list, ignoring "odoo shell" sessions;
    # the $(...) must stay unexpanded here, it is evaluated by the inner bash
    # shellcheck disable=SC2016
    count_pipeline='ps fax 2>/dev/null | grep -v grep | grep -v "odoo.* shell\( \|$\)" | grep -c '"$ODOO_CMD"' 2>/dev/null'
    # shellcheck disable=SC2016
    check bash -c '[[ $('"$count_pipeline"') == '"$expected_processes"' ]]'
}

# Check that odoo answers HTTP requests on 127.0.0.1:8069 and, when workers
# are configured, on the longpolling port too. Registers itself as skipped
# when HTTP is disabled or only a click-odoo script / only tests are running.
# Globals: CURL_COOKIE_JAR, OUTPUT, ONLY_CLICK_ODOO_RUNNING,
#          ONLY_TEST_RUNNING (read), skipped (appended)
function odoo_connectivity() {
    local http_probe

    if [[ $(get_odoo_http_enabled) != "true" || $ONLY_CLICK_ODOO_RUNNING != "false" || $ONLY_TEST_RUNNING != "false" ]] ; then
        skipped+=("odoo_connectivity")
        return 0
    fi

    # the cookie jar is both read and written by curl
    http_probe="curl --cookie-jar \"$CURL_COOKIE_JAR\" --cookie \"$CURL_COOKIE_JAR\" --silent --show-error --fail 127.0.0.1:8069"
    check bash -c "${http_probe}${OUTPUT}"
    # longpolling is only served when odoo runs with workers
    if [[ $(get_odoo_workers) -gt 0 ]]; then
        check odoo_connectivity_longpolling
    fi
}

# Check that the longpolling/websocket port 8072 answers HTTP requests.
# Globals: ODOO_VERSION, CURL_COOKIE_JAR, OUTPUT (read)
# Returns: the status of the curl check
function odoo_connectivity_longpolling() {
    local probe

    if [[ ${ODOO_VERSION%.*} -ge 16 ]] ; then
        # /websocket/health was introduced in Odoo version 16.0
        probe='curl --silent --show-error --fail "http://127.0.0.1:8072/websocket/health"'
    else
        # older versions: plain request against the port, reusing the cookie jar
        probe="curl --cookie-jar \"$CURL_COOKIE_JAR\" --cookie \"$CURL_COOKIE_JAR\" --silent --show-error --fail 127.0.0.1:8072"
    fi
    check bash -c "${probe}${OUTPUT}"
}

# Check that PostgreSQL can be reached: listing the databases with psql must
# succeed within one second.
# Globals: OUTPUT (read)
function postgres_connectivity() {
    check bash -c "timeout 1s psql -l${OUTPUT}"
}

# Check that the odoo database is installed.
#   * regular run with HTTP: /web/login must answer "200 OK" and the
#     database $PGDATABASE must appear in `psql -l`
#   * only tests running: just the database must appear in `psql -l`
#   * otherwise: the check registers itself as skipped
# Globals: CURL_COOKIE_JAR, PGDATABASE, OUTPUT, ONLY_CLICK_ODOO_RUNNING,
#          ONLY_TEST_RUNNING (read), skipped (appended)
function odoo_installed() {
    local login_probe database_probe

    # the database name has to show up as a full word in the psql listing
    database_probe='timeout 1s psql -l | grep " '"$PGDATABASE"' "'"$OUTPUT"

    if [[ $(get_odoo_http_enabled) == "true" && $ONLY_CLICK_ODOO_RUNNING == "false" && $ONLY_TEST_RUNNING == "false" ]] ; then
        # /web/login should display the login mask, i.e. answer 200 instead of
        # redirecting; the response headers are dumped to stdout for grep
        login_probe='curl --cookie-jar "'"$CURL_COOKIE_JAR"'" --cookie "'"$CURL_COOKIE_JAR"'" --silent --dump-header - --show-error --fail "http://127.0.0.1:8069/web/login" | grep "HTTP/1.[10] 200 OK"'"$OUTPUT"
        check bash -c "$login_probe"
        check bash -c "$database_probe"
    elif [[ $ONLY_TEST_RUNNING == "true" ]] ; then
        # while tests are running only the database is checked
        check bash -c "$database_probe"
    else
        skipped+=("odoo_installed")
    fi
}

# Run all checks in order. The run detection sits between the binary check
# and the remaining checks because those consult the markers it sets.
check odoo_bin_exists
odoo_run_detection
for healthcheck_step in \
    odoo_processes \
    odoo_listening_ports \
    odoo_connectivity \
    postgres_connectivity \
    odoo_installed; do
    check "$healthcheck_step"
done

# healthy (exit 0) only if no check recorded an error
if [[ $errors == 0 ]]; then
    exit 0
fi
exit 1

and also a test method for that healthcheck

    def test_healthcheck(self):
        """Run the healthcheck scaffolding through a series of container commands.

        Each step is an argv tuple handed to ``compose_test``; the steps cover
        odoo not running, plain and multi-worker startups, ``odoo shell``,
        disabled HTTP, click-odoo scripts and the persisted cookie jar.
        """
        cookie_jar = "/var/lib/odoo/prod-healthcheck-cookies.txt"
        wait_healthy = "until healthcheck; do sleep 3; done"

        def bash(script, limit=None):
            # argv for `bash -c script`, optionally wrapped in `timeout limit`
            argv = ("bash", "-c", script)
            if limit is not None:
                argv = ("timeout", limit) + argv
            return argv

        def then_reports(state, *check_names):
            # shell suffix asserting that every named check prints `state`
            return "".join(
                " && healthcheck | grep '%s %s'" % (check_name, state)
                for check_name in check_names
            )

        steps = [
            # verify that healthcheck fails if odoo is not running
            bash(
                "if healthcheck; then echo 'healthcheck is healthy without odoo started' 1>&2; exit 1; fi"
            ),
            # verify that healthcheck is working and becomes healthy when odoo is starting
            bash("odoo & " + wait_healthy + ";", "120s"),
            # verify that healthcheck is working with DB_FILTER .*
            bash("DB_FILTER='.*' odoo & " + wait_healthy + ";", "30s"),
            # verify that healthcheck is working with workers
            bash("odoo --workers 2 & " + wait_healthy, "30s"),
            # verify that healthcheck is working with workers and DB_FILTER .*
            bash("DB_FILTER='.*' odoo --workers 2 & " + wait_healthy, "30s"),
            # verify that healthcheck is working if odoo shell is launched
            bash("odoo --workers 2 & odoo shell & " + wait_healthy, "30s"),
            # verify that healthcheck works when disabling http (and http checks are skipped)
            bash(
                "odoo --no-http & "
                + wait_healthy
                + then_reports(
                    "SKIPPED",
                    "odoo_installed",
                    "odoo_listening_ports",
                    "odoo_connectivity",
                ),
                "30s",
            ),
            # verify that http checks are done when running odoo normally with http
            bash(
                "odoo & "
                + wait_healthy
                + then_reports(
                    "OK",
                    "odoo_installed",
                    "odoo_listening_ports",
                    "odoo_connectivity",
                    "odoo_processes",
                ),
                "30s",
            ),
            # verify that healthcheck works with click-odoo scripts, and odoo process check is skipped
            bash(
                "click-odoo-update & "
                + wait_healthy
                + then_reports("SKIPPED", "odoo_processes"),
                "30s",
            ),
            # verify that healthcheck uses cookie jar file
            ("test", "-e", cookie_jar),
            # and stored a session_id cookie
            ("grep", "-R", "session_id", cookie_jar),
            # and after another healthcheck cookies should not change
            # but timestamps (column 5 in awk) do
            ("cp", cookie_jar, cookie_jar + ".1"),
            bash("odoo & " + wait_healthy, "30s"),
            bash(
                "diff"
                + " <(awk '{$5=\"\"; print $0}' < " + cookie_jar + ")"
                + " <(awk '{$5=\"\"; print $0}' <" + cookie_jar + ".1)"
            ),
        ]

        healthcheck_dir = join(WTIOIT_SCAFFOLDINGS_DIR, "healthcheck")
        for sub_env in matrix(odoo_skip={"8.0"}):
            self.compose_test(healthcheck_dir, sub_env, *steps)

@Tardo
Copy link
Contributor Author

Tardo commented May 8, 2024

@ap-wtioit thx!

P.S: I'm sorry I don't speak much, but my pronunciation is like killing a kitten.

@yajo
Copy link
Contributor

yajo commented May 9, 2024

FWIW since Odoo 15.0 there's the /web/health controller for this purpose.

@ap-wtioit
Copy link
Contributor

FWIW since Odoo 15.0 there's the /web/health controller for this purpose.

Unfortunately, it doesn't report any useful information and can still only be queried if Odoo is running with HTTP enabled.

@zamberjo
Copy link

Hi @Tardo! Have you been able to make progress on this?

@Tardo
Copy link
Contributor Author

Tardo commented Jul 15, 2024

Hi @zamberjo, feel free to open a new PR :)
At the moment I don't have much time to dedicate to this... if you can do it, great!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

6 participants