From ca24e1ee86d866b2c5187bf033ae71f1ff5e6438 Mon Sep 17 00:00:00 2001
From: Leandro Motta Barros
Date: Thu, 15 Dec 2022 14:40:16 -0300
Subject: [PATCH] Engine healthcheck: deal with empty uuid file

In rare cases (believed to be caused by a non-atomic file creation and
writing operation in containerd), we end up with an empty file at
`/mnt/data/docker/containerd/daemon/io.containerd.grpc.v1.introspection/uuid`.
This causes `ctr version` (and hence the health check) to fail. See
https://github.com/balena-os/balena-engine/issues/322

This commit addresses this issue in two ways:

1. Before running `ctr version`, we check if the uuid file exists and is
   empty. If so, we remove it. (The subsequent execution of `ctr version`
   by the healthcheck will create the file again.)
2. After running `ctr version`, we check if the uuid file was really
   created and is not empty.

In both cases, when an empty (or, in the second case, missing) uuid file
is detected, we log the event to help us confirm our hypothesis about the
root cause.

Signed-off-by: Leandro Motta Barros
Change-type: patch
---
 .../balena/balena/balena-healthcheck | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/meta-balena-common/recipes-containers/balena/balena/balena-healthcheck b/meta-balena-common/recipes-containers/balena/balena/balena-healthcheck
index 19a1f1b24e..df6688c061 100644
--- a/meta-balena-common/recipes-containers/balena/balena/balena-healthcheck
+++ b/meta-balena-common/recipes-containers/balena/balena/balena-healthcheck
@@ -8,5 +8,22 @@ CONTAINERD_SOCKET="/run/balena-engine/containerd/balena-engine-containerd.sock"
 # Check if balena-engine-daemon is responding.
 curl --fail --unix-socket $BALENAD_SOCKET http:/v1.40/_ping > /dev/null 2>&1
 
+# Due to a non-atomic file creation and writing operation in containerd, we
+# sometimes end up with an empty `uuid` file. This causes `ctr version` (and
+# hence the health check) to fail. We therefore remove this file if it is
+# present and empty. See https://github.com/balena-os/balena-engine/issues/322
+UUID_FILE="/mnt/data/docker/containerd/daemon/io.containerd.grpc.v1.introspection/uuid"
+if [ -f "$UUID_FILE" ] && [ ! -s "$UUID_FILE" ]; then
+    echo "healthcheck: removing empty $UUID_FILE"
+    rm -f "$UUID_FILE"
+fi
+
 # Check if balena-engine-containerd is responding.
 balena-engine-containerd-ctr --address $CONTAINERD_SOCKET version > /dev/null 2>&1
+
+# The uuid file is expected to exist and be non-empty after `ctr version`.
+# `-s` is false for a missing file as well as an empty one, so this single
+# test catches both ways the expectation can fail. Log the event.
+if [ ! -s "$UUID_FILE" ]; then
+    echo "healthcheck: $UUID_FILE missing or empty after 'ctr version'"
+fi