From 3bce113a9746899fd42ebe0502d669a97a9c3b7d Mon Sep 17 00:00:00 2001 From: nkinkade Date: Wed, 7 Aug 2024 13:47:48 -0600 Subject: [PATCH] Reduces terminationGracePeriodSeconds from 180s to 100s (#894) The 180s termination grace period was to account for the very long time it could take for mlab-ns to notice that a pod was shutting down. With Locate and Heartbeat, this process only takes 10s. Nothing changes about the configuration here, except removing unnecessary wait time. Tests still have 30s to complete, and pusher still has 60s to push data to GCS as fast as it can. Resolves https://github.com/m-lab/ops-tracker/issues/1742 --- k8s/daemonsets/templates.jsonnet | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/k8s/daemonsets/templates.jsonnet b/k8s/daemonsets/templates.jsonnet index 2d5681cd..ba72c527 100644 --- a/k8s/daemonsets/templates.jsonnet +++ b/k8s/daemonsets/templates.jsonnet @@ -13,12 +13,11 @@ local PROJECT_ID = std.extVar('PROJECT_ID'); // // * k8s sends SIGTERM to container // * container enables lame duck status -// * monitoring reads lame duck status (60s max) -// * mlab-ns updates server status (60s max) +// * heartbeat notices lame duck status and notifies Locate (10s max) // * all currently running tests complete. (30s max) -// * give everything an additional 30s to be safe -// * 60s + 60s + 30s + 30s = 180s grace period -local terminationGracePeriodSeconds = 180; +// * give Pusher an additional 60s to upload all data +// * 10s + 30s + 60s = 100s grace period +local terminationGracePeriodSeconds = 100; local uuid = { initContainer: { @@ -845,7 +844,7 @@ local Experiment(name, index, bucket, anonMode, datatypes=[], datatypesAutoloade dnsConfig: { nameservers: ['8.8.8.8', '8.8.4.4'], }, - // Only enable extended grace period where production traffic is possible. 
+ // Apply extended grace period, except for mlab-sandbox [if std.extVar('PROJECT_ID') != 'mlab-sandbox' then 'terminationGracePeriodSeconds']: terminationGracePeriodSeconds, }, },