From 2f17f92e11123db5534885ec7f9b293352ee4d8b Mon Sep 17 00:00:00 2001
From: Jaromir Wysoglad
Date: Thu, 1 Aug 2024 04:46:02 -0400
Subject: [PATCH] Lower autoscaling alarm threshold to 50%

We see in CI that sometimes the CPU metric doesn't reach above 60%, so
the stack never scales up and the CI fails. I've never seen the metric
stay below 50%, so a 50% threshold would always have been reached in
past runs.

Such low values are possibly caused by the short duration of the load we
create in the autoscaled instances. The load runs for only 2 minutes,
which is too short when ceilometer polls only every 2 minutes. With bad
timing, the polling windows catch only part of the load and we get these
low readings. I'll also increase the load duration in tempest.
---
 ci/vars-autoscaling-tempest.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/vars-autoscaling-tempest.yml b/ci/vars-autoscaling-tempest.yml
index 239a83ba..40e52e37 100644
--- a/ci/vars-autoscaling-tempest.yml
+++ b/ci/vars-autoscaling-tempest.yml
@@ -16,7 +16,7 @@ cifmw_tempest_tempestconf_config:
     telemetry.prometheus_service_url "https://metric-storage-prometheus.openstack.svc.cluster.local:9090"
     telemetry.ceilometer_polling_interval 120
     telemetry.prometheus_scrape_interval 30
-    telemetry.alarm_threshold 60000000000
+    telemetry.alarm_threshold 50000000000
 cifmw_test_operator_tempest_include_list: |
   telemetry_tempest_plugin.scenario
   telemetry_tempest_plugin.aodh
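
A minimal sketch of the timing argument behind this patch (not part of the
patch itself): it assumes a 2-minute load window offset from a 120-second
polling grid, with hypothetical timestamps chosen only to mirror the commit
message, not taken from an actual CI run.

```python
# Illustrative sketch: why a 2-minute load can be partly missed by a
# 120 s polling interval. All timestamps below are assumptions.

POLL_INTERVAL = 120              # matches telemetry.ceilometer_polling_interval
LOAD_START, LOAD_END = 60, 180   # hypothetical 2-minute load, offset by 60 s

def busy_seconds(window_start: int, window_end: int) -> float:
    """CPU-busy seconds of the load that fall inside one polling window."""
    overlap = min(window_end, LOAD_END) - max(window_start, LOAD_START)
    return max(0.0, overlap)

# Each poll reports average utilization over the preceding interval.
for poll_end in range(POLL_INTERVAL, 4 * POLL_INTERVAL + 1, POLL_INTERVAL):
    poll_start = poll_end - POLL_INTERVAL
    util = busy_seconds(poll_start, poll_end) / POLL_INTERVAL * 100
    print(f"poll at t={poll_end:3d}s -> ~{util:.0f}% CPU")

# With the load offset from the polling grid, each affected poll sees only
# ~50% utilization: a 60% alarm threshold is never crossed, while the value
# never drops below ~50%, which matches the observation in the commit message.
```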