From f6188e35d0bf723facb4f4ed0bc5f9da124e241a Mon Sep 17 00:00:00 2001
From: voetberg
Date: Tue, 6 Aug 2024 09:24:17 -0500
Subject: [PATCH] Common: Rewrite check_expired_dids.

* Update to sqla2.0
* push with PrometheusPusher
* sort imports
* add header
* change except statement to except Exception
---
Review notes (free-text commentary placed after the three-dash separator):
* Purpose: the probe now counts expired DIDs with a
  select(func.count()) statement executed through session.execute()
  instead of session.query() + get_count(), and reports the value via
  PrometheusPusher instead of probe_metrics plus a hand-written
  push_to_gateway loop over PROM_SERVERS.
* The gauge is renamed from 'undertaker.expired_dids' /
  'undertaker_expired_dids' to "expired_dids.total"; anything keyed on
  the old names presumably needs updating -- TODO confirm.
* The count is now printed before the metric is pushed. Any exception
  that propagates out of the PrometheusPusher block is caught by
  `except Exception` and exits UNKNOWN, whereas the removed per-server
  push loop skipped failures with `except: continue`.
* Line breaks and indentation below were restored from a copy whose
  whitespace had been collapsed, and author e-mail addresses were
  already missing from it. Verify against commit f6188e35 before
  applying.

 common/check_expired_dids | 67 +++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 34 deletions(-)

diff --git a/common/check_expired_dids b/common/check_expired_dids
index dd8229bc..1f2c9fe8 100755
--- a/common/check_expired_dids
+++ b/common/check_expired_dids
@@ -1,59 +1,58 @@
-#!/usr/bin/env python
-# Copyright European Organization for Nuclear Research (CERN) 2013
+#!/usr/bin/env python3
+# Copyright European Organization for Nuclear Research (CERN) since 2012
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
-# You may not use this file except in compliance with the License.
-# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
-# Authors:
-# - Vincent Garonne, , 2013
-# - Thomas Beermann, , 2019
-# - Eric Vaandering , 2020-2021
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 """
 Probe to check the backlog of expired dids.
 """
-from __future__ import print_function
 
 import sys
 import traceback
 from datetime import datetime
 
-from prometheus_client import CollectorRegistry, Gauge, push_to_gateway
-from rucio.common.config import config_get
+from sqlalchemy import and_, func, null, select
+
 from rucio.db.sqla import models
 from rucio.db.sqla.session import get_session
-from rucio.db.sqla.util import get_count
 
-from utils.common import probe_metrics
+from utils.common import PrometheusPusher
 
 # Exit statuses
 OK, WARNING, CRITICAL, UNKNOWN = 0, 1, 2, 3
 
-PROM_SERVERS = config_get('monitor', 'prometheus_servers', raise_exception=False, default='')
-if PROM_SERVERS != '':
-    PROM_SERVERS = PROM_SERVERS.split(',')
-
 if __name__ == "__main__":
     try:
-        registry = CollectorRegistry()
         session = get_session()
-        query = session.query(models.DataIdentifier.scope).filter(models.DataIdentifier.expired_at.isnot(None),
-                                                                  models.DataIdentifier.expired_at < datetime.utcnow())
-        result = get_count(query)
-        # Possible check against a threshold. If result > max_value then sys.exit(CRITICAL)
-        probe_metrics.gauge(name='undertaker.expired_dids').set(result)
-        Gauge('undertaker_expired_dids', '', registry=registry).set(result)
-
-        if len(PROM_SERVERS):
-            for server in PROM_SERVERS:
-                try:
-                    push_to_gateway(server.strip(), job='check_expired_dids', registry=registry)
-                except:
-                    continue
-
-        print(result)
-    except:
+        statement = select(
+            func.count()
+        ).select_from(
+            models.DataIdentifier
+        ).where(
+            and_(
+                models.DataIdentifier.expired_at != null(),
+                models.DataIdentifier.expired_at < datetime.utcnow()
+            )
+        )
+        expired_dids = session.execute(statement).scalar_one()
+        print(expired_dids)
+
+        with PrometheusPusher() as manager:
+            (manager.gauge(
+                "expired_dids.total",
+                documentation="All expired dids")
+                .set(expired_dids))
+    except Exception:
         print(traceback.format_exc())
         sys.exit(UNKNOWN)
     sys.exit(OK)