From 5f4ba7f4aab2e6d3b6d2cc9c071b47cd7b8c02c5 Mon Sep 17 00:00:00 2001 From: Anton Smorodskyi Date: Mon, 23 Oct 2023 13:59:58 +0200 Subject: [PATCH] Introduce accumulating statistic in time-series DB Idea is to start gathering statistic about amount of entities existing in every account at every point of time. This will allow detect different kind of anomalies in testing pipelines (e.g. bugs in scheduling , bugs in cleanup etc.) This PR just introduce functionality which allow to achieve this in future PRs we will start using it in EVERY query for EVERY cloud provider. --- .gitignore | 3 --- ocw/lib/azure.py | 21 +++++++++++++-------- ocw/lib/influx.py | 45 ++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 1 + templates/pcw.ini | 8 ++++++++ tests/test_influx.py | 6 ++++++ webui/PCWConfig.py | 17 ++++++++++------- 7 files changed, 83 insertions(+), 18 deletions(-) create mode 100644 ocw/lib/influx.py create mode 100644 tests/test_influx.py diff --git a/.gitignore b/.gitignore index 138209fc..0aa39ed8 100644 --- a/.gitignore +++ b/.gitignore @@ -11,9 +11,6 @@ static/ venv/ .venv/ pyvenv.cfg -lib64/ -lib/ -bin/ # Codecov .coverage diff --git a/ocw/lib/azure.py b/ocw/lib/azure.py index 984a9751..3311e699 100644 --- a/ocw/lib/azure.py +++ b/ocw/lib/azure.py @@ -10,7 +10,8 @@ from dateutil.parser import parse from webui.PCWConfig import PCWConfig from .provider import Provider -from ..models import Instance +from ..models import Instance, ProviderChoice +from .influx import Influx class Azure(Provider): @@ -82,9 +83,11 @@ def get_storage_key(self, storage_account: str) -> str: return storage_keys[0] def list_instances(self) -> list: - return list(self.compute_mgmt_client().virtual_machines.list_all()) + all_vms = list(self.compute_mgmt_client().virtual_machines.list_all()) + Influx().write(ProviderChoice.AZURE.value, Influx.VMS_QUANTITY, len(all_vms)) + return all_vms - def get_vm_types_in_resource_group(self, resource_group: str) -> str: + def get_vm_types_in_resource_group(self, resource_group: str) -> str | None: self.log_dbg(f"Listing VMs for {resource_group}") type_set = set() try: @@ -94,9 +97,7 @@ def get_vm_types_in_resource_group(self, resource_group: str) -> str: except ResourceNotFoundError: self.log_dbg(f"{resource_group} already deleted") return None - if len(type_set) > 0: - return ', '.join(type_set) - return "N/A" + return ', '.join(type_set) if type_set else "N/A" def get_resource_properties(self, resource_id): return self.resource_mgmt_client().resources.get_by_id(resource_id, api_version="2023-07-03").properties @@ -112,10 +113,14 @@ def delete_resource(self, resource_id: str) -> None: self.resource_mgmt_client().resource_groups.begin_delete(resource_id) def list_images(self): - return self.list_resource(filters="resourceType eq 'Microsoft.Compute/images'") + all_images = self.list_resource(filters="resourceType eq 'Microsoft.Compute/images'") + Influx().write(ProviderChoice.AZURE.value, Influx.IMAGES_QUANTITY, len(all_images)) + return all_images def list_disks(self): - return self.list_resource(filters="resourceType eq 'Microsoft.Compute/disks'") + all_disks = self.list_resource(filters="resourceType eq 'Microsoft.Compute/disks'") + Influx().write(ProviderChoice.AZURE.value, Influx.DISK_QUANTITY, len(all_disks)) + return all_disks def list_resource(self, filters=None) -> list: return list(self.resource_mgmt_client().resources.list_by_resource_group( diff --git a/ocw/lib/influx.py b/ocw/lib/influx.py new file mode 100644 index 00000000..de5352c4 --- /dev/null +++ b/ocw/lib/influx.py @@ -0,0 +1,45 @@ +import os +import logging +from influxdb_client import InfluxDBClient, Point +from influxdb_client.client.write_api import SYNCHRONOUS, WriteApi +from influxdb_client.client.exceptions import InfluxDBError +from urllib3.exceptions import HTTPError, TimeoutError + + +from webui.PCWConfig import PCWConfig + +logger = logging.getLogger(__name__) + + +class Influx: + __client: WriteApi | None = None + VMS_QUANTITY: str = "vms_quantity" + IMAGES_QUANTITY: str = "images_quantity" + DISK_QUANTITY: str = "disk_quantity" + + def __init__(self) -> None: + self.bucket: str = str(PCWConfig.get_feature_property("influxdb", "bucket")) + self.org: str = str(PCWConfig.get_feature_property("influxdb", "org")) + + # this is implementation of Singleton pattern + def __new__(cls: type["Influx"]) -> "Influx": + if not hasattr(cls, "instance") or cls.instance is None: + if os.getenv("INFLUX_TOKEN") is None: + logger.warning("INFLUX_TOKEN is not set, InfluxDB will not be used") + elif PCWConfig.has("influxdb/url"): + url: str = str(PCWConfig.get_feature_property("influxdb", "url")) + cls.__client = InfluxDBClient( + url=url, + token=os.getenv("INFLUX_TOKEN"), + org=str(PCWConfig.get_feature_property("influxdb", "org")), + ).write_api(write_options=SYNCHRONOUS) + cls.instance = super(Influx, cls).__new__(cls) + return cls.instance + + def write(self, measurement: str, field: str, value: int) -> None: + if self.__client: + point = Point(measurement).field(field, value) + try: + self.__client.write(bucket=self.bucket, org=self.org, record=point) + except (InfluxDBError, HTTPError, TimeoutError) as exception: + logger.warning(f"Failed to write to influxdb(record={point}): {exception}") diff --git a/requirements.txt b/requirements.txt index fe89102c..b671bbf7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,3 +21,4 @@ openstacksdk~=1.5.0 python-dateutil apscheduler kubernetes +influxdb-client diff --git a/templates/pcw.ini b/templates/pcw.ini index 4bdd8008..7f69e307 100644 --- a/templates/pcw.ini +++ b/templates/pcw.ini @@ -57,3 +57,11 @@ vpc_cleanup = true [updaterun] # if openqa_ttl tag is not defined this TTL will be set to the instance default_ttl = 44100 # value is in seconds + +# used to store statistic about amount of entities tracked in the cloud +[influxdb] +# defines standard influxdb connection params - organization , bucket +# for more details please refer to official documentation https://docs.influxdata.com/influxdb/v2/api-guide/client-libraries/python/#Copyright +org=pcw +bucket=cloud_stat +url=http://localhost:8086 diff --git a/tests/test_influx.py b/tests/test_influx.py new file mode 100644 index 00000000..995a519c --- /dev/null +++ b/tests/test_influx.py @@ -0,0 +1,6 @@ +from ocw.lib.influx import Influx + + +def test_influx_init(): + influx = Influx() + assert hasattr(influx, "__client") is False diff --git a/webui/PCWConfig.py b/webui/PCWConfig.py index 0dd1d8ad..c639b10c 100644 --- a/webui/PCWConfig.py +++ b/webui/PCWConfig.py @@ -53,8 +53,8 @@ def getList(self, config_path: str, default: list = None) -> list: class PCWConfig(): @staticmethod - def get_feature_property(feature: str, property: str, namespace: str = None): - default_values = { + def get_feature_property(feature: str, feature_property: str, namespace: str | None = None) -> str | int: + default_values: dict[str, dict[str, int | type[int] | str | type[str] | type[str] | None]] = { 'cleanup/max-age-hours': {'default': 24 * 7, 'return_type': int}, 'cleanup/azure-gallery-name': {'default': 'test_image_gallery', 'return_type': str}, 'cleanup/azure-storage-resourcegroup': {'default': 'openqa-upload', 'return_type': str}, @@ -70,12 +70,15 @@ def get_feature_property(feature: str, property: str, namespace: str = None): 'notify/smtp': {'default': None, 'return_type': str}, 'notify/smtp-port': {'default': 25, 'return_type': int}, 'notify/from': {'default': 'pcw@publiccloud.qa.suse.de', 'return_type': str}, + 'influxdb/org': {'default': 'pcw', 'return_type': str}, + 'influxdb/bucket': {'default': 'cloud_stat', 'return_type': str}, + 'influxdb/url': {'default': None, 'return_type': str}, } - key = '/'.join([feature, property]) + key = '/'.join([feature, feature_property]) if key not in default_values: raise LookupError(f"Missing {key} in default_values list") if namespace: - setting = f'{feature}.namespace.{namespace}/{property}' + setting = f'{feature}.namespace.{namespace}/{feature_property}' if PCWConfig.has(setting): return default_values[key]['return_type'](ConfigFile().get(setting)) return default_values[key]['return_type']( @@ -114,10 +117,10 @@ def has(setting: str) -> bool: return False @staticmethod - def getBoolean(config_path: str, namespace: str = None, default=False) -> bool: + def getBoolean(config_path: str, namespace: str | None = None, default=False) -> bool: if namespace: - feature, property = config_path.split('/') - setting = f'{feature}.namespace.{namespace}/{property}' + feature, feature_property = config_path.split('/') + setting = f'{feature}.namespace.{namespace}/{feature_property}' if PCWConfig.has(setting): value = ConfigFile().get(setting) else: