From 7d974d185bd92788198e684a79ccbed0d3e6aa3e Mon Sep 17 00:00:00 2001 From: Kevin M Granger Date: Fri, 4 Aug 2023 19:55:23 -0400 Subject: [PATCH 1/2] Introduce datastructures from committime cleanup Signed-off-by: Kevin M Granger --- exporters/committime/__init__.py | 132 ++++++++++++++++----- exporters/committime/collector_base.py | 30 ++++- exporters/committime/collector_image.py | 35 +++++- exporters/pelorus/utils/openshift_utils.py | 34 ++++++ exporters/tests/test_repo_parsing.py | 50 ++++++++ 5 files changed, 251 insertions(+), 30 deletions(-) create mode 100644 exporters/pelorus/utils/openshift_utils.py create mode 100644 exporters/tests/test_repo_parsing.py diff --git a/exporters/committime/__init__.py b/exporters/committime/__init__.py index 5121b69e9..304598236 100644 --- a/exporters/committime/__init__.py +++ b/exporters/committime/__init__.py @@ -19,9 +19,9 @@ import logging import re -from typing import Optional +from typing import NamedTuple, Optional -import attr +import attrs import giturlparse from pelorus.utils import collect_bad_attribute_path_error, get_nested @@ -35,42 +35,42 @@ # TODO: the majority of these fields are unused. # Let's figure out why they're there. -@attr.define +@attrs.define class CommitMetric: - name: str = attr.field() - annotations: dict = attr.field(default=None, kw_only=True) - labels: dict = attr.field(default=None, kw_only=True) - namespace: Optional[str] = attr.field(default=None, kw_only=True) - - __repo_url: str = attr.field(default=None, init=False) - __repo_protocol = attr.field(default=None, init=False) - __repo_fqdn: str = attr.field(default=None, init=False) - __repo_group = attr.field(default=None, init=False) - __repo_name = attr.field(default=None, init=False) - __repo_project = attr.field(default=None, init=False) - __repo_port = attr.field(default=None, init=False) - __azure_project = attr.field(default=None, init=False) - - committer: Optional[str] = attr.field(default=None, kw_only=True) - commit_hash: Optional[str] = attr.field(default=None, kw_only=True) - commit_time: Optional[str] = attr.field(default=None, kw_only=True) + name: str = attrs.field() + annotations: dict = attrs.field(default=None, kw_only=True) + labels: dict = attrs.field(default=None, kw_only=True) + namespace: Optional[str] = attrs.field(default=None, kw_only=True) + + __repo_url: str = attrs.field(default=None, init=False) + __repo_protocol = attrs.field(default=None, init=False) + __repo_fqdn: str = attrs.field(default=None, init=False) + __repo_group = attrs.field(default=None, init=False) + __repo_name = attrs.field(default=None, init=False) + __repo_project = attrs.field(default=None, init=False) + __repo_port = attrs.field(default=None, init=False) + __azure_project = attrs.field(default=None, init=False) + + committer: Optional[str] = attrs.field(default=None, kw_only=True) + commit_hash: Optional[str] = attrs.field(default=None, kw_only=True) + commit_time: Optional[str] = attrs.field(default=None, kw_only=True) """ A human-readable timestamp. In the future, this and commit_timestamp should be combined. """ - commit_timestamp: Optional[float] = attr.field(default=None, kw_only=True) + commit_timestamp: Optional[float] = attrs.field(default=None, kw_only=True) """ The unix timestamp. In the future, this and commit_time should be combined. """ - build_name: Optional[str] = attr.field(default=None, kw_only=True) - build_config_name: Optional[str] = attr.field(default=None, kw_only=True) + build_name: Optional[str] = attrs.field(default=None, kw_only=True) + build_config_name: Optional[str] = attrs.field(default=None, kw_only=True) - image_location: Optional[str] = attr.field(default=None, kw_only=True) - image_name: Optional[str] = attr.field(default=None, kw_only=True) - image_tag: Optional[str] = attr.field(default=None, kw_only=True) - image_hash: Optional[str] = attr.field(default=None, kw_only=True) + image_location: Optional[str] = attrs.field(default=None, kw_only=True) + image_name: Optional[str] = attrs.field(default=None, kw_only=True) + image_tag: Optional[str] = attrs.field(default=None, kw_only=True) + image_hash: Optional[str] = attrs.field(default=None, kw_only=True) @property def repo_url(self): @@ -234,3 +234,81 @@ def commit_metric_from_build(app: str, build, errors: list) -> CommitMetric: setattr(metric, attr_name, value) return metric + + +@attrs.frozen +class GitRepo: + "Extracted information about a git repo url." + + url: str + """ + The full URL for the repo. + Obtained from build metadata, Image annotations, etc. + """ + protocol: str + fqdn: str + group: str + name: str + "The git repo name, e.g. myrepo.git" + port: Optional[str] + + @property + def project(self) -> str: + "Alias for the repo name." + return self.name + + @property + def server(self) -> str: + "The protocol, server FQDN, and port in URL format." + url = f"{self.protocol}://{self.fqdn}" + + if self.port: + url += f":{self.port}" + + return url + + @classmethod + def from_url(cls, url: str): + "Parse the given URL and handle the edge cases for it." + + # code inherited from old committime metric class. + # Unsure of the purpose of some of this code. + + # Ensure git URI does not end with "/", issue #590 + url = url.strip("/") + parsed = giturlparse.parse(url) + if len(parsed.protocols) > 0 and parsed.protocols[0] not in SUPPORTED_PROTOCOLS: + raise ValueError("Unsupported protocol %s", parsed.protocols[0]) + protocol = parsed.protocol + # In the case of multiple subgroups the host will be in the pathname + # Otherwise, it will be in the resource + if parsed.pathname.startswith("//"): + fqdn = parsed.pathname.split("/")[2] + protocol = parsed.protocols[0] + else: + fqdn = parsed.resource + group = parsed.owner + name = parsed.name + port = parsed.port + + return cls(url, protocol, fqdn, group, name, port) + + +class CommitInfo(NamedTuple): + """ + Information used to retrieve commit time information. + Previously, this information wasn't guaranteed to be present, + which made the code messy with various checks and exceptions, etc. + Or worse, just hoping things weren't None and we wouldn't crash the exporter. + + In addition, it was unclear how exporters should report the commit time: + they change it on the metric, but also return the metric, + but if they return None, it shouldn't be counted... confusing. + This allows us to handle things more consistently. + """ + + repo: GitRepo + commit_hash: str + + +__all__ = ["CommitMetric"] diff --git a/exporters/committime/collector_base.py b/exporters/committime/collector_base.py index fb1718842..525b6feb0 100644 --- a/exporters/committime/collector_base.py +++ b/exporters/committime/collector_base.py @@ -20,16 +20,18 @@ import logging import re from abc import abstractmethod -from typing import ClassVar, Iterable, Optional +from typing import Any, ClassVar, Iterable, Optional import attrs -from attrs import define, field +from attrs import define, field, frozen from jsonpath_ng import parse from openshift.dynamic import DynamicClient from prometheus_client.core import GaugeMetricFamily import pelorus from committime import CommitMetric, commit_metric_from_build +from exporters.pelorus.deserialization import nested, retain_source +from exporters.pelorus.utils.openshift_utils import CommonResourceInstance from pelorus.config import env_vars from pelorus.config.converters import comma_separated, pass_through from pelorus.utils import Url, get_nested @@ -455,3 +457,27 @@ def _get_repo_from_build_config(self, build): return git_uri + ".git" return None + + +@frozen +class Build(CommonResourceInstance): + strategy: str = field(metadata=nested("spec.strategy.type")) + status: str = field(metadata=nested("status.phase")) + + openshift_source: Any = field(metadata=retain_source()) + + image_hash: Optional[str] = field( + default=None, metadata=nested("status.output.to.imageDigest") + ) + repo_url: Optional[str] = field( + default=None, metadata=nested("spec.source.git.uri") + ) + commit_hash: Optional[str] = field( + default=None, metadata=nested("spec.revision.git.commit") + ) + config_namespace: Optional[str] = field( + default=None, metadata=nested("status.config.namespace") + ) + config_name: Optional[str] = field( + default=None, metadata=nested("status.config.name") + ) diff --git a/exporters/committime/collector_image.py b/exporters/committime/collector_image.py index ee825e1b1..beb0e776c 100644 --- a/exporters/committime/collector_image.py +++ b/exporters/committime/collector_image.py @@ -18,14 +18,47 @@ import logging from typing import Iterable, Optional -from attrs import define +from attrs import define, field, frozen from committime import CommitMetric +from exporters.pelorus.deserialization import nested +from exporters.pelorus.utils.openshift_utils import CommonResourceInstance from pelorus.timeutil import parse_guessing_timezone_DYNAMIC, to_epoch_from_string from pelorus.utils import collect_bad_attribute_path_error, get_nested from .collector_base import AbstractCommitCollector +COMMIT_TIME_DOCKER_LABEL = "io.openshift.build.commit.date" + + +@frozen +class DockerLabelInfo: + namespace: Optional[str] = field( + default=None, metadata=nested(["io.openshift.build.namespace"]) + ) + commit_time: Optional[str] = field( + default=None, metadata=nested([COMMIT_TIME_DOCKER_LABEL]) + ) + commit_hash: Optional[str] = field( + default=None, metadata=nested(["io.openshift.build.commit.id"]) + ) + + +@frozen +class Image(CommonResourceInstance): + "Relevant data from an `image.openshift.io/v1/Image`" + + # TODO: this doesn't seem right but was copied from previous code + image_location: str = field(metadata=nested("dockerImageReference")) + + docker_labels: Optional[DockerLabelInfo] = field( + default=None, metadata=nested("dockerImageMetadata.Config.Labels") + ) + + @property + def hash(self): + return self.metadata.name + @define(kw_only=True) class ImageCommitCollector(AbstractCommitCollector): diff --git a/exporters/pelorus/utils/openshift_utils.py b/exporters/pelorus/utils/openshift_utils.py new file mode 100644 index 000000000..4b5c8c5d8 --- /dev/null +++ b/exporters/pelorus/utils/openshift_utils.py @@ -0,0 +1,34 @@ +""" +Type stubs to make working with openshift objects a little easier. +These are not meant to be "real" types. Instead, you can claim +that untyped, dynamic data from openshift "fits" these shapes. + +These should probably be type stubs only, but I can't figure +out how to do that yet. +""" +from typing import TypeVar + +import attrs + + +@attrs.frozen +class Metadata: + """ + OpenShift metadata. Almost always guaranteed to be present. + """ + + name: str + namespace: str + labels: dict[str, str] + annotations: dict[str, str] + + +@attrs.frozen +class CommonResourceInstance: + "Resource instances that we work with usually have the typical metadata." + apiVersion: str + kind: str + metadata: Metadata + + +R = TypeVar("R", bound=CommonResourceInstance) diff --git a/exporters/tests/test_repo_parsing.py b/exporters/tests/test_repo_parsing.py new file mode 100644 index 000000000..f11f3ae37 --- /dev/null +++ b/exporters/tests/test_repo_parsing.py @@ -0,0 +1,50 @@ +import pytest + +from committime import GitRepo + + +@pytest.mark.parametrize( + "url,protocol,fqdn,project_name", + [ + ("https://dogs.git.foo/dogs/repo.git", "https", "dogs.git.foo", "repo"), + ("http://dogs.git.foo/dogs/repo.git", "http", "dogs.git.foo", "repo"), + ("http://noabank.git.foo/chase/git.git", "http", "noabank.git.foo", "git"), + ("ssh://git.moos.foo/maverick/tootsie.git", "ssh", "git.moos.foo", "tootsie"), + ("git@github.com:konveyor/pelorus.git", "ssh", "github.com", "pelorus"), + ( + "https://dev.azure.com/azuretest", + "https", + "dev.azure.com", + "azuretest", + ), + ( + "https://gitlab.com/firstgroup/secondgroup/myrepo.git", + "https", + "gitlab.com", + "myrepo", + ), + ], +) +def test_gitrepo_parsing(url: str, protocol: str, fqdn: str, project_name: str): + "Tests that git url parsing works as expected." + repo = GitRepo.from_url(url) + assert repo.url == url + assert repo.protocol == protocol + assert repo.fqdn == fqdn + + if fqdn == "dev.azure.com": + # azure does not have groups + assert repo.group is None + else: + assert repo.group is not None + + assert repo.project == project_name + + +@pytest.mark.parametrize( + "malformed_url", + ["kmoos://myprotocol/buffy/noext/noext", "notvalid://breakme/snoopy/gtist.git"], +) +def test_malformed_git_url(malformed_url: str): + with pytest.raises(ValueError): + GitRepo.from_url(malformed_url) From 52c35faa0551e7ab9e740b793844bdf061d11f94 Mon Sep 17 00:00:00 2001 From: Kevin M Granger Date: Wed, 23 Aug 2023 16:38:14 -0400 Subject: [PATCH 2/2] Fix import paths Signed-off-by: Kevin M Granger --- exporters/committime/collector_base.py | 4 ++-- exporters/committime/collector_image.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/exporters/committime/collector_base.py b/exporters/committime/collector_base.py index 525b6feb0..b2844de62 100644 --- a/exporters/committime/collector_base.py +++ b/exporters/committime/collector_base.py @@ -30,11 +30,11 @@ import pelorus from committime import CommitMetric, commit_metric_from_build -from exporters.pelorus.deserialization import nested, retain_source -from exporters.pelorus.utils.openshift_utils import CommonResourceInstance from pelorus.config import env_vars from pelorus.config.converters import comma_separated, pass_through +from pelorus.deserialization import nested, retain_source from pelorus.utils import Url, get_nested +from pelorus.utils.openshift_utils import CommonResourceInstance from provider_common import format_app_name # Custom annotations env for the Build diff --git a/exporters/committime/collector_image.py b/exporters/committime/collector_image.py index beb0e776c..0f2ddec79 100644 --- a/exporters/committime/collector_image.py +++ b/exporters/committime/collector_image.py @@ -21,10 +21,10 @@ from attrs import define, field, frozen from committime import CommitMetric -from exporters.pelorus.deserialization import nested -from exporters.pelorus.utils.openshift_utils import CommonResourceInstance +from pelorus.deserialization import nested from pelorus.timeutil import parse_guessing_timezone_DYNAMIC, to_epoch_from_string from pelorus.utils import collect_bad_attribute_path_error, get_nested +from pelorus.utils.openshift_utils import CommonResourceInstance from .collector_base import AbstractCommitCollector