Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce datastructures from committime cleanup #1043

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 105 additions & 27 deletions exporters/committime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@

import logging
import re
from typing import Optional
from typing import NamedTuple, Optional

import attr
import attrs
import giturlparse

from pelorus.utils import collect_bad_attribute_path_error, get_nested
Expand All @@ -35,42 +35,42 @@

# TODO: the majority of these fields are unused.
# Let's figure out why they're there.
@attr.define
@attrs.define
class CommitMetric:
name: str = attr.field()
annotations: dict = attr.field(default=None, kw_only=True)
labels: dict = attr.field(default=None, kw_only=True)
namespace: Optional[str] = attr.field(default=None, kw_only=True)

__repo_url: str = attr.field(default=None, init=False)
__repo_protocol = attr.field(default=None, init=False)
__repo_fqdn: str = attr.field(default=None, init=False)
__repo_group = attr.field(default=None, init=False)
__repo_name = attr.field(default=None, init=False)
__repo_project = attr.field(default=None, init=False)
__repo_port = attr.field(default=None, init=False)
__azure_project = attr.field(default=None, init=False)

committer: Optional[str] = attr.field(default=None, kw_only=True)
commit_hash: Optional[str] = attr.field(default=None, kw_only=True)
commit_time: Optional[str] = attr.field(default=None, kw_only=True)
name: str = attrs.field()
annotations: dict = attrs.field(default=None, kw_only=True)
labels: dict = attrs.field(default=None, kw_only=True)
namespace: Optional[str] = attrs.field(default=None, kw_only=True)

__repo_url: str = attrs.field(default=None, init=False)
__repo_protocol = attrs.field(default=None, init=False)
__repo_fqdn: str = attrs.field(default=None, init=False)
__repo_group = attrs.field(default=None, init=False)
__repo_name = attrs.field(default=None, init=False)
__repo_project = attrs.field(default=None, init=False)
__repo_port = attrs.field(default=None, init=False)
__azure_project = attrs.field(default=None, init=False)

committer: Optional[str] = attrs.field(default=None, kw_only=True)
commit_hash: Optional[str] = attrs.field(default=None, kw_only=True)
commit_time: Optional[str] = attrs.field(default=None, kw_only=True)
"""
A human-readable timestamp.
In the future, this and commit_timestamp should be combined.
"""
commit_timestamp: Optional[float] = attr.field(default=None, kw_only=True)
commit_timestamp: Optional[float] = attrs.field(default=None, kw_only=True)
"""
The unix timestamp.
In the future, this and commit_time should be combined.
"""

build_name: Optional[str] = attr.field(default=None, kw_only=True)
build_config_name: Optional[str] = attr.field(default=None, kw_only=True)
build_name: Optional[str] = attrs.field(default=None, kw_only=True)
build_config_name: Optional[str] = attrs.field(default=None, kw_only=True)

image_location: Optional[str] = attr.field(default=None, kw_only=True)
image_name: Optional[str] = attr.field(default=None, kw_only=True)
image_tag: Optional[str] = attr.field(default=None, kw_only=True)
image_hash: Optional[str] = attr.field(default=None, kw_only=True)
image_location: Optional[str] = attrs.field(default=None, kw_only=True)
image_name: Optional[str] = attrs.field(default=None, kw_only=True)
image_tag: Optional[str] = attrs.field(default=None, kw_only=True)
image_hash: Optional[str] = attrs.field(default=None, kw_only=True)

@property
def repo_url(self):
Expand Down Expand Up @@ -234,3 +234,81 @@ def commit_metric_from_build(app: str, build, errors: list) -> CommitMetric:
setattr(metric, attr_name, value)

return metric


@attrs.frozen
class GitRepo:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a replace of CommitMetric right?

If so, please checks the changes here #900 so they are also applied here

And if you want, you can look into this issue #903

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not a replacement, but is part of the replacement input.

Mind if I ask a few more questions in #900?

"Extracted information about a git repo url."

url: str
"""
The full URL for the repo.
Obtained from build metadata, Image annotations, etc.
"""
protocol: str
fqdn: str
group: str
name: str
"The git repo name, e.g. myrepo.git"
port: Optional[str]

@property
def project(self) -> str:
"Alias for the repo name."
return self.name

@property
def server(self) -> str:
"The protocol, server FQDN, and port in URL format."
url = f"{self.protocol}://{self.fqdn}"

if self.port:
url += f":{self.port}"

return url

@classmethod
def from_url(cls, url: str):
"Parse the given URL and handle the edge cases for it."

# code inherited from old committime metric class.
# Unsure of the purpose of some of this code.

# Ensure git URI does not end with "/", issue #590
url = url.strip("/")
parsed = giturlparse.parse(url)
if len(parsed.protocols) > 0 and parsed.protocols[0] not in SUPPORTED_PROTOCOLS:
raise ValueError("Unsupported protocol %s", parsed.protocols[0])
protocol = parsed.protocol
# In the case of multiple subgroups the host will be in the pathname
# Otherwise, it will be in the resource
if parsed.pathname.startswith("//"):
fqdn = parsed.pathname.split("/")[2]
protocol = parsed.protocols[0]
else:
fqdn = parsed.resource
group = parsed.owner
name = parsed.name
port = parsed.port

return cls(url, protocol, fqdn, group, name, port)


class CommitInfo(NamedTuple):
"""
Information used to retrieve commit time information.
Previously, this information wasn't guaranteed to be present,
which made the code messy with various checks and exceptions, etc.
Or worse, just hoping things weren't None and we wouldn't crash the exporter.

In addition, it was unclear how exporters should report the commit time:
they change it on the metric, but also return the metric,
but if they return None, it shouldn't be counted... confusing.
This allows us to handle things more consistently.
"""

repo: GitRepo
commit_hash: str


__all__ = ["CommitMetric"]
30 changes: 28 additions & 2 deletions exporters/committime/collector_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,18 @@
import logging
import re
from abc import abstractmethod
from typing import ClassVar, Iterable, Optional
from typing import Any, ClassVar, Iterable, Optional

import attrs
from attrs import define, field
from attrs import define, field, frozen
from jsonpath_ng import parse
from openshift.dynamic import DynamicClient
from prometheus_client.core import GaugeMetricFamily

import pelorus
from committime import CommitMetric, commit_metric_from_build
from exporters.pelorus.deserialization import nested, retain_source
from exporters.pelorus.utils.openshift_utils import CommonResourceInstance
from pelorus.config import env_vars
from pelorus.config.converters import comma_separated, pass_through
from pelorus.utils import Url, get_nested
Expand Down Expand Up @@ -455,3 +457,27 @@ def _get_repo_from_build_config(self, build):
return git_uri + ".git"

return None


@frozen
class Build(CommonResourceInstance):
strategy: str = field(metadata=nested("spec.strategy.type"))
status: str = field(metadata=nested("status.phase"))

openshift_source: Any = field(metadata=retain_source())

image_hash: Optional[str] = field(
default=None, metadata=nested("status.output.to.imageDigest")
)
repo_url: Optional[str] = field(
default=None, metadata=nested("spec.source.git.uri")
)
commit_hash: Optional[str] = field(
default=None, metadata=nested("spec.revision.git.commit")
)
config_namespace: Optional[str] = field(
default=None, metadata=nested("status.config.namespace")
)
config_name: Optional[str] = field(
default=None, metadata=nested("status.config.name")
)
35 changes: 34 additions & 1 deletion exporters/committime/collector_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,47 @@
import logging
from typing import Iterable, Optional

from attrs import define
from attrs import define, field, frozen

from committime import CommitMetric
from exporters.pelorus.deserialization import nested
from exporters.pelorus.utils.openshift_utils import CommonResourceInstance
from pelorus.timeutil import parse_guessing_timezone_DYNAMIC, to_epoch_from_string
from pelorus.utils import collect_bad_attribute_path_error, get_nested

from .collector_base import AbstractCommitCollector

COMMIT_TIME_DOCKER_LABEL = "io.openshift.build.commit.date"


@frozen
class DockerLabelInfo:
namespace: Optional[str] = field(
default=None, metadata=nested(["io.openshift.build.namespace"])
)
commit_time: Optional[str] = field(
default=None, metadata=nested([COMMIT_TIME_DOCKER_LABEL])
)
commit_hash: Optional[str] = field(
default=None, metadata=nested(["io.openshift.build.commit.id"])
)


@frozen
class Image(CommonResourceInstance):
"Relevant data from an `image.openshift.io/v1/Image`"

# TODO: this doesn't seem right but was copied from previous code
image_location: str = field(metadata=nested("dockerImageReference"))

docker_labels: Optional[DockerLabelInfo] = field(
default=None, metadata=nested("dockerImageMetadata.Config.Labels")
)

@property
def hash(self):
return self.metadata.name


@define(kw_only=True)
class ImageCommitCollector(AbstractCommitCollector):
Expand Down
34 changes: 34 additions & 0 deletions exporters/pelorus/utils/openshift_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""
Type stubs to make working with openshift objects a little easier.
These are not meant to be "real" types. Instead, you can claim
that untyped, dynamic data from openshift "fits" these shapes.

These should probably be type stubs only, but I can't figure
out how to do that yet.
"""
from typing import TypeVar

import attrs


@attrs.frozen
class Metadata:
"""
OpenShift metadata. Almost always guaranteed to be present.
"""

name: str
namespace: str
labels: dict[str, str]
annotations: dict[str, str]


@attrs.frozen
class CommonResourceInstance:
"Resource instances that we work with usually have the typical metadata."
apiVersion: str
kind: str
metadata: Metadata


R = TypeVar("R", bound=CommonResourceInstance)
50 changes: 50 additions & 0 deletions exporters/tests/test_repo_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import pytest

from committime import GitRepo


@pytest.mark.parametrize(
"url,protocol,fqdn,project_name",
[
("https://dogs.git.foo/dogs/repo.git", "https", "dogs.git.foo", "repo"),
("http://dogs.git.foo/dogs/repo.git", "http", "dogs.git.foo", "repo"),
("http://noabank.git.foo/chase/git.git", "http", "noabank.git.foo", "git"),
("ssh://git.moos.foo/maverick/tootsie.git", "ssh", "git.moos.foo", "tootsie"),
("[email protected]:konveyor/pelorus.git", "ssh", "github.com", "pelorus"),
(
"https://dev.azure.com/azuretest",
"https",
"dev.azure.com",
"azuretest",
),
(
"https://gitlab.com/firstgroup/secondgroup/myrepo.git",
"https",
"gitlab.com",
"myrepo",
),
],
)
def test_gitrepo_parsing(url: str, protocol: str, fqdn: str, project_name: str):
"Tests that git url parsing works as expected."
repo = GitRepo.from_url(url)
assert repo.url == url
assert repo.protocol == protocol
assert repo.fqdn == fqdn

if fqdn == "dev.azure.com":
# azure does not have groups
assert repo.group is None
else:
assert repo.group is not None

assert repo.project == project_name


@pytest.mark.parametrize(
"malformed_url",
["kmoos://myprotocol/buffy/noext/noext", "notvalid://breakme/snoopy/gtist.git"],
)
def test_malformed_git_url(malformed_url: str):
with pytest.raises(ValueError):
GitRepo.from_url(malformed_url)