Skip to content

Commit

Permalink
fixup(FindSources): move get_matching_tag() in front of loop
Browse files Browse the repository at this point in the history
  • Loading branch information
16Martin committed Nov 20, 2024
1 parent 5259baa commit 900029f
Showing 1 changed file with 26 additions and 47 deletions.
73 changes: 26 additions & 47 deletions capycli/bom/findsources.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class TagCache:
existing tag many times.
"""
def __init__(self) -> None:
self.data: dict[Tuple[str, str], set[str]] = {}
self.data: Dict[Tuple[str, str], set[str]] = {}

def __getitem__(self, key: Any) -> set[str]:
"""Get the set of all cached tags for a key."""
Expand Down Expand Up @@ -214,16 +214,14 @@ def get_github_info(repository_url: str, username: str = "",
tags.extend(tmp)
return tags

def _get_github_repo(self, github_ref: str) -> dict[str, Any]:
def _get_github_repo(self, github_ref: str) -> Dict[str, Any]:
"""Fetch GitHub API object identified by @github_ref.
This method interprets @github_ref and fetches the
referenced project's API object from GitHub.
@github_ref could be a simple "<owner>/<repo>" string or any
from the plethora of links that refer to a project on
GitHub.
@github_ref can be a simple "<owner>/<repo>" string or any
from the plethora of links that refer to a
project on GitHub.
By using urlparse() we save ourselves a little bit of work
with trailing queries and fragments, but any @github_ref with
colons where the first colon is not part of '://' will not
colons, where the first colon is not part of '://' will not
yield viable results,
e.g. 'api.github.com:443/repos/sw360/capycli'.
"""
Expand Down Expand Up @@ -265,10 +263,11 @@ def _trailing_zeroes(input_string: str) -> int:
return len(input_string) # it's all 0s

def _gen_tags(self, project: str, version: str, tag: str) ->List[str]:
"""For @project, generate a new set of plausible tags.
"""For @project, interpret @tag to build tags we expect to
correspond to @version.
@tag contains a tag known to exist in @project, while
@version is a semver-ish version string we want encoded
using @tag as a (sort of) template.
using @tag as a (sort of) template.
"""
best_guess: Any = self.version_regex.search(tag)
prefix, suffix = '', ''
Expand Down Expand Up @@ -309,7 +308,7 @@ def _gen_tags(self, project: str, version: str, tag: str) ->List[str]:

return self.tag_cache.filter_and_cache(project, version, guesses)

def _render_github_source_url(self, repo: dict[str, Any], ref: str) -> str:
def _render_github_source_url(self, repo: Dict[str, Any], ref: str) -> str:
"""From API knowledge, create a download link.
There are quite a few indirections involved, when downloading
source code from GitHub. With this method we build what we
Expand All @@ -331,38 +330,18 @@ def _render_github_source_url(self, repo: dict[str, Any], ref: str) -> str:
def get_matching_source_url(self, version: Any, github_ref: str,
version_prefix: Any=None
) -> str:
"""Heuristics to find a tag in GitHub that corresponds to
@version in the project identified by @github_ref.
First we must normalize @github_ref, because we are unsure
what is actually passed as this parameter.
The first check for each retrieved tag is the original
get_matching_tag() and all the guessing happens afterwards.
This has the effect that if our guessing does not yield any
viable results, this algo implicitly falls back to checking
every tag with get_matching_tag(), which is the behaviour of
the previous implementation.
If get_matching_tag() did not yield a positive result, we
start guessing tags:
We only care about such tags that produce a non empty match
with self.version_regex, because only these would ever yield
accepted compare() results in get_matching_tag().
Every such tag can be read as a fixed prefix, followed by a
substring as matched by self.version_regex, followed by a
fixed suffix. Usually, the prefix will be "v" and the suffix
will be empty, but sometimes tags are more elaborate.
We expect only the regex-matchable part of a tag changes
from version to version, while the prefix and the suffix are
static.
Given a tag with a static prefix, a static suffix and a
self.version_regex-matchable substring, we can generate
tag names from semantic versions, by reversing the logic
in to_semver_string().
Comparing the original matchable substring, to the result of
to_semver_string() we should be able to generate similar
matchable substrings from @version.
"""Find a URL to download source code from GitHub. We are
looking for the source code in @github_ref at @version.
We expect to match @version to an existing tag in the repo
identified by @github_ref. We want to have the source
code download URL of that existing tag!
In order to perform this matching, we must retrieve the tags
from GitHub and then analyse them. First, we use
get_matching_tag(). If that doesn't yield a positive result,
we try to infer a tag for @version, to prevent an exhaustive
search over all tags.
"""
try:
repo = self._get_github_repo(github_ref)
Expand All @@ -385,10 +364,10 @@ def get_matching_source_url(self, version: Any, github_ref: str,
# 'name' is a viable index, for instance an error message
tags = {}

for name, api_obj in tags.items():
source_url = self.get_matching_tag([api_obj], version, url)
if len(source_url) > 0: # we found what we believe is
return source_url # the correct source_url
source_url = self.get_matching_tag(tags.values(), version, url)
if len(source_url) > 0: # we found what we believe is
return source_url # the correct source_url
for name in tags:
for guess in self._gen_tags(repo['full_name'], version, name):
if guess in tags: # found on current result-page
return self._render_github_source_url(repo, guess)
Expand Down

0 comments on commit 900029f

Please sign in to comment.