diff --git a/capycli/bom/findsources.py b/capycli/bom/findsources.py index 099e85c..7678bb3 100644 --- a/capycli/bom/findsources.py +++ b/capycli/bom/findsources.py @@ -54,7 +54,7 @@ class TagCache: existing tag many times. """ def __init__(self) -> None: - self.data: dict[Tuple[str, str], set[str]] = {} + self.data: Dict[Tuple[str, str], set[str]] = {} def __getitem__(self, key: Any) -> set[str]: """Get the set of all cached tags for a key.""" @@ -214,16 +214,14 @@ def get_github_info(repository_url: str, username: str = "", tags.extend(tmp) return tags - def _get_github_repo(self, github_ref: str) -> dict[str, Any]: + def _get_github_repo(self, github_ref: str) -> Dict[str, Any]: """Fetch GitHub API object identified by @github_ref. - This method interpretes @github_ref and fetches the - referenced project's API object from GitHub. - @github_ref could be a simple "<owner>/<repo>" string or any - from the plethora of links that refer to a project on - GitHub. + @github_ref can be a simple "<owner>/<repo>" string or any + from the plethora of links that refer to a + project on GitHub. By using urlparse() we save ourselves a little bit of work with trailing queries and fragments, but any @github_ref with - colons where the first colon is not part of '://' will not + colons, where the first colon is not part of '://' will not yield viable results, e.g. 'api.github.com:443/repos/sw360/capycli'. """ @@ -265,10 +263,11 @@ def _trailing_zeroes(input_string: str) -> int: return len(input_string) # it's all 0s def _gen_tags(self, project: str, version: str, tag: str) ->List[str]: - """For @project, generate a new set of plausible tags. + """For @project, interpret @tag to build tags we expect to + correspond to @version. @tag contains a tag known to exist in @project, while @version is a semver-ish version string we want encoded - using @tag as a (sort of) template. + using @tag as a (sort of) template. 
""" best_guess: Any = self.version_regex.search(tag) prefix, suffix = '', '' @@ -309,7 +308,7 @@ def _gen_tags(self, project: str, version: str, tag: str) ->List[str]: return self.tag_cache.filter_and_cache(project, version, guesses) - def _render_github_source_url(self, repo: dict[str, Any], ref: str) -> str: + def _render_github_source_url(self, repo: Dict[str, Any], ref: str) -> str: """From API knowledge, create a download link. There are quite a few indirections involved, when downloading source code from GitHub. With this method we build what we @@ -331,38 +330,18 @@ def _render_github_source_url(self, repo: dict[str, Any], ref: str) -> str: def get_matching_source_url(self, version: Any, github_ref: str, version_prefix: Any=None ) -> str: - """Heuristics to find a tag in GitHub that corresponds to - @version in the project identified by @github_ref. - - First we must normalize @github_ref, because we are unsure - what is actually passed as this paramter. - - The first check for each retrieved tag is the original - get_matching_tag() and all the guessing happens afterwards. - This has the effect that if our guessing does not yield any - viable results, this algo implicitly falls back to checking - every tag with get_matching_tag(), which is the behaviour of - the previous implementation. - - If get_matching_tag() did not yield a positive result, we - start guessing tags: - We only care about such tags that produce a non empty match - with self.version_regex, because only these would ever yield - accepted compare() results in get_matching_tag(). - Every such tag can be read as a fixed prefix, followed by a - substring as matched by self.version_regex, followed by a - fixed suffix. Usually, the prefix will be "v" and the suffix - will be empty, but sometimes tags are more elaborate. - We expect only the regex-matchable part of a tag changes - from version to version, while the prefix and the suffix are - static. 
- Given a tag with a static prefix, a static suffix and a - self.version_regex-matchable substring, we can generate - tag names from semantic versions, by reversing the logic - in to_semver_string(). - Comparing the original matchable substring, to the result of - to_semver_string() we should be able to generate similar - matchable substrings from @version. + """Find a URL to download source code from GitHub. We are + looking for the source code in @github_ref at @version. + + We expect to match @version to an existing tag in the repo + identified by @github_ref. We want to have the source + code download URL of that existing tag! + + In order to perform this matching, we must retrieve the tags + from GitHub and then analyse them. First, we use + get_matching_tag(). If that doesn't yield a positive result, + we try to infer a tag for @version, to prevent an exhaustive + search over all tags. """ try: repo = self._get_github_repo(github_ref) @@ -385,10 +364,10 @@ def get_matching_source_url(self, version: Any, github_ref: str, # 'name' is a viable index, for instance an error message tags = {} - for name, api_obj in tags.items(): - source_url = self.get_matching_tag([api_obj], version, url) - if len(source_url) > 0: # we found what we believe is - return source_url # the correct source_url + source_url = self.get_matching_tag(tags.values(), version, url) + if len(source_url) > 0: # we found what we believe is + return source_url # the correct source_url + for name in tags: for guess in self._gen_tags(repo['full_name'], version, name): if guess in tags: # found on current result-page return self._render_github_source_url(repo, guess)