Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: accept github urls longer than organization/repo segments #1692

Closed
wants to merge 9 commits into from
5 changes: 4 additions & 1 deletion frictionless/portals/github/__spec__/test_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from frictionless import Catalog, FrictionlessException, Package, platform, portals
from frictionless.resources import TableResource

# TODO: recover
pytestmark = pytest.mark.skip(reason="Cassetes for vcr need to be regenerated")

OUTPUT_OPTIONS_WITH_DP_YAML = {
Expand All @@ -30,6 +29,7 @@
}
]
}

OUTPUT_OPTIONS_WITH_DP = {
"name": "test-package",
"resources": [
Expand Down Expand Up @@ -71,6 +71,7 @@
},
],
}

OUTPUT_OPTIONS_WITHOUT_DP = {
"name": "test-repo-without-datapackage",
"resources": [
Expand Down Expand Up @@ -109,6 +110,8 @@
def test_github_adapter_read(options_without_dp):
url = options_without_dp.pop("url")
package = Package(url)
print("here we are")
print(package.to_descriptor())
assert package.to_descriptor() == OUTPUT_OPTIONS_WITHOUT_DP
assert (
package.resources[0].basepath
Expand Down
42 changes: 42 additions & 0 deletions frictionless/portals/github/__spec__/test_plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import pytest

from frictionless import FrictionlessException
from frictionless.portals.github.plugin import GithubControl, GithubPlugin


def test_github_plugin_parse_repo():
test_cases = [
{"url": "https://github.com/user/some-repo"},
{"url": "https://github.com/user/some-repo/some-other-stuff"},
{
"url": "https://github.com/user/some-repo/some-stuff",
"control": GithubControl(user="user", repo="some-repo"),
},
]

plugin = GithubPlugin()
for test_case in test_cases:
control = test_case["control"] if "control" in test_case else None

adapter = plugin.create_adapter(source=test_case["url"], control=control)

assert adapter.control.user == "user"
assert adapter.control.repo == "some-repo"


def test_github_url_control_mismatch():
test_cases = [
{
"url": "https://github.com/user/some-repo",
"control": GithubControl(user="wrong-user"),
},
{
"url": "https://github.com/user/some-repo",
"control": GithubControl(repo="wrong-repo"),
},
]

for test_case in test_cases:
plugin = GithubPlugin()
with pytest.raises(FrictionlessException):
plugin.create_adapter(source=test_case["url"], control=test_case["control"])
4 changes: 2 additions & 2 deletions frictionless/portals/github/control.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class GithubControl(Control):
"""The number of results per page. Default value is 30. Max value is 100."""

repo: Optional[str] = None
"""Name of the repo to read or write."""
"""Name of the repository to read or write."""

search: Optional[str] = None
"""Search query containing one or more search keywords and qualifiers to filter the repositories.
Expand All @@ -65,7 +65,7 @@ class GithubControl(Control):
By default the results are sorted by best match in desc order."""

user: Optional[str] = None
"""username of the github account."""
"""Owner of the repository (user or organisation)"""

filename: Optional[str] = None
"""Custom data package file name while publishing the data. By default it will use 'datapackage.json'."""
Expand Down
44 changes: 35 additions & 9 deletions frictionless/portals/github/plugin.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Optional
from typing import TYPE_CHECKING, Literal, Optional, Tuple
from urllib.parse import urlparse

from ...exception import FrictionlessException
from ...system import Plugin
from .adapter import GithubAdapter
from .control import GithubControl
Expand All @@ -18,27 +19,52 @@ class GithubPlugin(Plugin):

def create_adapter(
self,
source: Any,
source: Optional[str],
*,
control: Optional[Control] = None,
basepath: Optional[str] = None,
packagify: bool = False,
):
) -> Optional[GithubAdapter]:
if isinstance(source, str):
parsed = urlparse(source)
if not control or isinstance(control, GithubControl):
if parsed.netloc == "github.com":
control = control or GithubControl()
splited_url = parsed.path.split("/")[1:]
if len(splited_url) == 1:
control.user = splited_url[0]
return GithubAdapter(control)
if len(splited_url) == 2:
control.user, control.repo = splited_url

user, repo = self._extract_user_and_repo(parsed.path)

self._assert_no_mismatch(user, control.user, "user")
self._assert_no_mismatch(repo, control.repo, "repo")

control.user = user
control.repo = repo

return GithubAdapter(control)

if source is None and isinstance(control, GithubControl):
return GithubAdapter(control=control)

def select_control_class(self, type: Optional[str] = None):
if type == "github":
return GithubControl

@staticmethod
def _extract_user_and_repo(url_path: str) -> Tuple[Optional[str], Optional[str]]:
splitted_url = url_path.split("/")[1:]

user = splitted_url[0] if len(splitted_url) >= 1 else None
repo = splitted_url[1] if len(splitted_url) >= 2 else None

return (user, repo)

@staticmethod
def _assert_no_mismatch(
value: Optional[str],
control_value: Optional[str],
user_or_repo: Literal["user", "repo"],
):
if value and control_value and control_value != value:
raise FrictionlessException(
f'Mismatch between url and provided "{user_or_repo}"'
f"information (in url: {value}), in control: {control_value}"
)
Loading