-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add `pueblo.io.to_io` utility function
- Loading branch information
Showing 6 changed files with 173 additions and 1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .universal import to_io |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import contextlib | ||
import io | ||
import typing as t | ||
from pathlib import Path | ||
|
||
from pathlibfs import Path as PathPlus | ||
from yarl import URL | ||
|
||
|
||
@contextlib.contextmanager
def to_io(source: t.Union[str, Path, t.IO], mode: t.Literal["r", "rb", "rt"] = "rt") -> t.Generator[t.IO, None, None]:
    """
    Converge filesystem path, remote URL, or file-like object into an IO handle.

    `source` may be:

    - an already-open file-like object (any `io.IOBase` instance, e.g. the
      result of `open()`, `io.StringIO`, or `io.BytesIO`), which is yielded
      unchanged; `mode` is ignored in this case.
    - a `str`, `pathlib.Path`, or pathlibfs `Path`, which is converted to a
      string, resolved through `open_url`, and opened with `mode`.

    The handle is always closed when the context exits, even if the body of the
    `with` statement raises. This includes handles passed in by the caller.

    Raises `TypeError` if `source` is none of the supported types.
    """
    fp: t.IO
    if isinstance(source, io.IOBase):
        # Already an open handle: pass it through untouched.
        fp = t.cast(t.IO, source)
    elif isinstance(source, (str, Path, PathPlus)):
        fp = open_url(str(source)).open(mode=mode)
    else:
        raise TypeError(f"Unable to converge to IO handle. type={type(source)}, value={source}")
    try:
        yield fp
    finally:
        # Release the handle even when the consumer's block raised.
        fp.close()
|
||
|
||
def open_url(url: str) -> PathPlus:
    """
    Access URL, with specific handling for GitHub URLs.

    When approached using a GitHub HTTP URL, converge it to a pathlibfs / fsspec URL,
    and open it. Any other URL is handed to pathlibfs unchanged.

    Input URLs
    ----------
    github+https://foobar:ghp_lalala@github.com/acme/sweet-camino/path/to/document.md
    github+https://foobar:ghp_lalala@github.com/acme/sweet-camino/blob/main/path/to/document.md

    Output Path
    -----------
    fs = Path("github://path/to/document.md", username="foobar", token="ghp_lalala", org="acme", repo="sweet-camino")

    Raises
    ------
    IndexError
        If a `github+https` URL does not carry both an organization and a
        repository segment in its path.
    """
    uri = URL(url)

    # Everything that is not a `github+https` URL is passed through as-is.
    if not uri.scheme.startswith("github+https"):
        return PathPlus(url)

    # Drop the empty fragment in front of the leading slash.
    path_fragments = uri.path.split("/")[1:]
    path_kwargs = {
        "username": uri.user,
        "token": uri.password,
        "org": path_fragments[0],
        "repo": path_fragments[1],
    }

    real_path_fragments = path_fragments[2:]
    # A repository-root URL has no third fragment, so check the length first
    # instead of indexing blindly.
    if len(path_fragments) > 2 and path_fragments[2] in ("blob", "raw"):
        # Skip the `blob|raw` marker and the ref name that follows it.
        # NOTE(review): the ref (e.g. `main`) is discarded here, not forwarded
        # to the filesystem; confirm whether it should select the revision.
        real_path_fragments = path_fragments[4:]

    downstream_url = "github://" + "/".join(real_path_fragments)
    return PathPlus(downstream_url, **path_kwargs)
|
||
|
||
def path_without_scheme(url_like: str) -> PathPlus:
    """
    Return a pathlibfs Path, without the scheme.

    The input is stringified and parsed as a URL. If the URL is absolute, its
    scheme is replaced with an empty one before the pathlibfs Path is built;
    otherwise it is used as given.
    """
    parsed = URL(str(url_like))
    stripped = parsed.with_scheme("") if parsed.is_absolute() else parsed
    return PathPlus(str(stripped))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
from pathlib import Path | ||
|
||
import pytest | ||
|
||
|
||
@pytest.fixture
def readme_file() -> Path:
    """Path to `README.md`, located one directory above this file's directory."""
    parent_of_here = Path(__file__).parent.parent
    return parent_of_here.joinpath("README.md")
|
||
|
||
def get_readme_url(infix: str = "", scheme: str = "https:") -> str:
    """
    Build a URL to the `README.md` of the `pyveci/pueblo` repository on GitHub.

    `scheme` includes the trailing colon (e.g. `https:`), and `infix` is placed
    between the repository path and the file name (e.g. `raw/main/`).
    """
    template = "{}//github.com/pyveci/pueblo/{}README.md"
    return template.format(scheme, infix)
|
||
|
||
@pytest.fixture
def readme_url_https_raw() -> str:
    """README URL with the default `https:` scheme and a `raw/main/` infix."""
    url = get_readme_url(infix="raw/main/")
    return url
|
||
|
||
@pytest.fixture
def readme_url_github_https_bare() -> str:
    """README URL with the `github+https:` scheme and no infix."""
    url = get_readme_url(scheme="github+https:")
    return url
|
||
|
||
@pytest.fixture
def readme_url_github_https_raw() -> str:
    """README URL with the `github+https:` scheme and a `raw/main/` infix."""
    url = get_readme_url(scheme="github+https:", infix="raw/main/")
    return url
|
||
|
||
@pytest.fixture
def readme_url_github_https_blob() -> str:
    """README URL with the `github+https:` scheme and a `blob/main/` infix."""
    url = get_readme_url(scheme="github+https:", infix="blob/main/")
    return url
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import pytest | ||
from pathlibfs import Path as PathPlus | ||
|
||
from pueblo.io import to_io | ||
from pueblo.io.universal import path_without_scheme | ||
|
||
README_NEEDLE = "A Python toolbox library" | ||
|
||
|
||
def test_to_io_failure():
    """`to_io` rejects an unsupported source (`None`) with a `TypeError`."""
    with pytest.raises(TypeError) as excinfo, to_io(None):
        pass
    assert excinfo.match("Unable to converge to IO handle. type=<class 'NoneType'>, value=None")
|
||
|
||
def test_to_io_file(readme_file):
    """A local filesystem path is opened by `to_io` and yields the README text."""
    with to_io(readme_file) as handle:
        text = handle.read()
        assert README_NEEDLE in text
|
||
|
||
def test_to_io_memory(readme_file):
    """An already-open text file handle is accepted by `to_io` and is readable."""
    opened = open(readme_file, mode="r")
    with to_io(opened) as handle:
        text = handle.read()
        assert README_NEEDLE in text
|
||
|
||
def test_to_io_url(readme_url_https_raw):
    """An `https:` URL with a `raw/main/` infix is readable through `to_io`."""
    with to_io(readme_url_https_raw) as handle:
        text = handle.read()
        assert README_NEEDLE in text
|
||
|
||
def test_to_io_github_url_bare(readme_url_github_https_bare):
    """A `github+https:` URL without a `blob`/`raw` infix is readable through `to_io`."""
    with to_io(readme_url_github_https_bare) as handle:
        text = handle.read()
        assert README_NEEDLE in text
|
||
|
||
def test_to_io_github_url_raw(readme_url_github_https_raw):
    """A `github+https:` URL with a `raw/main/` infix is readable through `to_io`."""
    with to_io(readme_url_github_https_raw) as handle:
        text = handle.read()
        assert README_NEEDLE in text
|
||
|
||
def test_to_io_github_url_blob(readme_url_github_https_blob):
    """A `github+https:` URL with a `blob/main/` infix is readable through `to_io`."""
    with to_io(readme_url_github_https_blob) as handle:
        text = handle.read()
        assert README_NEEDLE in text
|
||
|
||
def test_path_without_scheme_absolute():
    """An absolute URL loses its scheme but keeps its host and path."""
    expected = PathPlus("file:////localhost/bar/baz")
    actual = path_without_scheme("foo://localhost/bar/baz")
    assert actual == expected
|
||
|
||
def test_path_without_scheme_relative():
    """A scheme-less path converts to the equivalent pathlibfs Path."""
    expected = PathPlus("file:///bar/baz")
    actual = path_without_scheme("/bar/baz")
    assert actual == expected