Skip to content

Commit

Permalink
experimental upath implementation for dvcfilesystem (#10302)
Browse files Browse the repository at this point in the history
  • Loading branch information
skshetry authored Feb 27, 2024
1 parent 9bb4501 commit 491155c
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 0 deletions.
54 changes: 54 additions & 0 deletions dvc/fs/dvc_path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""UPath implementation for DVCFileSystem.
This provides a `pathlib.Path` like interface to
work with DVCFileSystem.
Examples
--------
>>> from upath import UPath
>>> local = UPath("dvc://path/to/local/repo")
>>> https = UPath("dvc+https://github.com/iterative/example-get-started", rev="main")
>>> ssh = UPath("dvc+ssh://git@github.com:iterative/example-get-started.git")
"""

from urllib.parse import urlsplit

from upath import UPath


class DVCPath(UPath):
    """`UPath` subclass registered for the ``dvc`` family of protocols.

    Paths given with a ``dvc+http``, ``dvc+https`` or ``dvc+ssh`` protocol are
    rewritten so that the repository location is carried in the ``url``
    storage option and the path itself is handed to the parent class under
    the plain ``dvc`` protocol.
    """

    @classmethod
    def _transform_init_args(cls, args, protocol, storage_options):
        """Normalize constructor arguments before the parent class sees them.

        * With no positional arguments, the path defaults to ``"/"``.
        * With a ``dvc+<transport>`` protocol and no ``url`` already present
          in ``storage_options``, the first positional argument is treated as
          the repository URL: it is stored in ``storage_options["url"]`` and
          replaced by ``"/"``. Any remaining positional arguments are kept.

        The parent implementation is always invoked with protocol ``"dvc"``.
        """
        if not args:
            args = ("/",)
        elif "url" not in storage_options and protocol in {
            "dvc+http",
            "dvc+https",
            "dvc+ssh",
        }:
            first, *remaining = args
            parts = urlsplit(str(first))
            # The protocol is one of the three names above, so the text after
            # the "+" is the underlying transport.
            transport = protocol.partition("+")[2]
            # ssh: keep only "<netloc><path>"; http(s): same URL with the
            # "dvc+" prefix dropped from the scheme.
            storage_options["url"] = (
                parts.netloc + parts.path
                if transport == "ssh"
                else parts._replace(scheme=transport).geturl()
            )
            # Assume the given path is a root url
            args = ("/", *remaining)
        return super()._transform_init_args(args, "dvc", storage_options)

    def __str__(self):
        """Return the parent's string form, with the repo URL spliced in.

        When the ``url`` storage option is set (and truthy), the first
        occurrence of ``"dvc://"`` is replaced by ``"dvc+<url>"``.
        """
        text = super().__str__()
        repo_url = self.storage_options.get("url")
        if not repo_url:
            return text
        return text.replace("dvc://", f"dvc+{repo_url}", 1)

    def with_segments(self, *pathsegments):
        """Build a path from ``pathsegments`` that reuses this path's filesystem."""
        derived = super().with_segments(*pathsegments)
        # cache filesystem, as dvcfs does not cache filesystem
        # caveat: any joinpath operation will instantiate filesystem
        derived._fs_cached = self.fs
        return derived
9 changes: 9 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,14 @@ dvc = "dvc.cli:main"
[project.entry-points."fsspec.specs"]
dvc = "dvc.api:DVCFileSystem"

[project.entry-points."universal_pathlib.implementations"]
dvc = "dvc.fs.dvc_path:DVCPath"
# universal_pathlib does not support fsspec url chaining yet.
# see https://github.com/fsspec/universal_pathlib/issues/28.
"dvc+http" = "dvc.fs.dvc_path:DVCPath"
"dvc+https" = "dvc.fs.dvc_path:DVCPath"
"dvc+ssh" = "dvc.fs.dvc_path:DVCPath"

[project.entry-points."pyinstaller40"]
hook-dirs = "dvc.__pyinstaller:get_hook_dirs"
tests = "dvc.__pyinstaller:get_PyInstaller_tests"
Expand Down Expand Up @@ -232,6 +240,7 @@ module = [
"ruamel.yaml.*",
"shortuuid",
"shtab",
"upath",
"virtualenv",
"viztracer",
"voluptuous",
Expand Down

0 comments on commit 491155c

Please sign in to comment.