Skip to content

Commit

Permalink
feat: save project files
Browse files Browse the repository at this point in the history
  • Loading branch information
hrz6976 committed Dec 12, 2024
1 parent 2195bfc commit 627b1bf
Showing 1 changed file with 72 additions and 1 deletion.
73 changes: 72 additions & 1 deletion woc/objects.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import difflib
import re
import os
import warnings
from datetime import datetime, timedelta, timezone
from functools import cached_property, lru_cache
Expand Down Expand Up @@ -172,6 +173,13 @@ def unique_authors(self) -> List["UniqueAuthor"]:
@property
def authors(self):
raise NotImplementedError("Author object does not have authors method")

@property
def aliases(self) -> List["Author"]:
_unique_authors = self.unique_authors
if len(_unique_authors) == 0:
return []
return _unique_authors[0].authors

@cached_property
def first_blobs(self) -> List["Blob"]:
Expand All @@ -197,7 +205,7 @@ class Blob(_GitObject):

@cached_property
def _pos(self) -> Tuple[int, int]:
return self.woc.get_pos("blob", self.key)
return self.woc._get_pos("blob", self.key)

def __len__(self) -> int:
return self._pos[1]
Expand Down Expand Up @@ -745,6 +753,69 @@ def __iter__(self) -> "Generator[Commit, None, None]":
@property
def projects(self) -> List["Project"]:
raise NotImplementedError("Project object does not have projects method")

def download_blob(self, blob_sha: str) -> str:
"""
Download the blob content from remote.
"""
try:
import requests
from urllib.parse import quote_plus
except ImportError:
raise ImportError("This function requires the requests module. Install it via `pip install requests`")

if self._platform_repo[0] == 'github.com':
project = self._platform_repo[1]
_r = requests.get(f'https://api.github.com/repos/{project}/git/blobs/{blob_sha}',
allow_redirects=True,
headers={'Accept': 'application/vnd.github.raw+json'})
_r.raise_for_status()
return _r.content
elif self._platform_repo[0] == 'gitlab.com':
if not hasattr(self, "gitlab_project_id"):
project = quote_plus(self._platform_repo[1])
r = requests.get(f'https://gitlab.com/api/v4/projects/{project}')
r.raise_for_status()
self.gitlab_project_id = r.json()['id']
_r = requests.get(f'https://gitlab.com/api/v4/projects/{self.gitlab_project_id}/repository/blobs/{blob_sha}/raw',
allow_redirects=True)
_r.raise_for_status()
return _r.content
else:
raise NotImplementedError("The function is not implemented for " + self._platform_repo[0])


def save(self, path: str, commit: Optional[Commit] = None):
"""
Save the project files to the disk. Binary blobs are retrieved from the remote.
:param path: The path to save the files.
:param commit: Save the files at this commit. If None, the head or latest commit is used.
"""
if commit is None:
try:
commit = self.head
except ValueError:
_logger.warning(f"No head commit found for {self.key}, using latest commit")
commit = self.latest_commit

flist = list(commit.tree.traverse())
for idx, (f, blob) in enumerate(flist):
_logger.debug(f"{idx + 1}/{len(flist)}: {f.path}")
_p = os.path.join(path, f.path)
os.makedirs(os.path.dirname(_p), exist_ok=True)
with open(_p, 'wb') as f:
try:
f.write(blob.data.encode())
except KeyError as e:
_logger.info(f"Missing blob {blob.key}")
try:
if self._platform_repo[0] in ('github.com', 'gitlab.com'):
f.write(self.download_blob(blob.hash))
except Exception as e:
_logger.error(f"Failed to download blob {blob.hash}: {e}")
except Exception as e:
_logger.error(f"Failed to write blob {blob.hash}: {e}")


class RootProject(Project):
Expand Down

0 comments on commit 627b1bf

Please sign in to comment.