Skip to content

Commit

Permalink
Create a backup approach for GitPython
Browse files Browse the repository at this point in the history
Make `aws_bucket_name` mandatory
Remove empty repositories when cloning wiki fails
Bump version to 0.0.2
  • Loading branch information
dormant-user committed Jul 28, 2024
1 parent 8323ca2 commit 5f5b597
Show file tree
Hide file tree
Showing 9 changed files with 140 additions and 29 deletions.
2 changes: 1 addition & 1 deletion docs/README.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />

<title>Git2S3 &#8212; Git2S3 documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css" />
Expand Down
10 changes: 7 additions & 3 deletions docs/genindex.html
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,17 @@ <h2 id="A">A</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#git2s3.config.SourceControl.all">all (git2s3.config.SourceControl attribute)</a>
</li>
<li><a href="index.html#git2s3.squire.archer">archer() (in module git2s3.squire)</a>
</li>
<li><a href="index.html#git2s3.exc.ArchiveError">ArchiveError</a>
</li>
<li><a href="index.html#git2s3.config.EnvConfig.aws_access_key_id">aws_access_key_id (git2s3.config.EnvConfig attribute)</a>
</li>
<li><a href="index.html#git2s3.config.EnvConfig.aws_bucket_name">aws_bucket_name (git2s3.config.EnvConfig attribute)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#git2s3.config.EnvConfig.aws_bucket_name">aws_bucket_name (git2s3.config.EnvConfig attribute)</a>
</li>
<li><a href="index.html#git2s3.config.EnvConfig.aws_profile_name">aws_profile_name (git2s3.config.EnvConfig attribute)</a>
</li>
<li><a href="index.html#git2s3.config.EnvConfig.aws_region_name">aws_region_name (git2s3.config.EnvConfig attribute)</a>
Expand Down Expand Up @@ -103,10 +105,12 @@ <h2 id="C">C</h2>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#git2s3.squire.check_file_presence">check_file_presence() (in module git2s3.squire)</a>
</li>
<li><a href="index.html#git2s3.config.DataStore.clone_url">clone_url (git2s3.config.DataStore attribute)</a>
<li><a href="index.html#git2s3.main.Git2S3.cli">cli() (git2s3.main.Git2S3 method)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#git2s3.config.DataStore.clone_url">clone_url (git2s3.config.DataStore attribute)</a>
</li>
<li><a href="index.html#git2s3.main.Git2S3.clone_wiki">clone_wiki() (git2s3.main.Git2S3 method)</a>
</li>
<li><a href="index.html#git2s3.main.Git2S3.cloner">cloner() (git2s3.main.Git2S3 method)</a>
Expand Down
43 changes: 40 additions & 3 deletions docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />

<title>Welcome to Git2S3’s documentation! &#8212; Git2S3 documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css" />
Expand Down Expand Up @@ -94,6 +94,26 @@ <h1>Welcome to Git2S3’s documentation!<a class="headerlink" href="#welcome-to-
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="git2s3.main.Git2S3.cli">
<span class="sig-name descname"><span class="pre">cli</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">cmd</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">fail</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">int</span></span></span><a class="headerlink" href="#git2s3.main.Git2S3.cli" title="Permalink to this definition"></a></dt>
<dd><p>Runs CLI commands.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>cmd</strong> – Command to run.</p></li>
<li><p><strong>fail</strong> – Boolean flag to fail on errors.</p></li>
</ul>
</dd>
<dt class="field-even">Returns<span class="colon">:</span></dt>
<dd class="field-even"><p>Return code after running the command.</p>
</dd>
<dt class="field-odd">Return type<span class="colon">:</span></dt>
<dd class="field-odd"><p>int</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="git2s3.main.Git2S3.get_all">
<span class="sig-name descname"><span class="pre">get_all</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">source</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="#git2s3.config.SourceControl" title="git2s3.config.SourceControl"><span class="pre">SourceControl</span></a></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">Generator</span><span class="p"><span class="pre">[</span></span><span class="pre">Dict</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#git2s3.main.Git2S3.get_all" title="Permalink to this definition"></a></dt>
Expand All @@ -110,7 +130,7 @@ <h1>Welcome to Git2S3’s documentation!<a class="headerlink" href="#welcome-to-

<dl class="py method">
<dt class="sig sig-object py" id="git2s3.main.Git2S3.set_pat">
<span class="sig-name descname"><span class="pre">set_pat</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">url</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Union</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">Url</span><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#git2s3.main.Git2S3.set_pat" title="Permalink to this definition"></a></dt>
<span class="sig-name descname"><span class="pre">set_pat</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">url</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">Union</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">Url</span><span class="p"><span class="pre">]</span></span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">Optional</span><span class="p"><span class="pre">[</span></span><span class="pre">Union</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">,</span></span><span class="w"> </span><span class="pre">Url</span><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">]</span></span></span></span><a class="headerlink" href="#git2s3.main.Git2S3.set_pat" title="Permalink to this definition"></a></dt>
<dd><p>Creates an authenticated URL by updating the netloc, and sets that as the origin URL.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
Expand Down Expand Up @@ -240,6 +260,23 @@ <h1>Welcome to Git2S3’s documentation!<a class="headerlink" href="#welcome-to-
</section>
<section id="module-git2s3.squire">
<span id="squire"></span><h1>Squire<a class="headerlink" href="#module-git2s3.squire" title="Permalink to this heading"></a></h1>
<dl class="py function">
<dt class="sig sig-object py" id="git2s3.squire.archer">
<span class="sig-prename descclassname"><span class="pre">git2s3.squire.</span></span><span class="sig-name descname"><span class="pre">archer</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">destination</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#git2s3.squire.archer" title="Permalink to this definition"></a></dt>
<dd><p>Archives a given directory and deletes it while retaining the zipfile.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters<span class="colon">:</span></dt>
<dd class="field-odd"><p><strong>destination</strong> – Directory path to be archived.</p>
</dd>
<dt class="field-even">Raises<span class="colon">:</span></dt>
<dd class="field-even"><p><strong>AssertionError</strong> – If zipfile is not present after archiving.</p>
</dd>
</dl>
</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="git2s3.squire.env_loader">
<span class="sig-prename descclassname"><span class="pre">git2s3.squire.</span></span><span class="sig-name descname"><span class="pre">env_loader</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">filename</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">os.PathLike</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference internal" href="#git2s3.config.EnvConfig" title="git2s3.config.EnvConfig"><span class="pre">EnvConfig</span></a></span></span><a class="headerlink" href="#git2s3.squire.env_loader" title="Permalink to this definition"></a></dt>
Expand Down Expand Up @@ -413,7 +450,7 @@ <h1>Configuration<a class="headerlink" href="#configuration" title="Permalink to

<dl class="py attribute">
<dt class="sig sig-object py" id="git2s3.config.EnvConfig.aws_bucket_name">
<span class="sig-name descname"><span class="pre">aws_bucket_name</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#git2s3.config.EnvConfig.aws_bucket_name" title="Permalink to this definition"></a></dt>
<span class="sig-name descname"><span class="pre">aws_bucket_name</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="pre">str</span></em><a class="headerlink" href="#git2s3.config.EnvConfig.aws_bucket_name" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py attribute">
Expand Down
Binary file modified docs/objects.inv
Binary file not shown.
2 changes: 1 addition & 1 deletion docs/searchindex.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion git2s3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from git2s3.main import Git2S3

version = "0.0.1"
version = "0.0.2"


@click.command()
Expand Down
2 changes: 1 addition & 1 deletion git2s3/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ class EnvConfig(BaseSettings):
aws_access_key_id: str | None = None
aws_secret_access_key: str | None = None
aws_region_name: str | None = None
aws_bucket_name: str | None = None
aws_bucket_name: str
aws_s3_prefix: str = f"github_{int(time.time())}"
boto3_retry_attempts: int = 10
boto3_retry_mode: Boto3RetryMode = Boto3RetryMode.standard
Expand Down
92 changes: 73 additions & 19 deletions git2s3/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import os
import secrets
import shutil
import subprocess
import threading
import warnings
from collections.abc import Generator
Expand All @@ -12,7 +12,7 @@

import git
import requests
from git.exc import GitCommandError
from git.exc import GitCommandError, InvalidGitRepositoryError
from pydantic import HttpUrl

from git2s3 import config, exc, s3, squire
Expand Down Expand Up @@ -48,8 +48,14 @@ def __init__(
"X-GitHub-Api-Version": "2022-11-28",
"Content-Type": "application/x-www-form-urlencoded",
}
self.repo = git.Repo()
self.origin = self.repo.remote()
try:
self.repo = git.Repo()
self.origin = self.repo.remote()
except (InvalidGitRepositoryError, ValueError):
self.logger.warning("Unable to use git python, switching to git cli")
self.cli("command -v git") # Make sure git cli works
self.repo = None
self.origin = None
self.clone_dir = os.path.join(os.getcwd(), self.env.git_owner)
warnings.simplefilter("always", exc.DirectoryExists)
warnings.simplefilter("always", exc.UnsupportedSource)
Expand All @@ -63,7 +69,8 @@ def __init__(
if profile == "orgs":
if config.SourceControl.gist in self.env.source:
warnings.warn(
f"Gists are not supported for organizations. Removing {config.SourceControl.gist!r} from source.",
"Gists are not supported for organization profiles. "
f"Removing {config.SourceControl.gist.name!r} from source.",
exc.UnsupportedSource,
)
self.env.source.remove(config.SourceControl.gist)
Expand Down Expand Up @@ -96,6 +103,35 @@ def profile_type(self) -> str:
f"Failed to get the profile type for {self.env.git_owner}. Please check the owner/organization name."
)

def cli(self, cmd: str, fail: bool = True) -> int:
    """Runs CLI commands.

    Args:
        cmd: Command to run. It is executed through the shell, so it must only be built from trusted input.
        fail: Boolean flag to fail on errors.

    Returns:
        int:
        Return code after running the command.

    Raises:
        AssertionError:
        If ``fail`` is set and the command exits with a non-zero return code.
    """
    token = self.env.git_token

    def redact(text: str) -> str:
        """Masks the git token in the given text, leaving it untouched when no token is configured."""
        # str.replace with an empty/None token would either garble the text or raise TypeError
        return text.replace(token, "****") if token else text

    # shell=True is kept because callers pass compound commands (e.g. "cd <dir> && git clone <url>")
    # Output is captured (instead of discarded) so that failures can be reported with the real reason
    result = subprocess.run(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        errors="replace",
    )
    ret_code = result.returncode
    if fail and ret_code != 0:
        redacted = redact(cmd)
        # Command output may echo the authenticated URL, so it is redacted before logging
        output = redact(result.stdout or "").strip()
        if output:
            self.logger.error(output)
        else:
            self.logger.error("Failed to run %s", redacted)
        # Raised explicitly (not via 'assert') so the check is not stripped when running with -O
        raise AssertionError(
            f"{redacted!r} - returned a non-zero exit code: {ret_code}"
        )
    return ret_code

def get_all(self, source: config.SourceControl) -> Generator[Dict[str, str]]:
"""Iterate through a target owner/organization to get all available repositories/gists.
Expand Down Expand Up @@ -142,7 +178,7 @@ def get_all(self, source: config.SourceControl) -> Generator[Dict[str, str]]:
self.logger.debug("No repos found in page: %d, ending loop.", idx)
break

def set_pat(self, url: str | HttpUrl) -> None:
def set_pat(self, url: str | HttpUrl) -> str | HttpUrl | None:
"""Creates an authenticated URL by updating the netloc, and sets that as the origin URL.
Args:
Expand All @@ -164,8 +200,10 @@ def set_pat(self, url: str | HttpUrl) -> None:
url_split.fragment,
)
)
self.origin.config_writer.set("url", joined)
self.origin.config_writer.release()
if self.repo and self.origin:
self.origin.config_writer.set("url", joined)
self.origin.config_writer.release()
return joined

def clone_wiki(self, datastore: config.DataStore) -> None:
"""Clone all the wikis from the repository.
Expand All @@ -188,9 +226,25 @@ def clone_wiki(self, datastore: config.DataStore) -> None:
)
if not os.path.isdir(wiki_dest):
os.makedirs(wiki_dest)
wiki_url = self.set_pat(wiki_url)
try:
self.set_pat(wiki_url)
self.repo.clone_from(wiki_url, wiki_dest)
if self.repo and self.origin:
self.repo.clone_from(wiki_url, wiki_dest)
else:
output = self.cli(f"cd {wiki_dest} && git clone {wiki_url}", fail=False)
if output == 0:
try:
squire.archer(wiki_dest)
except AssertionError:
self.logger.error(
"Failed to create a zip file for %s", datastore.name
)
raise exc.ArchiveError(
f"Failed to create a zip file for {datastore.name!r}"
)
else:
# Skip if cloning failed, as wiki pages are not guaranteed to exist
shutil.rmtree(wiki_dest)
except GitCommandError as error:
msg = error.stderr or error.stdout or ""
msg = msg.strip().replace("\n", "").replace("'", "").replace('"', "")
Expand Down Expand Up @@ -230,24 +284,24 @@ def worker(self, repo: Dict[str, str]) -> None:
).start()
if not os.path.isdir(repo_dest):
os.makedirs(repo_dest)
datastore.clone_url = self.set_pat(datastore.clone_url)
try:
self.set_pat(datastore.clone_url)
self.repo.clone_from(datastore.clone_url, repo_dest)
if self.repo and self.origin:
self.repo.clone_from(datastore.clone_url, repo_dest)
else:
self.cli(f"cd {repo_dest} && git clone {datastore.clone_url}")
try:
if datastore.description:
desc_file = os.path.join(
repo_dest, f"description_{secrets.token_hex(2)}.txt"
)
desc_file = os.path.join(repo_dest, "description_git2s3.txt")
with open(desc_file, "w") as desc:
desc.write(datastore.description)
desc.flush()
except Exception as warning:
# Adding description file is only an added feature, so no need to fail
self.logger.warning(warning)
shutil.make_archive(repo_dest, "zip", repo_dest)
if os.path.isfile(f"{repo_dest}.zip"):
shutil.rmtree(repo_dest)
else:
try:
squire.archer(repo_dest)
except AssertionError:
self.logger.error("Failed to create a zip file for %s", datastore.name)
raise exc.ArchiveError(
f"Failed to create a zip file for {datastore.name!r}"
Expand Down
16 changes: 16 additions & 0 deletions git2s3/squire.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import os
import pathlib
import shutil
from datetime import datetime
from typing import Dict

Expand All @@ -10,6 +11,21 @@
from git2s3 import config


def archer(destination: str) -> None:
    """Archives a given directory and deletes it while retaining the zipfile.

    Args:
        destination: Directory path to be archived.

    Raises:
        AssertionError: If zipfile is not present after archiving.
    """
    # A trailing separator would place the archive *inside* the directory (as "<dir>/.zip"),
    # which would then be deleted along with the directory itself
    separators = os.sep + (os.altsep or "")
    source_dir = destination.rstrip(separators) or destination
    archive = shutil.make_archive(source_dir, "zip", source_dir)
    # Raised explicitly (not via 'assert') so the directory is never removed without
    # a verified archive, even when Python runs with -O
    if not os.path.isfile(archive):
        raise AssertionError(f"Archive not found after zipping {source_dir!r}")
    shutil.rmtree(source_dir)


def env_loader(filename: str | os.PathLike) -> config.EnvConfig:
"""Loads environment variables based on filetypes.
Expand Down

0 comments on commit 5f5b597

Please sign in to comment.