Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Quansight's 0.8.1.dev #87

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ usage: conda-mirror [-h] [--upstream-channel UPSTREAM_CHANNEL]
[--temp-directory TEMP_DIRECTORY] [--platform PLATFORM]
[-v] [--config CONFIG] [--pdb] [--num-threads NUM_THREADS]
[--version] [--dry-run] [--no-validate-target]
[--minimum-free-space MINIMUM_FREE_SPACE]
[--minimum-free-space MINIMUM_FREE_SPACE] [--proxy PROXY]
[--ssl-verify SSL_VERIFY] [-k] [--max-retries MAX_RETRIES]

CLI interface for conda-mirror.py

Expand Down Expand Up @@ -68,6 +69,17 @@ optional arguments:
directory
--minimum-free-space MINIMUM_FREE_SPACE
Threshold for free diskspace. Given in megabytes.
--proxy PROXY Proxy URL to access internet if needed
--ssl-verify SSL_VERIFY, --ssl_verify SSL_VERIFY
Path to a CA_BUNDLE file with certificates of trusted
CAs, this may be "False" to disable verification as
per the requests API.
-k, --insecure Allow conda to perform "insecure" SSL connections and
transfers. Equivalent to setting 'ssl_verify' to
'false'.
--max-retries MAX_RETRIES
Maximum number of retries before a download error is
reraised, defaults to 100
```

## Example Usage
Expand Down
136 changes: 129 additions & 7 deletions conda_mirror/conda_mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import sys
import tarfile
import tempfile
import time
import random
from pprint import pformat

import requests
Expand Down Expand Up @@ -105,6 +107,16 @@ def _match(all_packages, key_glob_dict):
return matched


def _str_or_false(x):
"""
Returns a boolean False if x is the string "False" or similar.
Returns the original string otherwise.
"""
if x.lower() == "false":
x = False
return x


def _make_arg_parser():
"""
Localize the ArgumentParser logic
Expand Down Expand Up @@ -191,6 +203,36 @@ def _make_arg_parser():
type=int,
default=1000,
)
ap.add_argument(
'--proxy',
help=('Proxy URL to access internet if needed'),
type=str,
default=None,
)
ap.add_argument(
'--ssl-verify', '--ssl_verify',
help=('Path to a CA_BUNDLE file with certificates of trusted CAs, '
'this may be "False" to disable verification as per the '
'requests API.'),
type=_str_or_false,
default=None,
dest='ssl_verify',
)
ap.add_argument(
'-k', '--insecure',
help=('Allow conda to perform "insecure" SSL connections and '
"transfers. Equivalent to setting 'ssl_verify' to 'false'."),
action="store_false",
dest="ssl_verify",
)
ap.add_argument(
'--max-retries',
help=('Maximum number of retries before a download error is reraised, '
"defaults to 100"),
type=int,
default=100,
dest="max_retries",
)
return ap


Expand Down Expand Up @@ -270,6 +312,21 @@ def pdb_hook(exctype, value, traceback):
pdb.post_mortem(traceback)
sys.excepthook = pdb_hook

proxies = args.proxy
if proxies is not None:
# use scheme of proxy url to generate dictionary
# if no extra scheme is given
# examples:
# "http:https://user:[email protected]"
# -> {'http': 'https://user:[email protected]'}
# "https://user:[email protected]"
# -> {'https': 'https://user:[email protected]'}
scheme, *url = proxies.split(':')
if len(url) > 1:
url = ':'.join(url)
else:
url = '{}:{}'.format(scheme, url[0])
proxies = {scheme: url}
return {
'upstream_channel': args.upstream_channel,
'target_directory': args.target_directory,
Expand All @@ -281,6 +338,9 @@ def pdb_hook(exctype, value, traceback):
'dry_run': args.dry_run,
'no_validate_target': args.no_validate_target,
'minimum_free_space': args.minimum_free_space,
'proxies': proxies,
'ssl_verify': args.ssl_verify,
'max_retries': args.max_retries,
}


Expand Down Expand Up @@ -361,7 +421,7 @@ def _validate(filename, md5=None, size=None):
return filename, None


def get_repodata(channel, platform):
def get_repodata(channel, platform, proxies=None, ssl_verify=None):
"""Get the repodata.json file for a channel/platform combo on anaconda.org

Parameters
Expand All @@ -370,6 +430,10 @@ def get_repodata(channel, platform):
anaconda.org/CHANNEL
platform : {'linux-64', 'linux-32', 'osx-64', 'win-32', 'win-64'}
The platform of interest
proxies : dict
Proxys for connecting internet
ssl_verify : str or bool
Path to a CA_BUNDLE file or directory with certificates of trusted CAs

Returns
-------
Expand All @@ -381,7 +445,7 @@ def get_repodata(channel, platform):
url = url_template.format(channel=channel, platform=platform,
file_name='repodata.json')

resp = requests.get(url).json()
resp = requests.get(url, proxies=proxies, verify=ssl_verify).json()
info = resp.get('info', {})
packages = resp.get('packages', {})
# Patch the repodata.json so that all package info dicts contain a "subdir"
Expand All @@ -393,7 +457,7 @@ def get_repodata(channel, platform):
return info, packages


def _download(url, target_directory):
def _download(url, target_directory, proxies=None, ssl_verify=None):
"""Download `url` to `target_directory`

Parameters
Expand All @@ -402,6 +466,10 @@ def _download(url, target_directory):
The url to download
target_directory : str
The path to a directory where `url` should be downloaded
proxies : dict
Proxys for connecting internet
ssl_verify : str or bool
Path to a CA_BUNDLE file or directory with certificates of trusted CAs

Returns
-------
Expand All @@ -416,13 +484,55 @@ def _download(url, target_directory):
download_filename = os.path.join(target_directory, target_filename)
logger.debug('downloading to %s', download_filename)
with open(download_filename, 'w+b') as tf:
ret = requests.get(url, stream=True)
ret = requests.get(url, stream=True,
proxies=proxies, verify=ssl_verify)
for data in ret.iter_content(chunk_size):
tf.write(data)
file_size = os.path.getsize(download_filename)
return file_size


def _download_backoff_retry(url, target_directory, proxies=None, ssl_verify=None, max_retries=100):
"""Download `url` to `target_directory` with exponential backoff in the
event of failure.

Parameters
----------
url : str
The url to download
target_directory : str
The path to a directory where `url` should be downloaded
proxies : dict
Proxys for connecting internet
ssl_verify : str or bool
Path to a CA_BUNDLE file or directory with certificates of trusted CAs
max_retries : int, optional
The maximum number of times to retry before the download error is reraised,
default 100.

Returns
-------
file_size: int
The size in bytes of the file that was downloaded
"""
c = 0
two_c = 1
delay = 5.12e-5 # 51.2 us
while c < max_retries:
c += 1
two_c *= 2
try:
rtn = _download(url, target_directory, proxies=proxies, ssl_verify=ssl_verify)
break
except Exception:
if c < max_retries:
logger.debug('downloading failed, retrying {0}/{1}'.format(c, max_retries))
time.sleep(delay * random.randint(0, two_c - 1))
else:
raise
return rtn


def _list_conda_packages(local_dir):
"""List the conda packages (*.tar.bz2 files) in `local_dir`

Expand Down Expand Up @@ -547,7 +657,8 @@ def _validate_or_remove_package(args):

def main(upstream_channel, target_directory, temp_directory, platform,
blacklist=None, whitelist=None, num_threads=1, dry_run=False,
no_validate_target=False, minimum_free_space=0):
no_validate_target=False, minimum_free_space=0, proxies=None,
ssl_verify=None, max_retries=100):
"""

Parameters
Expand Down Expand Up @@ -589,6 +700,13 @@ def main(upstream_channel, target_directory, temp_directory, platform,
If True, skip validation of files already present in target_directory.
minimum_free_space : int, optional
Stop downloading when free space target_directory or temp_directory reach this threshold.
proxies : dict
Proxys for connecting internet
ssl_verify : str or bool
Path to a CA_BUNDLE file or directory with certificates of trusted CAs
max_retries : int, optional
The maximum number of times to retry before the download error is reraised,
default 100.

Returns
-------
Expand Down Expand Up @@ -649,7 +767,8 @@ def main(upstream_channel, target_directory, temp_directory, platform,
if not os.path.exists(os.path.join(target_directory, platform)):
os.makedirs(os.path.join(target_directory, platform))

info, packages = get_repodata(upstream_channel, platform)
info, packages = get_repodata(upstream_channel, platform,
proxies=proxies, ssl_verify=ssl_verify)
local_directory = os.path.join(target_directory, platform)

# 1. validate local repo
Expand Down Expand Up @@ -739,7 +858,10 @@ def main(upstream_channel, target_directory, temp_directory, platform,
break

# download package
total_bytes += _download(url, download_dir)
total_bytes += _download_backoff_retry(url, download_dir,
proxies=proxies,
ssl_verify=ssl_verify,
max_retries=max_retries)

# make sure we have enough free disk space in the target folder to meet threshold
# while also being able to fit the packages we have already downloaded
Expand Down