Skip to content

Commit

Permalink
Implement option to only get n latest versions of matching packages (c…
Browse files Browse the repository at this point in the history
…onda-incubator#52))

Adds `--latest` and `--latest-dev` options to
limit download to only the n most recent non-dev/dev versions of each package.
  • Loading branch information
analog-cbarber authored and zinal committed Jan 24, 2023
1 parent bf6dd9c commit 64cba44
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 2 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
[\#22](https://github.com/conda-incubator/conda-mirror/issues/22)
* Support use of conda package version specifiers
[\#37](https://github.com/conda-incubator/conda-mirror/issues/37)
* Options to only get n latest versions of matching packages.
[\#52](https://github.com/conda-incubator/conda-mirror/issues/52)
* Added tqdm based progress bars.
[\#29](https://github.com/conda-incubator/conda-mirror/issues/29)
* Improve download speed, especially for smaller packages.
Expand Down
115 changes: 113 additions & 2 deletions conda_mirror/conda_mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import time
import random
from pprint import pformat
from typing import Any, Callable, Dict, Set, Union
from typing import Any, Callable, Dict, Iterable, Set, Union, List, NamedTuple

import requests
import yaml
Expand All @@ -25,7 +25,7 @@
try:
from conda.models.version import BuildNumberMatch, VersionSpec
except ImportError:
from .versionspec import BuildNumberMatch, VersionSpec
from .versionspec import BuildNumberMatch, VersionSpec, VersionOrder

logger = None

Expand Down Expand Up @@ -288,6 +288,27 @@ def _make_arg_parser():
action="store_true",
help=("Include packages matching any dependencies of packages in whitelist."),
)
# --latest [<n>]: limit the mirror to the <n> most recent non-dev versions of
# each package. A bare `--latest` means n=1 (const); when the option is absent
# the value is -1, which the rest of the code treats as "no limit".
ap.add_argument(
    "--latest",
    metavar="<n>",
    type=int,
    nargs="?",
    const=1,
    default=-1,
    help=(
        "Only download most-recent <n> non-dev instance(s) of each package. "
        "<n> defaults to 1 if omitted. "
        "If specified without --latest-dev, then no dev instances are downloaded."
    ),
)
ap.add_argument(
"--latest-dev",
metavar="<n>",
type=int,
nargs="?",
const=1,
default=-1,
help="Only download most-recent <n> dev instance(s) of each package.",
)
ap.add_argument(
"-v",
"--verbose",
Expand Down Expand Up @@ -483,6 +504,14 @@ def pdb_hook(exctype, value, traceback):
else:
url = "{}:{}".format(scheme, url[0])
proxies = {scheme: url}

latest_dev = int(args.latest_dev)
latest_non_dev = int(args.latest)

# If --latest is specified but --latest-dev is not, then --latest-dev defaults to zero.
if latest_dev < 0 and latest_non_dev >= 0:
latest_dev = 0

return {
"upstream_channel": args.upstream_channel,
"target_directory": args.target_directory,
Expand All @@ -492,6 +521,8 @@ def pdb_hook(exctype, value, traceback):
"blacklist": blacklist,
"whitelist": whitelist,
"include_depends": args.include_depends,
"latest_dev": latest_dev,
"latest_non_dev": latest_non_dev,
"dry_run": args.dry_run,
"no_validate_target": args.no_validate_target,
"minimum_free_space": args.minimum_free_space,
Expand Down Expand Up @@ -881,6 +912,68 @@ def _validate_or_remove_package(args):
)


def _find_non_recent_packages(
packages: Dict[str, Dict[str, Any]],
*,
include: Iterable[str],
latest_non_dev: int,
latest_dev: int,
) -> Set[str]:
"""Computes set of package filenames that are not sufficiently recent
Parameters
----------
packages: packages dictionary from repodata.json
include: package filenames to be considered
latest_non_dev: number of non-dev packages that are sufficiently recent to be included
if negative, then all non-dev packages will be included
latest_dev: number of dev packages that are sufficnetly recent to be included
if negative, then all dev packages will be included
Returns
-------
non-recent packages: Set[str]
Package filenames that are not sufficiently recent. This will be a subset of `include`
"""

non_recent_packages: Set[str] = set()

if latest_non_dev >= 0 or latest_dev >= 0:

class PackageAndVersion(NamedTuple):
package_file: str
version: VersionOrder

packages_by_name: Dict[str, List[PackageAndVersion]] = {}
for key in include:
metadata = packages[key]
try:
packages_by_name.setdefault(metadata["name"], []).append(
PackageAndVersion(key, VersionOrder(metadata["version"]))
)
except KeyError:
pass # ignore bad entries

for curpackages in packages_by_name.values():
curpackages.sort(
key=lambda x: x.version, reverse=True
) # recent versions first
dev_versions = [
p.package_file for p in curpackages if "DEV" in p.version.version[-1]
]
non_dev_versions = [
p.package_file
for p in curpackages
if "DEV" not in p.version.version[-1]
]
if latest_dev >= 0:
non_recent_packages.update(dev_versions[latest_dev:])
if latest_non_dev >= 0:
non_recent_packages.update(non_dev_versions[latest_non_dev:])

return non_recent_packages


def main(
upstream_channel,
target_directory,
Expand All @@ -889,6 +982,8 @@ def main(
blacklist=None,
whitelist=None,
include_depends=False,
latest_non_dev: int = -1,
latest_dev: int = -1,
num_threads=1,
dry_run=False,
no_validate_target=False,
Expand Down Expand Up @@ -930,6 +1025,12 @@ def main(
include_depends: bool
If true, then include packages matching dependencies of whitelisted
packages as well.
latest_dev: int
If >= zero, then only that number of the most recent development versions of
each package in a repo subdir will be downloaded.
latest_non_dev: int
If >= zero, then only that number of the most recent non development versions of
each package in a repo subdir will be downloaded.
num_threads : int, optional
Number of threads to be used for concurrent validation. Defaults to
`num_threads=1` for non-concurrent mode. To use all available cores,
Expand Down Expand Up @@ -993,6 +1094,7 @@ def main(
'size': 1960193,
'version': '8.5.18'}
"""
# TODO update these comments. They are no longer totally correct.
# Steps:
# 1. figure out blacklisted packages
# 2. un-blacklist packages that are actually whitelisted
Expand Down Expand Up @@ -1068,6 +1170,15 @@ def main(

possible_packages_to_mirror = set(packages.keys()) - excluded_packages

# 3b remove non-latest packages if so specified.
non_recent_packages = _find_non_recent_packages(
packages,
include=possible_packages_to_mirror,
latest_non_dev=latest_non_dev,
latest_dev=latest_dev,
)
possible_packages_to_mirror -= non_recent_packages

# 4. Validate all local packages
# construct the desired package repodata
desired_repodata = {
Expand Down

0 comments on commit 64cba44

Please sign in to comment.