Skip to content

Commit

Permalink
[WIP] collection download count sync command (#1836)
Browse files Browse the repository at this point in the history
* Test.

No-Issue

Signed-off-by: James Tanner <[email protected]>

* Add integration test.

No-Issue

Signed-off-by: James Tanner <[email protected]>

* Lint cleanup.

No-Issue

Signed-off-by: James Tanner <[email protected]>

* Whoops.

No-Issue

Signed-off-by: James Tanner <[email protected]>

---------

Signed-off-by: James Tanner <[email protected]>
  • Loading branch information
jctanner authored Aug 14, 2023
1 parent 7e20e39 commit 00edd0b
Show file tree
Hide file tree
Showing 2 changed files with 203 additions and 0 deletions.
103 changes: 103 additions & 0 deletions galaxy_ng/app/management/commands/sync-collection-download-counts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#!/usr/bin/env python3


import datetime
import logging
import requests

from django.core.management.base import BaseCommand
from django.db import transaction
from pulp_ansible.app.models import Collection, CollectionDownloadCount


# Module-level logger used by the management command below.
log = logging.getLogger(__name__)


# Host queried when --upstream is not passed on the command line.
DEFAULT_UPSTREAM = 'https://galaxy.ansible.com'

# Namespaces whose collections are skipped during the sync.
SKIPLIST = ['larrymou9', 'github_qe_test_user']


class Command(BaseCommand):
    """Copy collection download counts from an upstream Galaxy server.

    For every local ``Collection`` the upstream detail endpoint is queried.
    A ``CollectionDownloadCount`` row is created when none exists, or raised
    to the upstream value when the upstream value is larger. Existing counts
    are never lowered.
    """

    # Seconds to wait on the upstream server before giving up on one collection.
    REQUEST_TIMEOUT = 30

    # Counters updated less than this many seconds ago are not re-fetched
    # unless --force is given.
    RESYNC_INTERVAL = 24 * 60 * 60

    def add_arguments(self, parser):
        """Register the --upstream, --limit and --force options."""
        parser.add_argument(
            '--upstream',
            default=DEFAULT_UPSTREAM,
            help=f"remote host to retrieve data from [{DEFAULT_UPSTREAM}]"
        )
        parser.add_argument('--limit', type=int, help="stop syncing after N collections")
        parser.add_argument(
            '--force', action='store_true', help='sync all counts and ignore last update'
        )

    def handle(self, *args, **options):
        """Walk all local collections and sync each one's download count.

        Collections are visited in ``pulp_created`` order. A collection is
        skipped when its namespace is in ``SKIPLIST``, when its counter was
        updated within ``RESYNC_INTERVAL`` (unless ``--force``), or when the
        upstream lookup does not yield a usable count.
        """
        upstream = options['upstream']
        limit = options['limit']
        force = options['force']
        log.info(f"Processing upstream download counts from {upstream}")

        collection_total = Collection.objects.count()
        collections = Collection.objects.all().order_by('pulp_created')
        for collection_count, collection in enumerate(collections, start=1):

            namespace = collection.namespace
            name = collection.name

            log.info(f'{collection_total}|{collection_count} {namespace}.{name}')

            if limit and collection_count > limit:
                break

            if namespace in SKIPLIST:
                continue

            counter = CollectionDownloadCount.objects.filter(
                namespace=namespace, name=name
            ).first()

            # optimization: don't try to resync something that changed less than a day ago
            if (
                counter is not None
                and not force
                and self._updated_recently(counter.pulp_last_updated)
            ):
                continue

            dcount = self._fetch_upstream_count(upstream, namespace, name)
            if dcount is None:
                # the lookup failed or returned an unexpected payload; already logged
                continue

            if counter is None:
                log.info(f'\tcreate downloadcount for {namespace}.{name} with value of {dcount}')
                with transaction.atomic():
                    counter = CollectionDownloadCount(
                        namespace=namespace,
                        name=name,
                        download_count=dcount
                    )
                    counter.save()
                continue

            if counter.download_count < dcount:
                log.info(
                    f'\tupdate downloadcount for {namespace}.{name}'
                    + f' from {counter.download_count} to {dcount}'
                )
                with transaction.atomic():
                    counter.download_count = dcount
                    # without save() the new value only lives on the in-memory object
                    counter.save()

    def _updated_recently(self, last_updated):
        """Return True when ``last_updated`` is less than RESYNC_INTERVAL seconds old.

        A missing timestamp is treated as not recent so the counter is re-synced.
        """
        if last_updated is None:
            return False
        if last_updated.tzinfo is None:
            # naive timestamp: compare against naive local time
            now = datetime.datetime.now()
        else:
            # aware timestamp: compare against an aware "now" so the host's UTC
            # offset does not skew the age
            now = datetime.datetime.now(datetime.timezone.utc)
        return (now - last_updated).total_seconds() < self.RESYNC_INTERVAL

    def _fetch_upstream_count(self, upstream, namespace, name):
        """Return the upstream download count for a collection, or None on failure.

        Transport errors, non-JSON responses and payloads without the expected
        ``data.collection.download_count`` entry are logged and reported as None
        so that one bad collection does not abort the whole run.
        """
        detail_url = (
            upstream
            + f'/api/internal/ui/repo-or-collection-detail/?namespace={namespace}&name={name}'
        )
        log.info('\t' + detail_url)

        try:
            drr = requests.get(detail_url, timeout=self.REQUEST_TIMEOUT)
            ds = drr.json()
        except (requests.RequestException, ValueError) as exc:
            # ValueError covers JSON decode failures across requests versions
            log.error(f'\tfailed to retrieve {detail_url}: {exc}')
            return None

        if not isinstance(ds, dict) or 'data' not in ds:
            log.error(ds)
            return None
        if not isinstance(ds['data'], dict) or 'collection' not in ds['data']:
            log.error(ds['data'].keys() if isinstance(ds['data'], dict) else ds['data'])
            return None

        upstream_collection = ds['data']['collection']
        if not isinstance(upstream_collection, dict):
            log.error(upstream_collection)
            return None

        cid = upstream_collection.get('id')
        dcount = upstream_collection.get('download_count')
        if dcount is None:
            log.error(f'\tno download_count for {namespace}.{name} in upstream response')
            return None

        log.info(f'\t{cid} {namespace}.{name} downloads:{dcount}')
        return dcount
100 changes: 100 additions & 0 deletions galaxy_ng/tests/integration/cli/test_community_sync.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""test_community.py - Tests related to the community featureset.
"""

import json
import pytest
import requests
import subprocess

from ansible.galaxy.api import GalaxyError

from ..utils import get_client, wait_for_task


pytestmark = pytest.mark.qa # noqa: F821


@pytest.mark.deployment_community
def test_community_collection_download_count_sync(ansible_config):
    """Sync one upstream collection, run the count sync command, check the count.

    Steps: find an upstream collection that is missing locally, point the
    'community' remote at it, sync the 'community' repository, run the
    sync-collection-download-counts management command, then assert the
    collection reports a positive download_count through the API.
    """

    # FIXME - once beta switches over, this test is no longer needed.

    api_client = get_client(ansible_config("admin"), require_auth=True)

    # page through the upstream list until a collection is missing locally ...
    upstream_host = 'https://galaxy.ansible.com'
    sync_collection = None
    page_url = upstream_host + '/api/v2/collections/'
    while page_url:
        page = requests.get(page_url).json()
        for entry in page['results']:
            namespace = entry['namespace']['name']
            name = entry['name']
            local_url = (
                '/api/v3/plugin/ansible/content/community'
                + f'/collections/index/{namespace}/{name}/'
            )
            try:
                api_client.request(local_url)
            except GalaxyError:
                # the local lookup failed, so this one is a sync candidate
                sync_collection = (namespace, name)
                break

        # stop on the first hit, or when upstream has no further page
        if sync_collection or not page['next_link']:
            break

        page_url = upstream_host + page['next_link']

    assert sync_collection, "all upstream collections already exist on the system ... how?"

    # configure the remote
    remote_list = api_client.request('/api/pulp/api/v3/remotes/ansible/collection/')
    remotes = {remote['name']: remote for remote in remote_list['results']}
    community_remote_href = remotes['community']['pulp_href']
    requirements = {'collections': ['.'.join(sync_collection)]}
    remote_task = api_client.request(
        community_remote_href,
        method='PATCH',
        args={
            'name': 'community',
            'url': 'https://galaxy.ansible.com/',
            'sync_dependencies': False,
            'requirements_file': json.dumps(requirements),
        }
    )
    wait_for_task(api_client, remote_task)

    # start the sync
    repo_list = api_client.request('/api/pulp/api/v3/repositories/ansible/ansible/')
    repos = {repo['name']: repo for repo in repo_list['results']}
    sync_task = api_client.request(
        repos['community']['pulp_href'] + 'sync/',
        method='POST',
        args={'mirror': False, 'optimize': False, 'remote': community_remote_href}
    )

    # wait for the sync
    wait_for_task(api_client, sync_task)

    # run the django command
    pid = subprocess.run(
        'pulpcore-manager sync-collection-download-counts',
        shell=True,
        capture_output=True
    )
    assert pid.returncode == 0

    # check the counter in the api
    synced_namespace, synced_name = sync_collection
    check_resp = api_client.request(
        '/api/v3/plugin/ansible/content/community/collections/'
        + f'index/{synced_namespace}/{synced_name}/'
    )
    assert check_resp['download_count'] > 0, check_resp

0 comments on commit 00edd0b

Please sign in to comment.