#!/usr/bin/env python3
"""Management command that copies collection download counts from an upstream galaxy."""

import datetime
import logging
import requests

from django.core.management.base import BaseCommand
from django.db import transaction
from pulp_ansible.app.models import Collection, CollectionDownloadCount


log = logging.getLogger(__name__)


DEFAULT_UPSTREAM = 'https://galaxy.ansible.com'

SKIPLIST = [
    'larrymou9',
    'github_qe_test_user',
]

# Counters refreshed more recently than this are left alone unless --force is given.
RESYNC_INTERVAL = datetime.timedelta(days=1)

# Seconds to wait on the upstream API before giving up on a single collection.
REQUEST_TIMEOUT = 30


class Command(BaseCommand):
    """Sync CollectionDownloadCount rows with the counts reported by an upstream server.

    For every local Collection (oldest first) the upstream
    ``repo-or-collection-detail`` endpoint is queried and the local counter is
    created, or raised when the upstream value is higher. Local counts are
    never lowered.
    """

    help = 'Sync collection download counts from an upstream galaxy server'

    def add_arguments(self, parser):
        """Register the --upstream, --limit and --force command line options."""
        parser.add_argument(
            '--upstream',
            default=DEFAULT_UPSTREAM,
            help=f"remote host to retrieve data from [{DEFAULT_UPSTREAM}]"
        )
        parser.add_argument('--limit', type=int, help="stop syncing after N collections")
        parser.add_argument(
            '--force', action='store_true', help='sync all counts and ignore last update'
        )

    def handle(self, *args, **options):
        """Walk the local collections and pull each one's upstream download count.

        Options read: ``upstream`` (base URL), ``limit`` (stop after N
        collections, skipped ones included) and ``force`` (ignore the
        once-a-day freshness optimization).
        """
        log.info(f"Processing upstream download counts from {options['upstream']}")
        # A trailing slash on --upstream would otherwise produce '//api/...'.
        upstream = options['upstream'].rstrip('/')
        limit = options['limit']
        force = options.get('force', False)

        # Aware timestamp so it can be compared with the stored update time
        # without stripping timezone information.
        now = datetime.datetime.now(tz=datetime.timezone.utc)

        collection_total = Collection.objects.count()
        collections = Collection.objects.all().order_by('pulp_created')
        for collection_count, collection in enumerate(collections, start=1):

            # Check the limit before logging so exactly N collections are reported.
            if limit and collection_count > limit:
                break

            namespace = collection.namespace
            name = collection.name
            log.info(f'{collection_total}|{collection_count} {namespace}.{name}')

            if namespace in SKIPLIST:
                continue

            # optimization: don't try to resync something that changed less than a day ago
            counter = CollectionDownloadCount.objects.filter(
                namespace=namespace, name=name
            ).first()
            if (
                counter is not None
                and not force
                and self._is_fresh(counter.pulp_last_updated, now)
            ):
                continue

            dcount = self._fetch_upstream_count(upstream, namespace, name)
            if dcount is None:
                continue

            if counter is None:
                log.info(f'\tcreate downloadcount for {namespace}.{name} with value of {dcount}')
                with transaction.atomic():
                    counter = CollectionDownloadCount(
                        namespace=namespace,
                        name=name,
                        download_count=dcount
                    )
                    counter.save()
                continue

            if counter.download_count < dcount:
                log.info(
                    f'\tupdate downloadcount for {namespace}.{name}'
                    + f' from {counter.download_count} to {dcount}'
                )
                with transaction.atomic():
                    counter.download_count = dcount
                    # Without save() the new value only ever lived in memory.
                    counter.save()

    @staticmethod
    def _is_fresh(last_updated, now):
        """Return True when last_updated is less than RESYNC_INTERVAL before now."""
        if last_updated is None:
            return False
        if last_updated.tzinfo is None:
            # NOTE(review): assumes naive timestamps are stored in UTC - confirm
            # against the project's USE_TZ setting.
            last_updated = last_updated.replace(tzinfo=datetime.timezone.utc)
        return (now - last_updated) < RESYNC_INTERVAL

    @staticmethod
    def _fetch_upstream_count(upstream, namespace, name):
        """Return the upstream download count for namespace.name, or None if unavailable."""
        detail_url = (
            upstream
            + f'/api/internal/ui/repo-or-collection-detail/?namespace={namespace}&name={name}'
        )
        log.info('\t' + detail_url)

        # One unreachable or malformed response must not abort the whole run.
        try:
            drr = requests.get(detail_url, timeout=REQUEST_TIMEOUT)
            ds = drr.json()
        except (requests.RequestException, ValueError) as exc:
            log.error(f'\tfailed to fetch {detail_url}: {exc}')
            return None

        if 'data' not in ds:
            log.error(ds)
            return None
        if 'collection' not in ds['data']:
            log.error(ds['data'].keys())
            return None

        upstream_collection = ds['data']['collection']
        cid = upstream_collection.get('id')
        dcount = upstream_collection.get('download_count')
        if dcount is None:
            log.error(f'\tno download_count for {namespace}.{name} in upstream response')
            return None

        log.info(f'\t{cid} {namespace}.{name} downloads:{dcount}')
        return dcount
import json
import pytest
import requests
import subprocess

from ansible.galaxy.api import GalaxyError

from ..utils import get_client, wait_for_task


pytestmark = pytest.mark.qa  # noqa: F821

UPSTREAM_URL = 'https://galaxy.ansible.com'

# Seconds to wait on the upstream API before failing the test.
REQUEST_TIMEOUT = 30


def _find_unsynced_upstream_collection(api_client, base_url=UPSTREAM_URL):
    """Return (namespace, name) of the first upstream collection missing locally.

    Pages through the upstream v2 collection list and probes the local
    community index for each entry; a GalaxyError from the local lookup is
    taken to mean the collection is absent. Returns None when every upstream
    collection already exists locally.
    """
    next_url = base_url + '/api/v2/collections/'
    while next_url:
        rr = requests.get(next_url, timeout=REQUEST_TIMEOUT)
        # Fail with the HTTP error rather than a confusing KeyError further down.
        rr.raise_for_status()
        ds = rr.json()

        for collection in ds['results']:
            namespace = collection['namespace']['name']
            name = collection['name']
            check_url = (
                '/api/v3/plugin/ansible/content/community'
                + f'/collections/index/{namespace}/{name}/'
            )
            try:
                api_client.request(check_url)
            except GalaxyError:
                return namespace, name

        next_link = ds.get('next_link')
        next_url = base_url + next_link if next_link else None

    return None


@pytest.mark.deployment_community
def test_community_collection_download_count_sync(ansible_config):
    """ Test collection download count sync command """

    # FIXME - once beta switches over, this test is no longer needed.

    config = ansible_config("admin")
    api_client = get_client(config, require_auth=True)

    # pick an upstream collection that does not exist locally ...
    sync_collection = _find_unsynced_upstream_collection(api_client)
    assert sync_collection, "all upstream collections already exist on the system ... how?"

    # configure the remote
    resp = api_client.request('/api/pulp/api/v3/remotes/ansible/collection/')
    remotes = {x['name']: x for x in resp['results']}
    community_remote_config = {
        'name': 'community',
        'url': 'https://galaxy.ansible.com/',
        'sync_dependencies': False,
        'requirements_file': json.dumps({'collections': ['.'.join(sync_collection)]}),
    }
    remote_task = api_client.request(
        remotes['community']['pulp_href'],
        method='PATCH',
        args=community_remote_config
    )
    wait_for_task(api_client, remote_task)

    # start the sync
    resp = api_client.request('/api/pulp/api/v3/repositories/ansible/ansible/')
    repos = {x['name']: x for x in resp['results']}
    sync_payload = {'mirror': False, 'optimize': False, 'remote': remotes['community']['pulp_href']}
    sync_task = api_client.request(
        repos['community']['pulp_href'] + 'sync/',
        method='POST',
        args=sync_payload
    )

    # wait for the sync
    wait_for_task(api_client, sync_task)

    # run the django command; an argument list avoids going through a shell
    pid = subprocess.run(
        ['pulpcore-manager', 'sync-collection-download-counts'],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    assert pid.returncode == 0, pid.stderr

    # check the counter in the api
    # NOTE(review): this assumes the chosen upstream collection has at least
    # one download; a collection with zero downloads would fail here - confirm.
    check_url = (
        '/api/v3/plugin/ansible/content/community/collections/'
        + f'index/{sync_collection[0]}/{sync_collection[1]}/'
    )
    check_resp = api_client.request(check_url)
    assert check_resp['download_count'] > 0, check_resp
No-Issue Signed-off-by: James Tanner --- .../sync-collection-download-counts.py | 26 ++++++++++---- .../integration/cli/test_community_sync.py | 34 +++++++++++-------- 2 files changed, 38 insertions(+), 22 deletions(-) diff --git a/galaxy_ng/app/management/commands/sync-collection-download-counts.py b/galaxy_ng/app/management/commands/sync-collection-download-counts.py index 51901c2fe9..32e64afcdc 100644 --- a/galaxy_ng/app/management/commands/sync-collection-download-counts.py +++ b/galaxy_ng/app/management/commands/sync-collection-download-counts.py @@ -29,8 +29,10 @@ def add_arguments(self, parser): default=DEFAULT_UPSTREAM, help=f"remote host to retrieve data from [{DEFAULT_UPSTREAM}]" ) - parser.add_argument('--limit', type=int, help="stop syncing after N collections") + parser.add_argument( + '--force', action='store_true', help='sync all counts and ignore last update' + ) def handle(self, *args, **options): log.info(f"Processing upstream download counts from {options['upstream']}") @@ -55,14 +57,17 @@ def handle(self, *args, **options): if namespace in SKIPLIST: continue + # optimization: don't try to resync something that changed less than a day ago counter = CollectionDownloadCount.objects.filter(namespace=namespace, name=name).first() - if counter is not None: + if counter is not None and not args.force: delta = (now - counter.pulp_last_updated.replace(tzinfo=None)).total_seconds() - if (delta / 60) < ( 24 * 60 * 60 ): + if (delta / 60) < (24 * 60 * 60): continue - #import epdb; epdb.st() - detail_url = upstream + f'/api/internal/ui/repo-or-collection-detail/?namespace={namespace}&name={name}' + detail_url = ( + upstream + + f'/api/internal/ui/repo-or-collection-detail/?namespace={namespace}&name={name}' + ) log.info('\t' + detail_url) drr = requests.get(detail_url) ds = drr.json() @@ -80,12 +85,19 @@ def handle(self, *args, **options): if counter is None: log.info(f'\tcreate downloadcount for {namespace}.{name} with value of {dcount}') with 
transaction.atomic(): - counter = CollectionDownloadCount(namespace=namespace, name=name, download_count=dcount) + counter = CollectionDownloadCount( + namespace=namespace, + name=name, + download_count=dcount + ) counter.save() continue if counter.download_count < dcount: - log.info(f'\tupdate downloadcount for {namespace}.{name} from {counter.download_count} to {dcount}') + log.info( + f'\tupdate downloadcount for {namespace}.{name}' + + f' from {counter.download_count} to {dcount}' + ) with transaction.atomic(): counter.download_count = dcount continue diff --git a/galaxy_ng/tests/integration/cli/test_community_sync.py b/galaxy_ng/tests/integration/cli/test_community_sync.py index 4ca5c9bcd7..611566b69c 100644 --- a/galaxy_ng/tests/integration/cli/test_community_sync.py +++ b/galaxy_ng/tests/integration/cli/test_community_sync.py @@ -1,24 +1,14 @@ """test_community.py - Tests related to the community featureset. """ -import copy import json -import os -import tempfile import pytest import requests -import shutil import subprocess from ansible.galaxy.api import GalaxyError -from ..utils import ( - ansible_galaxy, - SocialGithubClient -) - -from ..utils.legacy import clean_all_roles, cleanup_social_user -from ..utils import get_client, wait_for_task +from ..utils import get_client, wait_for_task pytestmark = pytest.mark.qa # noqa: F821 @@ -43,7 +33,10 @@ def test_community_collection_download_count_sync(ansible_config): for collection in ds['results']: namespace = collection['namespace']['name'] name = collection['name'] - check_url = f'/api/v3/plugin/ansible/content/community/collections/index/{namespace}/{name}/' + check_url = ( + '/api/v3/plugin/ansible/content/community' + + f'/collections/index/{namespace}/{name}/' + ) try: api_client.request(check_url) except GalaxyError: @@ -69,14 +62,22 @@ def test_community_collection_download_count_sync(ansible_config): 'sync_dependencies': False, 'requirements_file': json.dumps({'collections': 
['.'.join(list(sync_collection))]}), } - remote_task = api_client.request(remotes['community']['pulp_href'], method='PATCH', args=community_remote_config) + remote_task = api_client.request( + remotes['community']['pulp_href'], + method='PATCH', + args=community_remote_config + ) wait_for_task(api_client, remote_task) # start the sync resp = api_client.request('/api/pulp/api/v3/repositories/ansible/ansible/') repos = dict((x['name'], x) for x in resp['results']) sync_payload = {'mirror': False, 'optimize': False, 'remote': remotes['community']['pulp_href']} - sync_task = api_client.request(repos['community']['pulp_href'] + 'sync/', method='POST', args=sync_payload) + sync_task = api_client.request( + repos['community']['pulp_href'] + 'sync/', + method='POST', + args=sync_payload + ) # wait for the sync wait_for_task(api_client, sync_task) @@ -91,6 +92,9 @@ def test_community_collection_download_count_sync(ansible_config): assert pid.returncode == 0 # check the counter in the api - check_url = f'/api/v3/plugin/ansible/content/community/collections/index/{sync_collection[0]}/{sync_collection[1]}/' + check_url = ( + '/api/v3/plugin/ansible/content/community/collections/' + + f'index/{sync_collection[0]}/{sync_collection[1]}/' + ) check_resp = api_client.request(check_url) assert check_resp['download_count'] > 0, check_resp From d417724d8cc2e60bd4e3685f4b87038c16917f8f Mon Sep 17 00:00:00 2001 From: James Tanner Date: Wed, 9 Aug 2023 08:36:38 -0400 Subject: [PATCH 4/4] Whoops. 
No-Issue Signed-off-by: James Tanner --- .../app/management/commands/sync-collection-download-counts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/galaxy_ng/app/management/commands/sync-collection-download-counts.py b/galaxy_ng/app/management/commands/sync-collection-download-counts.py index 32e64afcdc..20b3593caf 100644 --- a/galaxy_ng/app/management/commands/sync-collection-download-counts.py +++ b/galaxy_ng/app/management/commands/sync-collection-download-counts.py @@ -59,7 +59,7 @@ def handle(self, *args, **options): # optimization: don't try to resync something that changed less than a day ago counter = CollectionDownloadCount.objects.filter(namespace=namespace, name=name).first() - if counter is not None and not args.force: + if counter is not None and not options['force']: delta = (now - counter.pulp_last_updated.replace(tzinfo=None)).total_seconds() if (delta / 60) < (24 * 60 * 60): continue