diff --git a/CHANGES/6244.feature b/CHANGES/6244.feature new file mode 100644 index 0000000000..dd4c7bb781 --- /dev/null +++ b/CHANGES/6244.feature @@ -0,0 +1 @@ +Added support to create and distribute checkpoint publications in Pulp. \ No newline at end of file diff --git a/CHANGES/plugin_api/6244.feature b/CHANGES/plugin_api/6244.feature new file mode 100644 index 0000000000..1fd7720d04 --- /dev/null +++ b/CHANGES/plugin_api/6244.feature @@ -0,0 +1,3 @@ +Added support to create and distribute checkpoint publications in Pulp. +Plugins can choose to enable this feature by exposing the checkpoint field in their inherited PublicationSerializer and DistributionSerializer. +Checkpoint publications and distributions can be created by passing checkpoint=True when creating them. \ No newline at end of file diff --git a/CHANGES/pulp_file/6244.feature b/CHANGES/pulp_file/6244.feature new file mode 100644 index 0000000000..baabc1eb2e --- /dev/null +++ b/CHANGES/pulp_file/6244.feature @@ -0,0 +1 @@ +Added support to create checkpoint file publications and distribute them through checkpoint file distributions. \ No newline at end of file diff --git a/docs/admin/reference/tech-preview.md b/docs/admin/reference/tech-preview.md index 8377ce5b2f..eb079041a7 100644 --- a/docs/admin/reference/tech-preview.md +++ b/docs/admin/reference/tech-preview.md @@ -5,3 +5,4 @@ The following features are currently being released as part of tech preview: - [Support for Open Telemetry](site:pulpcore/docs/admin/learn/architecture/#telemetry-support) - Upstream replicas - Domains - Multi-Tenancy +- [Checkpoint](site:pulpcore/docs/user/guides/checkpoint.md) \ No newline at end of file diff --git a/docs/dev/learn/subclassing/checkpoint.md b/docs/dev/learn/subclassing/checkpoint.md new file mode 100644 index 0000000000..d9f9f231da --- /dev/null +++ b/docs/dev/learn/subclassing/checkpoint.md @@ -0,0 +1,64 @@ +# Checkpoint + +!!! 
warning + This feature is provided as a tech preview and could change in backwards incompatible + ways in the future. + +Pulp's checkpoint feature offers a robust way to manage and access historical versions of +repositories. By integrating checkpoints into your plugins, you enable users to recreate +environments from specific points in time, which is invaluable for identifying when changes or +regressions were introduced. This feature supports reproducible deployments, helps track changes in +package behavior, and facilitates a structured update workflow. + +!!! warning + The checkpoint feature is only supported for plugins using publications. + +Plugin writers need to expose the `checkpoint` field on their distribution and publication +serializers to allow users to create checkpoint publications and create checkpoint distributions to +serve these publications. The `checkpoint` field is already present on the base distribution and +publication models, so no new migration is needed. + +Example: enabling the checkpoint feature in the pulp_file plugin. +```python +class FileDistributionSerializer(DistributionSerializer): + """ + Serializer for File Distributions. + """ + publication = DetailRelatedField( + required=False, + help_text=_("Publication to be served"), + view_name_pattern=r"publications(-.*/.*)?-detail", + queryset=models.Publication.objects.exclude(complete=False), + allow_null=True, + ) + checkpoint = serializers.BooleanField(default=False) + + class Meta: + fields = DistributionSerializer.Meta.fields + ("publication", "checkpoint") + model = FileDistribution +``` + +```python +class FilePublicationSerializer(PublicationSerializer): + """ + Serializer for File Publications. 
+ """ + distributions = DetailRelatedField( + help_text=_("This publication is currently hosted as defined by these distributions."), + source="distribution_set", + view_name="filedistributions-detail", + many=True, + read_only=True, + ) + manifest = serializers.CharField( + help_text=_("Filename to use for manifest file containing metadata for all the files."), + default="PULP_MANIFEST", + required=False, + allow_null=True, + ) + checkpoint = serializers.BooleanField(default=False) + + class Meta: + model = FilePublication + fields = PublicationSerializer.Meta.fields + ("distributions", "manifest", "checkpoint") +``` \ No newline at end of file diff --git a/docs/user/guides/checkpoint.md b/docs/user/guides/checkpoint.md new file mode 100644 index 0000000000..82b87a47cb --- /dev/null +++ b/docs/user/guides/checkpoint.md @@ -0,0 +1,119 @@ +# Create and Distribute Checkpoints + +!!! warning + This feature requires plugin support to work correctly. + +!!! warning + This feature is provided as a tech preview and could change in backwards incompatible + ways in the future. + +## Overview + +Checkpoints in Pulp provide a way to access and manage historical versions of repositories. This +feature allows users to view and install packages as they existed at specific points in time. By +using checkpoints, you can recreate environments from any given date/time, which is particularly +useful for tracking down when changes or regressions were introduced. + +Checkpoints support reproducible deployments, help identify changes in package behavior over time, +and facilitate a structured update workflow. This ensures that a validated environment can be +consistently replicated across different stages of development and production. + +For a similar concept, you can refer to [Debian's snapshot archive](https://snapshot.debian.org/), +which offers access to old snapshots of the repositories based on timestamps. + +## Enabling Checkpoints + +Checkpoint is a plugin-dependent feature. 
It needs to be enabled in a plugin before you can start +using it. + +## Creating Checkpoints + +The first step to start using checkpoint, is to create a checkpoint distribution which will be used +to distribute checkpoint publications. A checkpoint distribution serves all the checkpoint +publications of the related repository. + +```bash +pulp file distribution create \ + --name \ + --repository \ + --base-path \ + --checkpoint +``` + +The next step is to create checkpoint publications. Only publications marked as checkpoint will be +served from the checkpoint distribution. Checkpoint publications can only be created using the +repository's latest version. Repository versions of the distributed checkpoint publications will be +protected from the `retain_repo_versions` cleanup. + +```bash +pulp file publication create \ + --repository \ + --checkpoint +``` + +## Accessing Checkpoints + +### Listing All Checkpoints +You can access a listing of all the available repository's checkpoint publications by accessing the +base path of any of the repository's checkpoint distributions. + +```bash +http :24816/pulp/content/checkpoint/myfile +``` + +```html + +Index of checkpoint/myfile/ + +

Index of checkpoint/myfile/

+
../
+20250130T203000Z/                                  30-Jan-2025 20:30
+20250130T205000Z/                                  30-Jan-2025 20:50
+

+ +``` + +### Accessing a Specific Checkpoint +To access a specific checkpoint, suffix the checkpoint distribution's path with a timestamp in the format +`yyyyMMddTHHmmssZ` (e.g. 20250130T205339Z). If a checkpoint was created at this time, it will be +served. Otherwise, you will be redirected to the latest checkpoint created before this timestamp. +Trying to access a checkpoint using a timestamp in the future or before the first checkpoint's +timestamp will result in a 404 response. + +Assuming the checkpoints from the above example, the table below shows the responses for sample requests: + + + + + + + + + + + + + + + + + + + + + + + + + +
Request pathResponse
checkpoint/myfile/20250130T203000Z/200
checkpoint/myfile/20250130T204000Z/ + 301
+ Location: checkpoint/myfile/20250130T203000Z/ +
checkpoint/myfile/20250130T210000Z/ + 301
+ Location: checkpoint/myfile/20250130T205000Z/ +
checkpoint/myfile/20250130T202000Z/ + 404 +
checkpoint/myfile/29250130T203000Z/ + 404 +
diff --git a/docs/user/guides/update-repo-retention.md b/docs/user/guides/update-repo-retention.md index 27a3255a21..67130ead2d 100644 --- a/docs/user/guides/update-repo-retention.md +++ b/docs/user/guides/update-repo-retention.md @@ -15,7 +15,7 @@ Setting retain_repo_versions to 1 effectively disables repository versioning sin store the latest version. Cleanup will ignore any repo versions that are being served directly via a distribution or via a -publication. +publication. This includes repo versions of distributed checkpoint publications. To update this field for a file Repository called myrepo, simply call: diff --git a/pulp_file/app/serializers.py b/pulp_file/app/serializers.py index 8865c7b8fc..e29d93cc1b 100644 --- a/pulp_file/app/serializers.py +++ b/pulp_file/app/serializers.py @@ -115,10 +115,11 @@ class FilePublicationSerializer(PublicationSerializer): required=False, allow_null=True, ) + checkpoint = serializers.BooleanField(default=False) class Meta: model = FilePublication - fields = PublicationSerializer.Meta.fields + ("distributions", "manifest") + fields = PublicationSerializer.Meta.fields + ("distributions", "manifest", "checkpoint") class FileDistributionSerializer(DistributionSerializer): @@ -133,9 +134,10 @@ class FileDistributionSerializer(DistributionSerializer): queryset=models.Publication.objects.exclude(complete=False), allow_null=True, ) + checkpoint = serializers.BooleanField(default=False) class Meta: - fields = DistributionSerializer.Meta.fields + ("publication",) + fields = DistributionSerializer.Meta.fields + ("publication", "checkpoint") model = FileDistribution diff --git a/pulp_file/app/tasks/publishing.py b/pulp_file/app/tasks/publishing.py index 36e86b71a4..3893fe580a 100644 --- a/pulp_file/app/tasks/publishing.py +++ b/pulp_file/app/tasks/publishing.py @@ -19,7 +19,7 @@ log = logging.getLogger(__name__) -def publish(manifest, repository_version_pk): +def publish(manifest, repository_version_pk, checkpoint=False): """ Create a 
Publication based on a RepositoryVersion. @@ -37,7 +37,9 @@ def publish(manifest, repository_version_pk): ) with tempfile.TemporaryDirectory(dir="."): - with FilePublication.create(repo_version, pass_through=True) as publication: + with FilePublication.create( + repo_version, pass_through=True, checkpoint=checkpoint + ) as publication: publication.manifest = manifest if manifest: manifest = Manifest(manifest) diff --git a/pulp_file/app/viewsets.py b/pulp_file/app/viewsets.py index b2b80cac68..4ba5a3d935 100644 --- a/pulp_file/app/viewsets.py +++ b/pulp_file/app/viewsets.py @@ -433,11 +433,16 @@ def create(self, request): serializer.is_valid(raise_exception=True) repository_version = serializer.validated_data.get("repository_version") manifest = serializer.validated_data.get("manifest") + checkpoint = serializer.validated_data.get("checkpoint") result = dispatch( tasks.publish, shared_resources=[repository_version.repository], - kwargs={"repository_version_pk": str(repository_version.pk), "manifest": manifest}, + kwargs={ + "repository_version_pk": str(repository_version.pk), + "manifest": manifest, + "checkpoint": checkpoint, + }, ) return OperationPostponedResponse(result, request) diff --git a/pulpcore/app/migrations/0128_distribution_checkpoint_publication_checkpoint.py b/pulpcore/app/migrations/0128_distribution_checkpoint_publication_checkpoint.py new file mode 100644 index 0000000000..a24518f79f --- /dev/null +++ b/pulpcore/app/migrations/0128_distribution_checkpoint_publication_checkpoint.py @@ -0,0 +1,23 @@ +# Generated by Django 4.2.18 on 2025-01-30 19:14 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("core", "0127_remove_upstreampulp_pulp_label_select"), + ] + + operations = [ + migrations.AddField( + model_name="distribution", + name="checkpoint", + field=models.BooleanField(default=False), + ), + migrations.AddField( + model_name="publication", + name="checkpoint", + 
field=models.BooleanField(default=False, editable=False), + ), + ] diff --git a/pulpcore/app/models/publication.py b/pulpcore/app/models/publication.py index c19d5cf032..c8efc04534 100644 --- a/pulpcore/app/models/publication.py +++ b/pulpcore/app/models/publication.py @@ -73,6 +73,7 @@ class Publication(MasterModel): pass_through (models.BooleanField): Indicates that the publication is a pass-through to the repository version. Enabling pass-through has the same effect as creating a PublishedArtifact for all of the content (artifacts) in the repository. + checkpoint (models.BooleanField): Indicates a checkpoint publication. Relations: repository_version (models.ForeignKey): The RepositoryVersion used to @@ -98,12 +99,13 @@ class Publication(MasterModel): complete = models.BooleanField(db_index=True, default=False) pass_through = models.BooleanField(default=False) + checkpoint = models.BooleanField(default=False, editable=False) repository_version = models.ForeignKey("RepositoryVersion", on_delete=models.CASCADE) pulp_domain = models.ForeignKey("Domain", default=get_domain_pk, on_delete=models.PROTECT) @classmethod - def create(cls, repository_version, pass_through=False): + def create(cls, repository_version, pass_through=False, checkpoint=False): """ Create a publication. @@ -125,7 +127,11 @@ def create(cls, repository_version, pass_through=False): Adds a Task.created_resource for the publication. """ with transaction.atomic(): - publication = cls(pass_through=pass_through, repository_version=repository_version) + publication = cls( + pass_through=pass_through, + repository_version=repository_version, + checkpoint=checkpoint, + ) publication.save() resource = CreatedResource(content_object=publication) resource.save() @@ -159,6 +165,10 @@ def delete(self, **kwargs): # It's possible for errors to occur before any publication has been completed, # so we need to handle the case when no Publication exists. 
try: + if self.checkpoint: + base_paths |= Distribution.objects.filter( + checkpoint=self.checkpoint, repository=self.repository_version.repository + ).values_list("base_path", flat=True) versions = self.repository.versions.all() pubs = Publication.objects.filter(repository_version__in=versions, complete=True) publication = pubs.latest("repository_version", "pulp_created") @@ -629,6 +639,7 @@ class Distribution(MasterModel): pulp_labels (HStoreField): Dictionary of string values. base_path (models.TextField): The base (relative) path component of the published url. hidden (models.BooleanField): Whether this distribution should be hidden in the content app. + checkpoint (models.BooleanField): Whether this distribution serves checkpoint publications. Relations: content_guard (models.ForeignKey): An optional content-guard. @@ -649,6 +660,7 @@ class Distribution(MasterModel): base_path = models.TextField() pulp_domain = models.ForeignKey("Domain", default=get_domain_pk, on_delete=models.PROTECT) hidden = models.BooleanField(default=False, null=True) + checkpoint = models.BooleanField(default=False) content_guard = models.ForeignKey(ContentGuard, null=True, on_delete=models.SET_NULL) publication = models.ForeignKey(Publication, null=True, on_delete=models.SET_NULL) @@ -706,6 +718,7 @@ def content_headers_for(self, path): "remote", "repository", "repository_version", + "checkpoint", ], has_changed=True, ) diff --git a/pulpcore/app/models/repository.py b/pulpcore/app/models/repository.py index 35a06809b9..ecb78538a2 100644 --- a/pulpcore/app/models/repository.py +++ b/pulpcore/app/models/repository.py @@ -328,7 +328,15 @@ def protected_versions(self): publication__pk__in=Distribution.objects.values_list("publication_id") ) - if distro := Distribution.objects.filter(repository=self.pk).first(): + # Protect repo versions of distributed checkpoint publications. 
+ if Distribution.objects.filter(repository=self.pk, checkpoint=True).exists(): + qs |= self.versions.filter( + publication__pk__in=Publication.objects.filter(checkpoint=True).values_list( + "pulp_id" + ) + ) + + if distro := Distribution.objects.filter(repository=self.pk, checkpoint=False).first(): if distro.detail_model().SERVE_FROM_PUBLICATION: # if the distro serves publications, protect the latest published repo version version = self.versions.filter( diff --git a/pulpcore/app/serializers/publication.py b/pulpcore/app/serializers/publication.py index 738e969181..52e68b41df 100644 --- a/pulpcore/app/serializers/publication.py +++ b/pulpcore/app/serializers/publication.py @@ -36,11 +36,18 @@ def validate(self, data): repository = data.pop("repository", None) # not an actual field on publication repository_version = data.get("repository_version") + checkpoint = data.get("checkpoint", None) if not repository and not repository_version: raise serializers.ValidationError( _("Either the 'repository' or 'repository_version' need to be specified") ) elif not repository and repository_version: + if checkpoint: + latest_version = repository_version.repository.latest_version() + if latest_version and latest_version != repository_version: + raise serializers.ValidationError( + _("Checkpoint can only be created for the repository's latest version") + ) return data elif repository and not repository_version: version = repository.latest_version() @@ -294,6 +301,8 @@ def validate(self, data): "publication", (self.partial and self.instance.publication) or None ) + checkpoint = data.get("checkpoint", (self.partial and self.instance.checkpoint) or None) + if publication_provided and repository_version_provided: raise serializers.ValidationError( _( @@ -316,6 +325,12 @@ def validate(self, data): "may be used simultaneously." 
) ) + elif checkpoint and ( + not repository_provided or publication_provided or repository_version_provided + ): + raise serializers.ValidationError( + _("The 'checkpoint' attribute may only be used with the 'repository' attribute.") + ) return data diff --git a/pulpcore/app/viewsets/publication.py b/pulpcore/app/viewsets/publication.py index ee17776d54..2a811ec702 100644 --- a/pulpcore/app/viewsets/publication.py +++ b/pulpcore/app/viewsets/publication.py @@ -70,6 +70,7 @@ class Meta: model = Publication fields = { "pulp_created": DATETIME_FILTER_OPTIONS, + "checkpoint": ["exact"], } @@ -497,6 +498,7 @@ class Meta: "name": NAME_FILTER_OPTIONS, "base_path": ["exact", "contains", "icontains", "in"], "repository": ["exact", "in"], + "checkpoint": ["exact"], } diff --git a/pulpcore/content/handler.py b/pulpcore/content/handler.py index e043b0b1c1..0b28ebdd23 100644 --- a/pulpcore/content/handler.py +++ b/pulpcore/content/handler.py @@ -6,7 +6,7 @@ import socket import struct from gettext import gettext as _ -from datetime import timedelta +from datetime import datetime, timedelta from aiohttp.client_exceptions import ClientResponseError, ClientConnectionError from aiohttp.web import FileResponse, StreamResponse, HTTPOk @@ -118,6 +118,28 @@ def __init__(self, path, distros): super().__init__(body=html, headers={"Content-Type": "text/html"}) +class CheckpointListings(HTTPOk): + """ + Response for browsing through the checkpoints of a specific checkpoint distro. + + This is returned when visiting the base path of a checkpoint distro. 
+ """ + + def __init__(self, path, repo): + """Create the HTML response.""" + + checkpoints = ( + Publication.objects.filter(repository_version__repository=repo, checkpoint=True) + .order_by("pulp_created") + .values_list("pulp_created", flat=True) + .distinct() + ) + dates = {f"{Handler._format_checkpoint_timestamp(s)}/": s for s in checkpoints} + directory_list = dates.keys() + html = Handler.render_html(directory_list, dates=dates, path=path) + super().__init__(body=html, headers={"Content-Type": "text/html"}) + + class ArtifactNotFound(Exception): """ The artifact associated with a published-artifact does not exist. @@ -164,6 +186,7 @@ class Handler: ] distribution_model = None + checkpoint_ts_format = "%Y%m%dT%H%M%SZ" @staticmethod def _reset_db_connection(): @@ -312,7 +335,7 @@ def _match_distribution(cls, path, add_trailing_slash=True): distro_model = cls.distribution_model or Distribution domain = get_domain() try: - return ( + distro_object = ( distro_model.objects.filter(pulp_domain=domain) .select_related( "repository", @@ -326,6 +349,10 @@ def _match_distribution(cls, path, add_trailing_slash=True): .get(base_path__in=base_paths) .cast() ) + + if distro_object.checkpoint: + return cls._handle_checkpoint_distribution(distro_object, original_path) + return distro_object except ObjectDoesNotExist: if path.rstrip("/") in base_paths: distros = distro_model.objects.filter( @@ -336,12 +363,7 @@ def _match_distribution(cls, path, add_trailing_slash=True): raise DistroListings(path=path, distros=distros) else: # The list of a subset of distributions was requested without a trailing / - if settings.DOMAIN_ENABLED: - raise HTTPMovedPermanently( - f"{settings.CONTENT_PATH_PREFIX}{domain.name}/{path}" - ) - else: - raise HTTPMovedPermanently(f"{settings.CONTENT_PATH_PREFIX}{path}") + Handler._redirect_sub_path(path) log.debug( _("Distribution not matched for {path} using: {base_paths}").format( @@ -351,6 +373,125 @@ def _match_distribution(cls, path, 
add_trailing_slash=True): raise PathNotResolved(original_path) + @classmethod + def _handle_checkpoint_distribution(cls, distro, original_path): + """ + Handle a checkpoint distribution. + + Args: + distro (Distribution): The checkpoint distribution. + original_path (str): The original path component of the URL. + + Returns: + The detail object of the matched distribution. + + Raises: + PathNotResolved: when the path is invalid. + CheckpointListings: when the path is the base path of a checkpoint distribution. + """ + # Determine whether it's a listing or a specific checkpoint + if original_path == f"{distro.base_path}": + Handler._redirect_sub_path(f"{original_path}/") + elif original_path == f"{distro.base_path}/": + raise CheckpointListings(path=original_path, repo=distro.repository) + else: + base_path = distro.base_path + request_timestamp = Handler._extract_checkpoint_timestamp(base_path, original_path) + + # Find the latest checkpoint publication before or at the timestamp + checkpoint_publication = ( + Publication.objects.filter( + pulp_created__lte=request_timestamp, + repository_version__repository=distro.repository, + checkpoint=True, + ) + .order_by("-pulp_created") + .first() + ) + + if not checkpoint_publication: + raise PathNotResolved(original_path) + + pub_timestamp_str = Handler._format_checkpoint_timestamp( + checkpoint_publication.pulp_created + ) + request_timestamp_str = Handler._format_checkpoint_timestamp(request_timestamp) + if pub_timestamp_str != request_timestamp_str: + Handler._redirect_sub_path(f"{base_path}/{pub_timestamp_str}/") + + distro.base_path = f"{base_path}/{request_timestamp_str}" + distro.repository = None + distro.publication = checkpoint_publication + return distro + + @staticmethod + def _extract_checkpoint_timestamp(base_path, original_path): + """ + Validate the path and extract the timestamp from it. + + Args: + base_path (str): The base path of the distribution. + original_path (str): The path component of the URL. 
+ + Returns: + The checkpoint timestamp in the request URL. + + Raises: + PathNotResolved: when the path is invalid. + """ + pattern = rf"^{re.escape(base_path)}/(\d{{8}}T\d{{6}}Z)(/.*)?$" + re.compile(pattern) + match = re.search(pattern, original_path) + if match: + request_timestamp_str = match.group(1) + try: + request_timestamp = datetime.strptime( + request_timestamp_str, Handler.checkpoint_ts_format + ) + except ValueError: + raise PathNotResolved(original_path) + else: + raise PathNotResolved(original_path) + + # The timestamp is truncated to seconds, so we need to cover the whole second + request_timestamp = request_timestamp.replace(microsecond=999999).replace( + tzinfo=timezone.utc + ) + # Future timestamps are not allowed for checkpoints + if request_timestamp > datetime.now(tz=timezone.utc): + raise PathNotResolved(original_path) + + return request_timestamp + + @staticmethod + def _format_checkpoint_timestamp(timestamp): + """ + Format a timestamp to the checkpoint format. + + Args: + timestamp (datetime): The timestamp to format. + + Returns: + The formatted timestamp using the checkpoint_ts_format. + """ + return datetime.strftime(timestamp, Handler.checkpoint_ts_format) + + @staticmethod + def _redirect_sub_path(path): + """ + Redirect to the correct path based on whether domain is enabled. + + Args: + path (str): The path component after the path prefix. + + Raises: + HTTPMovedPermanently: to the correct path. 
+ """ + if settings.DOMAIN_ENABLED: + raise HTTPMovedPermanently(f"{settings.CONTENT_PATH_PREFIX}{get_domain().name}/{path}") + else: + raise HTTPMovedPermanently(f"{settings.CONTENT_PATH_PREFIX}{path}") + @staticmethod def _permit(request, distribution): """ diff --git a/pulpcore/tests/functional/api/using_plugin/test_content_access.py b/pulpcore/tests/functional/api/using_plugin/test_content_access.py index ac068fe1d9..66f611e56c 100644 --- a/pulpcore/tests/functional/api/using_plugin/test_content_access.py +++ b/pulpcore/tests/functional/api/using_plugin/test_content_access.py @@ -1,5 +1,10 @@ """Tests related to content delivery.""" +from datetime import datetime, timedelta +import re +from time import sleep +from urllib.parse import urlparse +from aiohttp import ClientResponseError import pytest import uuid @@ -10,6 +15,7 @@ from pulpcore.tests.functional.utils import ( download_file, ) +from pulpcore.content.handler import Handler @pytest.mark.parallel @@ -69,3 +75,91 @@ def test_upload_file_on_demand_already( content = file_bindings.ContentFilesApi.read(content.pulp_href) assert content.artifact is not None + + +@pytest.mark.parallel +def test_checkpoint( + file_repository_factory, + file_distribution_factory, + file_content_unit_with_name_factory, + file_bindings, + gen_object_with_cleanup, + monitor_task, + http_get, +): + """Test checkpoint.""" + + def create_publication(repo, checkpoint): + content = file_content_unit_with_name_factory(str(uuid.uuid4())) + task = file_bindings.RepositoriesFileApi.modify( + repo.pulp_href, {"add_content_units": [content.pulp_href]} + ).task + monitor_task(task) + repo = file_bindings.RepositoriesFileApi.read(repo.pulp_href) + pub = gen_object_with_cleanup( + file_bindings.PublicationsFileApi, + {"repository_version": repo.latest_version_href, "checkpoint": checkpoint}, + ) + sleep(1) + return pub + + # setup + repo = file_repository_factory() + distribution = file_distribution_factory(repository=repo.pulp_href, 
checkpoint=True) + + pub_0 = create_publication(repo, False) + pub_1 = create_publication(repo, True) + pub_2 = create_publication(repo, False) + pub_3 = create_publication(repo, True) + pub_4 = create_publication(repo, False) + + # checkpoints listing + response = http_get(distribution.base_url).decode("utf-8") + checkpoints_ts = set(re.findall(r"\d{8}T\d{6}Z", response)) + assert len(checkpoints_ts) == 2 + assert Handler._format_checkpoint_timestamp(pub_1.pulp_created) in checkpoints_ts + assert Handler._format_checkpoint_timestamp(pub_3.pulp_created) in checkpoints_ts + + # exact ts + pub_1_url = ( + f"{distribution.base_url}{Handler._format_checkpoint_timestamp(pub_1.pulp_created)}/" + ) + response = http_get(pub_1_url).decode("utf-8") + assert f"

Index of {urlparse(pub_1_url).path}

" in response + + # invalid ts + with pytest.raises(ClientResponseError) as exc: + response = http_get(f"{distribution.base_url}invalid_ts/") + assert exc.value.status == 404 + + # arbitrary ts + pub_2_url = ( + f"{distribution.base_url}{Handler._format_checkpoint_timestamp(pub_2.pulp_created)}/" + ) + response = http_get(pub_2_url).decode("utf-8") + assert f"

Index of {urlparse(pub_1_url).path}

" in response + + # another arbitrary ts + pub_3_url = ( + f"{distribution.base_url}{Handler._format_checkpoint_timestamp(pub_3.pulp_created)}/" + ) + pub_4_url = ( + f"{distribution.base_url}{Handler._format_checkpoint_timestamp(pub_4.pulp_created)}/" + ) + response = http_get(pub_4_url).decode("utf-8") + assert f"

Index of {urlparse(pub_3_url).path}

" in response + + # before first checkpoint ts + pub_0_url = ( + f"{distribution.base_url}{Handler._format_checkpoint_timestamp(pub_0.pulp_created)}/" + ) + with pytest.raises(ClientResponseError) as exc: + http_get(pub_0_url).decode("utf-8") + assert exc.value.status == 404 + + # future ts + ts = datetime.now() + timedelta(days=1) + url = f"{distribution.base_url}{Handler._format_checkpoint_timestamp(ts)}/" + with pytest.raises(ClientResponseError) as exc: + http_get(url).decode("utf-8") + assert exc.value.status == 404 diff --git a/pulpcore/tests/functional/api/using_plugin/test_repo_versions.py b/pulpcore/tests/functional/api/using_plugin/test_repo_versions.py index 4fef29f29f..b8f261dfa6 100644 --- a/pulpcore/tests/functional/api/using_plugin/test_repo_versions.py +++ b/pulpcore/tests/functional/api/using_plugin/test_repo_versions.py @@ -1,5 +1,6 @@ """Tests related to repository versions.""" +import uuid import pytest from random import choice from tempfile import NamedTemporaryFile @@ -823,7 +824,7 @@ def test_repo_version_retention( @pytest.mark.parallel def test_repo_versions_protected_from_cleanup( file_bindings, - file_repository_content, + file_content_unit_with_name_factory, file_repository_factory, file_distribution_factory, gen_object_with_cleanup, @@ -831,7 +832,8 @@ def test_repo_versions_protected_from_cleanup( ): """Test that distributed repo versions are protected from retain_repo_versions.""" - def _modify_and_validate(repo, content, expected_version, expected_total): + def _modify_and_validate(repo, expected_version, expected_total): + content = file_content_unit_with_name_factory(str(uuid.uuid4())) task = file_bindings.RepositoriesFileApi.modify( repo.pulp_href, {"add_content_units": [content.pulp_href]} ).task @@ -846,7 +848,6 @@ def _modify_and_validate(repo, content, expected_version, expected_total): return repo # Setup - contents = file_repository_content repo = file_repository_factory(retain_repo_versions=1) # Publish and distribute 
version 0 @@ -856,7 +857,7 @@ def _modify_and_validate(repo, content, expected_version, expected_total): file_distribution_factory(publication=publication.pulp_href) # Version 0 is protected since it's distributed - repo = _modify_and_validate(repo, contents.results[0], "1", 2) + repo = _modify_and_validate(repo, "1", 2) # Create a new publication and distribution which protects version 1 from deletion file_distribution_factory(repository=repo.pulp_href) @@ -866,10 +867,32 @@ def _modify_and_validate(repo, content, expected_version, expected_total): file_distribution_factory(publication=publication.pulp_href) # Create version 2 and there should be 3 versions now (2 protected) - repo = _modify_and_validate(repo, contents.results[1], "2", 3) + repo = _modify_and_validate(repo, "2", 3) # Version 2 will be removed since we're creating version 3 and it's not protected - _modify_and_validate(repo, contents.results[2], "3", 3) + repo = _modify_and_validate(repo, "3", 3) + + # Publish version 3 as a checkpoint and distribute it + gen_object_with_cleanup( + file_bindings.PublicationsFileApi, + {"repository_version": repo.latest_version_href, "checkpoint": True}, + ) + file_distribution_factory(repository=repo.pulp_href, checkpoint=True) + + # Version 3 is protected since it's distributed by the checkpoint distribution + repo = _modify_and_validate(repo, "4", 4) + + # Publish version 4 as a checkpoint (it's already distributed) + gen_object_with_cleanup( + file_bindings.PublicationsFileApi, + {"repository_version": repo.latest_version_href, "checkpoint": True}, + ) + + # Version 4 is protected since it's distributed by the checkpoint distribution + repo = _modify_and_validate(repo, "5", 5) + + # Version 5 will be removed since it's not protected and we're creating version 6 + _modify_and_validate(repo, "6", 5) @pytest.mark.parallel diff --git a/pulpcore/tests/unit/content/test_handler.py b/pulpcore/tests/unit/content/test_handler.py index 2dda5666d0..12ae7e1162 100644 ---
a/pulpcore/tests/unit/content/test_handler.py +++ b/pulpcore/tests/unit/content/test_handler.py @@ -1,9 +1,11 @@ +from datetime import timedelta import pytest import uuid from unittest.mock import Mock, AsyncMock -from pulpcore.content import Handler +from aiohttp.web_exceptions import HTTPMovedPermanently +from pulpcore.content.handler import Handler, CheckpointListings, PathNotResolved from pulpcore.plugin.models import ( Artifact, Content, @@ -11,6 +13,9 @@ Distribution, Remote, RemoteArtifact, + Repository, + RepositoryVersion, + Publication, ) @@ -56,6 +61,61 @@ def ra2(ca2): return Mock(content_artifact=ca2) +@pytest.fixture +def repo(): + return Repository.objects.create(name=str(uuid.uuid4())) + + +@pytest.fixture +def repo_version_1(repo): + return RepositoryVersion.objects.create(repository=repo, number=1) + + +@pytest.fixture +def repo_version_2(repo): + return RepositoryVersion.objects.create(repository=repo, number=2) + + +@pytest.fixture +def repo_version_3(repo): + return RepositoryVersion.objects.create(repository=repo, number=3) + + +@pytest.fixture +def checkpoint_distribution(repo): + return Distribution.objects.create( + name=str(uuid.uuid4()), base_path=str(uuid.uuid4()), repository=repo, checkpoint=True + ) + + +@pytest.fixture +def checkpoint_publication_1(repo_version_1): + publication = Publication.objects.create(repository_version=repo_version_1, checkpoint=True) + # Avoid creating publications in the future, which would cause a 404 + publication.pulp_created = publication.pulp_created - timedelta(seconds=6) + publication.save() + + return publication + + +@pytest.fixture +def noncheckpoint_publication(repo_version_2, checkpoint_publication_1): + publication = Publication.objects.create(repository_version=repo_version_2, checkpoint=False) + publication.pulp_created = checkpoint_publication_1.pulp_created + timedelta(seconds=2) + publication.save() + + return publication + + +@pytest.fixture +def checkpoint_publication_2(repo_version_3, 
noncheckpoint_publication): + publication = Publication.objects.create(repository_version=repo_version_3, checkpoint=True) + publication.pulp_created = noncheckpoint_publication.pulp_created + timedelta(seconds=2) + publication.save() + + return publication + + def test_save_artifact(c1, ra1, download_result_mock): """Artifact needs to be created.""" handler = Handler() @@ -267,3 +327,138 @@ def content_init(art, path): artifacts = set(ca.content._artifacts.all()) assert len(artifacts) == 2 assert {artifact, artifact123} == artifacts + + +@pytest.mark.django_db +def test_handle_checkpoint_listing( + monkeypatch, + checkpoint_distribution, + checkpoint_publication_1, + noncheckpoint_publication, + checkpoint_publication_2, +): + """Checkpoint listing is generated correctly.""" + # Extract the pulp_created timestamps + checkpoint_pub_1_ts = Handler._format_checkpoint_timestamp( + checkpoint_publication_1.pulp_created + ) + noncheckpoint_pub_ts = Handler._format_checkpoint_timestamp( + noncheckpoint_publication.pulp_created + ) + checkpoint_pub_2_ts = Handler._format_checkpoint_timestamp( + checkpoint_publication_2.pulp_created + ) + + # Mock the render_html function to capture the checkpoint list + original_render_html = Handler.render_html + checkpoint_list = None + + def mock_render_html(directory_list, dates=None, path=None): + nonlocal checkpoint_list + html = original_render_html(directory_list, dates=dates, path=path) + checkpoint_list = directory_list + return html + + render_html_mock = Mock(side_effect=mock_render_html) + monkeypatch.setattr(Handler, "render_html", render_html_mock) + + with pytest.raises(CheckpointListings): + Handler._handle_checkpoint_distribution( + checkpoint_distribution, + f"{checkpoint_distribution.base_path}/", + ) + assert len(checkpoint_list) == 2 + assert ( + f"{checkpoint_pub_1_ts}/" in checkpoint_list + ), f"{checkpoint_pub_1_ts} not found in error body" + assert ( + f"{checkpoint_pub_2_ts}/" in checkpoint_list + ), 
f"{checkpoint_pub_2_ts} not found in error body" + assert ( + f"{noncheckpoint_pub_ts}/" not in checkpoint_list + ), f"{noncheckpoint_pub_ts} found in error body" + + +@pytest.mark.django_db +def test_handle_checkpoint_exact_ts( + checkpoint_distribution, + checkpoint_publication_1, + noncheckpoint_publication, + checkpoint_publication_2, +): + """Checkpoint is correctly served when using exact timestamp.""" + checkpoint_pub_2_ts = Handler._format_checkpoint_timestamp( + checkpoint_publication_2.pulp_created + ) + distro_object = Handler._handle_checkpoint_distribution( + checkpoint_distribution, + f"{checkpoint_distribution.base_path}/{checkpoint_pub_2_ts}/", + ) + + assert distro_object is not None + assert distro_object.publication == checkpoint_publication_2 + + +@pytest.mark.django_db +def test_handle_checkpoint_invalid_ts( + checkpoint_distribution, + checkpoint_publication_1, +): + """Invalid checkpoint timestamp raises PathNotResolved.""" + with pytest.raises(PathNotResolved): + Handler._handle_checkpoint_distribution( + checkpoint_distribution, + f"{checkpoint_distribution.base_path}/99990115T181699Z/", + ) + + with pytest.raises(PathNotResolved): + Handler._handle_checkpoint_distribution( + checkpoint_distribution, + f"{checkpoint_distribution.base_path}/invalid_ts/", + ) + + +@pytest.mark.django_db +def test_handle_checkpoint_arbitrary_ts( + checkpoint_distribution, + checkpoint_publication_1, + noncheckpoint_publication, + checkpoint_publication_2, +): + """Checkpoint is correctly served when using an arbitrary timestamp.""" + request_ts = Handler._format_checkpoint_timestamp( + checkpoint_publication_1.pulp_created + timedelta(seconds=3) + ) + with pytest.raises(HTTPMovedPermanently) as excinfo: + Handler._handle_checkpoint_distribution( + checkpoint_distribution, + f"{checkpoint_distribution.base_path}/{request_ts}/", + ) + redirect_location = excinfo.value.location + + with pytest.raises(HTTPMovedPermanently) as excinfo: + Handler._redirect_sub_path( 
+ f"{checkpoint_distribution.base_path}" + f"/{Handler._format_checkpoint_timestamp(checkpoint_publication_1.pulp_created)}/" + ) + expected_location = excinfo.value.location + + assert ( + redirect_location == expected_location + ), f"Unexpected redirect location: {redirect_location}" + + +@pytest.mark.django_db +def test_handle_checkpoint_before_first_ts( + checkpoint_distribution, + checkpoint_publication_1, +): + """Checkpoint timestamp before the first checkpoint raises PathNotResolved.""" + request_ts = Handler._format_checkpoint_timestamp( + checkpoint_publication_1.pulp_created - timedelta(seconds=1) + ) + with pytest.raises(PathNotResolved): + Handler._handle_checkpoint_distribution( + checkpoint_distribution, + f"{checkpoint_distribution.base_path}/{request_ts}/", + ) diff --git a/pulpcore/tests/unit/serializers/test_repository.py b/pulpcore/tests/unit/serializers/test_repository.py index ce5184f0e3..f5dc349db3 100644 --- a/pulpcore/tests/unit/serializers/test_repository.py +++ b/pulpcore/tests/unit/serializers/test_repository.py @@ -7,6 +7,7 @@ from pulpcore.app import models from pulpcore.app.serializers import ( PublicationSerializer, + DistributionSerializer, RemoteSerializer, ) @@ -120,3 +121,26 @@ def test_validate_repository_version_only_unknown_field(): serializer = PublicationSerializer(data=data) with pytest.raises(serializers.ValidationError): serializer.validate(data) + + +def test_validate_checkpoint_and_repository(): + mock_repository = Mock() + mock_version = Mock() + mock_publication = Mock() + + data = {"checkpoint": False, "repository": mock_repository} + serializer = DistributionSerializer() + serializer.validate(data) + + data["checkpoint"] = True + serializer.validate(data) + + data.pop("repository") + data["repository_version"] = mock_version + with pytest.raises(serializers.ValidationError): + serializer.validate(data) + + data.pop("repository_version") + data["publication"] = mock_publication + with
pytest.raises(serializers.ValidationError): + serializer.validate(data)