diff --git a/content_sync/constants.py b/content_sync/constants.py index a7e709eb2..d29639e2f 100644 --- a/content_sync/constants.py +++ b/content_sync/constants.py @@ -19,3 +19,8 @@ DEV_DRAFT_URL = "http://10.1.0.102:8044" DEV_LIVE_URL = "http://10.1.0.102:8045" DEV_TEST_URL = "http://10.1.0.102:8046" + + +# Publish Date Constants +PUBLISH_DATE_LIVE = "publish_date" +PUBLISH_DATE_DRAFT = "draft_publish_date" diff --git a/content_sync/pipelines/definitions/concourse/common/identifiers.py b/content_sync/pipelines/definitions/concourse/common/identifiers.py index fc3a047e2..f27a0fb56 100644 --- a/content_sync/pipelines/definitions/concourse/common/identifiers.py +++ b/content_sync/pipelines/definitions/concourse/common/identifiers.py @@ -9,6 +9,7 @@ S3_IAM_RESOURCE_TYPE_IDENTIFIER = Identifier("s3-resource-iam").root OCW_STUDIO_WEBHOOK_RESOURCE_TYPE_IDENTIFIER = Identifier("ocw-studio-webhook").root OCW_STUDIO_WEBHOOK_CURL_STEP_IDENTIFIER = Identifier("ocw-studio-webhook-curl").root +OCW_STUDIO_WEBHOOK_OFFLINE_GATE_IDENTIFIER = Identifier("offline-build-gate").root SLACK_ALERT_RESOURCE_IDENTIFIER = Identifier("slack-alert").root WEBPACK_MANIFEST_S3_IDENTIFIER = Identifier("webpack-manifest-s3").root WEBPACK_MANIFEST_S3_TRIGGER_IDENTIFIER = Identifier( diff --git a/content_sync/pipelines/definitions/concourse/common/resource_types.py b/content_sync/pipelines/definitions/concourse/common/resource_types.py index d41131f9f..42337a448 100644 --- a/content_sync/pipelines/definitions/concourse/common/resource_types.py +++ b/content_sync/pipelines/definitions/concourse/common/resource_types.py @@ -17,7 +17,7 @@ def __init__(self, **kwargs): super().__init__( name=HTTP_RESOURCE_TYPE_IDENTIFIER, type=REGISTRY_IMAGE, - source={"repository": "jgriff/http-resource", "tag": "latest"}, + source={"repository": "umar8hassan/http-resource", "tag": "latest"}, **kwargs, ) diff --git a/content_sync/pipelines/definitions/concourse/common/resources.py 
b/content_sync/pipelines/definitions/concourse/common/resources.py index 81bb8800e..ba6f9f2a9 100644 --- a/content_sync/pipelines/definitions/concourse/common/resources.py +++ b/content_sync/pipelines/definitions/concourse/common/resources.py @@ -11,6 +11,7 @@ HTTP_RESOURCE_TYPE_IDENTIFIER, OCW_HUGO_PROJECTS_GIT_IDENTIFIER, OCW_HUGO_THEMES_GIT_IDENTIFIER, + OCW_STUDIO_WEBHOOK_OFFLINE_GATE_IDENTIFIER, OCW_STUDIO_WEBHOOK_RESOURCE_TYPE_IDENTIFIER, S3_IAM_RESOURCE_TYPE_IDENTIFIER, SLACK_ALERT_RESOURCE_IDENTIFIER, @@ -124,6 +125,45 @@ def __init__( ) +class OcwStudioOfflineGateResource(Resource): + """ + A Resource for making API calls to ocw-studio to check if a Website's + offline version is downloadable. + + args: + site_name(str): The name of the site the status is in reference to + api_token(str): The ocw-studio API token + """ + + def __init__( + self, + site_name: str, + api_token: str, + **kwargs, + ): + ocw_studio_url = get_ocw_studio_api_url() + api_path = os.path.join( # noqa: PTH118 + "api", "websites", site_name, "hide_download" + ) + api_url = f"{urljoin(ocw_studio_url, api_path)}/" + super().__init__( + name=OCW_STUDIO_WEBHOOK_OFFLINE_GATE_IDENTIFIER, + icon="language-python", + type=HTTP_RESOURCE_TYPE_IDENTIFIER, + check_every="never", + source={ + "url": api_url, + "method": "GET", + "version": {"jq": ".version", "default": "none"}, + "headers": { + "Content-Type": "application/json", + "Authorization": f"Bearer {api_token}", + }, + }, + **kwargs, + ) + + class WebpackManifestResource(Resource): """ A Resource for fetching the ocw-hugo-themes webpack manifest from S3 diff --git a/content_sync/pipelines/definitions/concourse/mass_build_sites.py b/content_sync/pipelines/definitions/concourse/mass_build_sites.py index 9722b4beb..230c7e31a 100644 --- a/content_sync/pipelines/definitions/concourse/mass_build_sites.py +++ b/content_sync/pipelines/definitions/concourse/mass_build_sites.py @@ -90,7 +90,7 @@ def __init__( # noqa: PLR0913 hugo_arg_overrides: 
Optional[str] = None, ): vars = get_common_pipeline_vars() # noqa: A001 - sites = list(get_publishable_sites(version)) + sites = list(get_publishable_sites(version, is_offline=offline)) shuffle(sites) self.sites = sites self.version = version diff --git a/content_sync/pipelines/definitions/concourse/site_pipeline.py b/content_sync/pipelines/definitions/concourse/site_pipeline.py index fb7108a79..3ada832fa 100644 --- a/content_sync/pipelines/definitions/concourse/site_pipeline.py +++ b/content_sync/pipelines/definitions/concourse/site_pipeline.py @@ -25,9 +25,9 @@ from content_sync.constants import DEV_TEST_URL, TARGET_OFFLINE, TARGET_ONLINE from content_sync.pipelines.definitions.concourse.common.identifiers import ( - KEYVAL_RESOURCE_TYPE_IDENTIFIER, OCW_HUGO_PROJECTS_GIT_IDENTIFIER, OCW_HUGO_THEMES_GIT_IDENTIFIER, + OCW_STUDIO_WEBHOOK_OFFLINE_GATE_IDENTIFIER, SITE_CONTENT_GIT_IDENTIFIER, STATIC_RESOURCES_S3_IDENTIFIER, WEBPACK_ARTIFACTS_IDENTIFIER, @@ -45,6 +45,7 @@ from content_sync.pipelines.definitions.concourse.common.resources import ( OcwHugoProjectsGitResource, OcwHugoThemesGitResource, + OcwStudioOfflineGateResource, OcwStudioWebhookResource, OpenCatalogResource, SiteContentGitResource, @@ -303,6 +304,10 @@ def __init__(self, config: SitePipelineDefinitionConfig): site_name=config.vars["site_name"], api_token=settings.API_BEARER_TOKEN or "", ) + ocw_studio_webhook_offline_gate_resource = OcwStudioOfflineGateResource( + site_name=config.vars["site_name"], + api_token=settings.API_BEARER_TOKEN or "", + ) self.extend( [ webpack_manifest_resource, @@ -310,6 +315,7 @@ def __init__(self, config: SitePipelineDefinitionConfig): ocw_hugo_themes_resource, ocw_hugo_projects_resource, ocw_studio_webhook_resource, + ocw_studio_webhook_offline_gate_resource, SlackAlertResource(), ] ) @@ -821,7 +827,9 @@ class SitePipelineDefinition(Pipeline): config(SitePipelineDefinitionConfig): The site pipeline configuration object """ - _offline_build_gate_identifier = 
Identifier("offline-build-gate").root + _offline_build_gate_identifier = Identifier( + OCW_STUDIO_WEBHOOK_OFFLINE_GATE_IDENTIFIER + ).root _online_site_job_identifier = Identifier("online-site-job").root _offline_site_job_identifier = Identifier("offline-site-job").root @@ -834,24 +842,14 @@ def __init__(self, config: SitePipelineDefinitionConfig, **kwargs): resource_types = SitePipelineResourceTypes() resource_types.append(KeyvalResourceType()) resources = SitePipelineResources(config=config) - offline_build_gate_resource = Resource( - name=self._offline_build_gate_identifier, - type=KEYVAL_RESOURCE_TYPE_IDENTIFIER, - icon="gate", - check_every="never", - ) - resources.append(offline_build_gate_resource) online_job = self.get_online_build_job(config=config) - offline_build_gate_put_step = add_error_handling( - step=PutStep( + offline_build_gate_put_step = TryStep( + try_=PutStep( put=self._offline_build_gate_identifier, - params={"mapping": "timestamp = now()"}, - inputs=[], - ), - step_description=f"{self._offline_build_gate_identifier} task step", - pipeline_name=config.vars["pipeline_name"], - short_id=config.vars["short_id"], - instance_vars=config.vars["instance_vars"], + timeout="1m", + attempts=1, + get_params={"no_get": True, "strict": True}, + ) ) online_job.plan.append(offline_build_gate_put_step) offline_job = self.get_offline_build_job(config=config) diff --git a/content_sync/pipelines/definitions/concourse/site_pipeline_test.py b/content_sync/pipelines/definitions/concourse/site_pipeline_test.py index c5733439c..4a714edd3 100644 --- a/content_sync/pipelines/definitions/concourse/site_pipeline_test.py +++ b/content_sync/pipelines/definitions/concourse/site_pipeline_test.py @@ -191,7 +191,7 @@ def test_generate_theme_assets_pipeline_definition( # noqa: C901, PLR0912, PLR0 field="name", value=pipeline_definition._offline_build_gate_identifier, # noqa: SLF001 ) - assert offline_build_gate_resource["type"] == "keyval" + assert 
offline_build_gate_resource["type"] == "http-resource" site_content_git_resource = get_dict_list_item_by_field( items=resources, field="name", value=SITE_CONTENT_GIT_IDENTIFIER ) @@ -428,7 +428,7 @@ def assert_base_build_tasks(tasks: list[dict], offline: bool): # noqa: FBT001 == config.vars["pipeline_name"] ) assert ( - online_site_tasks[-1]["put"] + online_site_tasks[-1]["try"]["put"] == pipeline_definition._offline_build_gate_identifier # noqa: SLF001 ) offline_site_job = get_dict_list_item_by_field( diff --git a/content_sync/tasks.py b/content_sync/tasks.py index a26bd5de7..2b1430ecd 100644 --- a/content_sync/tasks.py +++ b/content_sync/tasks.py @@ -20,6 +20,7 @@ from content_sync.constants import VERSION_DRAFT, VERSION_LIVE, WEBSITE_LISTING_DIRPATH from content_sync.decorators import single_task from content_sync.models import ContentSyncState +from content_sync.utils import get_publishable_sites from main.celery import app from main.s3_utils import get_boto3_resource from websites.api import ( @@ -460,13 +461,14 @@ def update_websites_in_root_website(): if settings.CONTENT_SYNC_BACKEND: root_website = Website.objects.get(name=settings.ROOT_WEBSITE_NAME) # Get all sites, minus any sites that have never been successfully published - sites = Website.objects.exclude( - Q(**{"draft_publish_date__isnull": True}) - & Q(**{"publish_date__isnull": True}) - ) - sites = sites.exclude(Q(url_path__isnull=True)) + # and have downloading disabled + sites = get_publishable_sites(is_offline=True) # Exclude the root website sites = sites.exclude(name=settings.ROOT_WEBSITE_NAME) + + # Remove the content for unpublished or not downloadable sites + WebsiteContent.objects.exclude(website__in=sites).delete() + fields = [ "website", "type", diff --git a/content_sync/utils.py b/content_sync/utils.py index 2d3a4baf6..94bcb5e17 100644 --- a/content_sync/utils.py +++ b/content_sync/utils.py @@ -22,14 +22,15 @@ OFFLINE_START, ONLINE_END, ONLINE_START, + PUBLISH_DATE_DRAFT, + 
PUBLISH_DATE_LIVE, START_TAG_PREFIX, TARGET_OFFLINE, VERSION_DRAFT, - VERSION_LIVE, ) from main.s3_utils import get_boto3_resource from main.utils import is_dev -from websites.constants import WEBSITE_CONTENT_FILETYPE +from websites.constants import CONTENT_TYPE_METADATA, WEBSITE_CONTENT_FILETYPE from websites.models import Website, WebsiteContent from websites.site_config_api import SiteConfig @@ -158,24 +159,42 @@ def get_ocw_studio_api_url(): return "http://10.1.0.102:8043" if is_dev() else settings.SITE_BASE_URL -def get_publishable_sites(version: str): +def get_publishable_sites(version: str = "", *, is_offline: bool = False): """ Get a QuerySet of Website objects that are eligible for publishing Args: version(str): The version (draft/live) to check publish eligibility with + is_offline(bool): Is the build type offline or not """ - publish_date_field = ( - "publish_date" if version == VERSION_LIVE else "draft_publish_date" - ) + publish_date_field_filter = { + f"{PUBLISH_DATE_DRAFT}__isnull": True, + f"{PUBLISH_DATE_LIVE}__isnull": True, + } + + if version: + publish_date_field_filter.pop( + f"{PUBLISH_DATE_LIVE}__isnull" + if version == VERSION_DRAFT + else f"{PUBLISH_DATE_DRAFT}__isnull" + ) + # Get all sites, minus any sites that have never been successfully published and test sites # noqa: E501 sites = ( Website.objects.exclude( - Q(**{f"{publish_date_field}__isnull": True}) | Q(url_path__isnull=True) + Q(**publish_date_field_filter) | Q(url_path__isnull=True) ) .exclude(unpublish_status__isnull=False) .exclude(name__in=settings.OCW_TEST_SITE_SLUGS) ) + + if is_offline: + # filter sites with download disabled + download_disabled_sites = WebsiteContent.objects.filter( + type=CONTENT_TYPE_METADATA, metadata__hide_download=True + ).values_list("website__short_id", flat=True) + sites = sites.exclude(short_id__in=download_disabled_sites) + return sites.prefetch_related("starter") diff --git a/content_sync/utils_test.py b/content_sync/utils_test.py index 
354dd2f90..2fc12b34b 100644 --- a/content_sync/utils_test.py +++ b/content_sync/utils_test.py @@ -40,6 +40,7 @@ ) from main.s3_utils import get_boto3_client from ocw_import.conftest import MOCK_BUCKET_NAME, setup_s3 +from websites.constants import CONTENT_TYPE_METADATA from websites.factories import WebsiteContentFactory, WebsiteStarterFactory from websites.site_config_api import ConfigItem, SiteConfig @@ -257,7 +258,8 @@ def test_get_ocw_studio_api_url(settings, mocker, is_dev): @pytest.mark.parametrize("version", [VERSION_DRAFT, VERSION_LIVE]) -def test_get_publishable_sites(settings, mocker, mass_build_websites, version): +@pytest.mark.parametrize("offline", [False, True]) +def test_get_publishable_sites(settings, mocker, mass_build_websites, version, offline): """get_publishable_sites should return a queryset of sites that have been published before""" unpublished_site = mass_build_websites[0] if version == VERSION_DRAFT: @@ -270,8 +272,19 @@ def test_get_publishable_sites(settings, mocker, mass_build_websites, version): test_site.name = test_site_slug test_site.save() assert len(mass_build_websites) == 7 - publishable_sites = get_publishable_sites(version) - assert publishable_sites.count() == 4 + + publishable_sites_count = 4 + + if offline: + WebsiteContentFactory.create( + type=CONTENT_TYPE_METADATA, + metadata={"hide_download": True}, + website=mass_build_websites[4], + ) + publishable_sites_count = 3 + + publishable_sites = get_publishable_sites(version, is_offline=offline) + assert publishable_sites.count() == publishable_sites_count @pytest.mark.parametrize("version", [VERSION_DRAFT, VERSION_LIVE]) diff --git a/websites/views.py b/websites/views.py index a7716028f..ff5a54b22 100644 --- a/websites/views.py +++ b/websites/views.py @@ -315,6 +315,19 @@ def pipeline_status(self, request, name=None): ) return Response(status=200) + @action(detail=True, methods=["get"], permission_classes=[BearerTokenPermission]) + def hide_download(self, request, name=None): 
# noqa: ARG002 +        """Return a gate version for offline builds, or empty data if the site has hide_download set""" +        website = get_object_or_404(Website, name=name) +        content = WebsiteContent.objects.get( +            website=website, type=CONTENT_TYPE_METADATA +        ) +        hide_download = content and content.metadata.get("hide_download") +        return Response( +            status=200, +            data={} if hide_download else {"version": str(now_in_utc().timestamp())}, +        ) + + class WebsiteMassBuildViewSet(viewsets.ViewSet): """Return a list of previously published sites, with the info required by the mass-build-sites pipeline"""  # noqa: E501