Skip to content

Commit

Permalink
Exclude courses which have the download button disabled from mirror d…
Browse files Browse the repository at this point in the history
…rives (#2282)

* added filter for offline builds to exclude sites with download disabled

* added default value for is_offline argument in site build

* updated unit test for offline build

* excluded sites in content to build fuse offline index for sites

* added remove for sites content with type website for unpublished or not downloadable sites

* added ocw studio webhook resource and api to check offline build allow status

* added offline gate webhook step to update resource

* updated site pipeline to include a conditional execution of offline-gate-put-step

* added conditional put step based on API response

* added conditional version for gate resource

* added HTTP get for offline build

* added strictness in version matching for http resource

* added version matching strictness and default value to skip versions in http resource

* removed strictness in version matching to honor latest version strategy

* fixed default value matching to skip versions

* fixed versioning api for http gate

* fixed param passing to implicit get

* updated http resource to its fork umar8hassan/http-resource

* added try step to fail on null version

* fixed failing test

* fixed key error in pipeline definition test

* updated hide download API

* updated API url for offline gate resource
  • Loading branch information
umar8hassan authored Oct 24, 2024
1 parent e8abba8 commit 36e0b62
Show file tree
Hide file tree
Showing 11 changed files with 128 additions and 37 deletions.
5 changes: 5 additions & 0 deletions content_sync/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,8 @@
DEV_DRAFT_URL = "http://10.1.0.102:8044"
DEV_LIVE_URL = "http://10.1.0.102:8045"
DEV_TEST_URL = "http://10.1.0.102:8046"


# Publish Date Constants
PUBLISH_DATE_LIVE = "publish_date"
PUBLISH_DATE_DRAFT = "draft_publish_date"
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
S3_IAM_RESOURCE_TYPE_IDENTIFIER = Identifier("s3-resource-iam").root
OCW_STUDIO_WEBHOOK_RESOURCE_TYPE_IDENTIFIER = Identifier("ocw-studio-webhook").root
OCW_STUDIO_WEBHOOK_CURL_STEP_IDENTIFIER = Identifier("ocw-studio-webhook-curl").root
OCW_STUDIO_WEBHOOK_OFFLINE_GATE_IDENTIFIER = Identifier("offline-build-gate").root
SLACK_ALERT_RESOURCE_IDENTIFIER = Identifier("slack-alert").root
WEBPACK_MANIFEST_S3_IDENTIFIER = Identifier("webpack-manifest-s3").root
WEBPACK_MANIFEST_S3_TRIGGER_IDENTIFIER = Identifier(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def __init__(self, **kwargs):
super().__init__(
name=HTTP_RESOURCE_TYPE_IDENTIFIER,
type=REGISTRY_IMAGE,
source={"repository": "jgriff/http-resource", "tag": "latest"},
source={"repository": "umar8hassan/http-resource", "tag": "latest"},
**kwargs,
)

Expand Down
40 changes: 40 additions & 0 deletions content_sync/pipelines/definitions/concourse/common/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
HTTP_RESOURCE_TYPE_IDENTIFIER,
OCW_HUGO_PROJECTS_GIT_IDENTIFIER,
OCW_HUGO_THEMES_GIT_IDENTIFIER,
OCW_STUDIO_WEBHOOK_OFFLINE_GATE_IDENTIFIER,
OCW_STUDIO_WEBHOOK_RESOURCE_TYPE_IDENTIFIER,
S3_IAM_RESOURCE_TYPE_IDENTIFIER,
SLACK_ALERT_RESOURCE_IDENTIFIER,
Expand Down Expand Up @@ -124,6 +125,45 @@ def __init__(
)


class OcwStudioOfflineGateResource(Resource):
    """
    A Resource that issues a GET request to the ocw-studio ``hide_download``
    endpoint of a Website, used to check whether the site's offline version
    is downloadable.

    The resource version is read from the ``version`` key of the JSON
    response; when that key is absent the version falls back to ``"none"``.

    args:
        site_name(str): The name of the site whose download status is checked
        api_token(str): The ocw-studio API token, sent as a Bearer token
    """

    def __init__(
        self,
        site_name: str,
        api_token: str,
        **kwargs,
    ):
        ocw_studio_url = get_ocw_studio_api_url()
        # URL paths always use "/" regardless of the host OS, so the path is
        # spelled out explicitly instead of being built with os.path.join.
        api_path = f"api/websites/{site_name}/hide_download"
        api_url = f"{urljoin(ocw_studio_url, api_path)}/"
        super().__init__(
            name=OCW_STUDIO_WEBHOOK_OFFLINE_GATE_IDENTIFIER,
            icon="language-python",
            type=HTTP_RESOURCE_TYPE_IDENTIFIER,
            # Never polled on a schedule; only explicit steps touch it.
            check_every="never",
            source={
                "url": api_url,
                "method": "GET",
                "version": {"jq": ".version", "default": "none"},
                "headers": {
                    "Content-Type": "application/json",
                    "Authorization": f"Bearer {api_token}",
                },
            },
            **kwargs,
        )


class WebpackManifestResource(Resource):
"""
A Resource for fetching the ocw-hugo-themes webpack manifest from S3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def __init__( # noqa: PLR0913
hugo_arg_overrides: Optional[str] = None,
):
vars = get_common_pipeline_vars() # noqa: A001
sites = list(get_publishable_sites(version))
sites = list(get_publishable_sites(version, is_offline=offline))
shuffle(sites)
self.sites = sites
self.version = version
Expand Down
34 changes: 16 additions & 18 deletions content_sync/pipelines/definitions/concourse/site_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@

from content_sync.constants import DEV_TEST_URL, TARGET_OFFLINE, TARGET_ONLINE
from content_sync.pipelines.definitions.concourse.common.identifiers import (
KEYVAL_RESOURCE_TYPE_IDENTIFIER,
OCW_HUGO_PROJECTS_GIT_IDENTIFIER,
OCW_HUGO_THEMES_GIT_IDENTIFIER,
OCW_STUDIO_WEBHOOK_OFFLINE_GATE_IDENTIFIER,
SITE_CONTENT_GIT_IDENTIFIER,
STATIC_RESOURCES_S3_IDENTIFIER,
WEBPACK_ARTIFACTS_IDENTIFIER,
Expand All @@ -45,6 +45,7 @@
from content_sync.pipelines.definitions.concourse.common.resources import (
OcwHugoProjectsGitResource,
OcwHugoThemesGitResource,
OcwStudioOfflineGateResource,
OcwStudioWebhookResource,
OpenCatalogResource,
SiteContentGitResource,
Expand Down Expand Up @@ -303,13 +304,18 @@ def __init__(self, config: SitePipelineDefinitionConfig):
site_name=config.vars["site_name"],
api_token=settings.API_BEARER_TOKEN or "",
)
ocw_studio_webhook_offline_gate_resource = OcwStudioOfflineGateResource(
site_name=config.vars["site_name"],
api_token=settings.API_BEARER_TOKEN or "",
)
self.extend(
[
webpack_manifest_resource,
site_content_resource,
ocw_hugo_themes_resource,
ocw_hugo_projects_resource,
ocw_studio_webhook_resource,
ocw_studio_webhook_offline_gate_resource,
SlackAlertResource(),
]
)
Expand Down Expand Up @@ -821,7 +827,9 @@ class SitePipelineDefinition(Pipeline):
config(SitePipelineDefinitionConfig): The site pipeline configuration object
"""

_offline_build_gate_identifier = Identifier("offline-build-gate").root
_offline_build_gate_identifier = Identifier(
OCW_STUDIO_WEBHOOK_OFFLINE_GATE_IDENTIFIER
).root
_online_site_job_identifier = Identifier("online-site-job").root
_offline_site_job_identifier = Identifier("offline-site-job").root

Expand All @@ -834,24 +842,14 @@ def __init__(self, config: SitePipelineDefinitionConfig, **kwargs):
resource_types = SitePipelineResourceTypes()
resource_types.append(KeyvalResourceType())
resources = SitePipelineResources(config=config)
offline_build_gate_resource = Resource(
name=self._offline_build_gate_identifier,
type=KEYVAL_RESOURCE_TYPE_IDENTIFIER,
icon="gate",
check_every="never",
)
resources.append(offline_build_gate_resource)
online_job = self.get_online_build_job(config=config)
offline_build_gate_put_step = add_error_handling(
step=PutStep(
offline_build_gate_put_step = TryStep(
try_=PutStep(
put=self._offline_build_gate_identifier,
params={"mapping": "timestamp = now()"},
inputs=[],
),
step_description=f"{self._offline_build_gate_identifier} task step",
pipeline_name=config.vars["pipeline_name"],
short_id=config.vars["short_id"],
instance_vars=config.vars["instance_vars"],
timeout="1m",
attempts=1,
get_params={"no_get": True, "strict": True},
)
)
online_job.plan.append(offline_build_gate_put_step)
offline_job = self.get_offline_build_job(config=config)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def test_generate_theme_assets_pipeline_definition( # noqa: C901, PLR0912, PLR0
field="name",
value=pipeline_definition._offline_build_gate_identifier, # noqa: SLF001
)
assert offline_build_gate_resource["type"] == "keyval"
assert offline_build_gate_resource["type"] == "http-resource"
site_content_git_resource = get_dict_list_item_by_field(
items=resources, field="name", value=SITE_CONTENT_GIT_IDENTIFIER
)
Expand Down Expand Up @@ -428,7 +428,7 @@ def assert_base_build_tasks(tasks: list[dict], offline: bool): # noqa: FBT001
== config.vars["pipeline_name"]
)
assert (
online_site_tasks[-1]["put"]
online_site_tasks[-1]["try"]["put"]
== pipeline_definition._offline_build_gate_identifier # noqa: SLF001
)
offline_site_job = get_dict_list_item_by_field(
Expand Down
12 changes: 7 additions & 5 deletions content_sync/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from content_sync.constants import VERSION_DRAFT, VERSION_LIVE, WEBSITE_LISTING_DIRPATH
from content_sync.decorators import single_task
from content_sync.models import ContentSyncState
from content_sync.utils import get_publishable_sites
from main.celery import app
from main.s3_utils import get_boto3_resource
from websites.api import (
Expand Down Expand Up @@ -460,13 +461,14 @@ def update_websites_in_root_website():
if settings.CONTENT_SYNC_BACKEND:
root_website = Website.objects.get(name=settings.ROOT_WEBSITE_NAME)
# Get all sites, minus any sites that have never been successfully published
sites = Website.objects.exclude(
Q(**{"draft_publish_date__isnull": True})
& Q(**{"publish_date__isnull": True})
)
sites = sites.exclude(Q(url_path__isnull=True))
# and have downloading disabled
sites = get_publishable_sites(is_offline=True)
# Exclude the root website
sites = sites.exclude(name=settings.ROOT_WEBSITE_NAME)

# Remove the content for unpublished or not downloadable sites
WebsiteContent.objects.exclude(website__in=sites).delete()

fields = [
"website",
"type",
Expand Down
33 changes: 26 additions & 7 deletions content_sync/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,15 @@
OFFLINE_START,
ONLINE_END,
ONLINE_START,
PUBLISH_DATE_DRAFT,
PUBLISH_DATE_LIVE,
START_TAG_PREFIX,
TARGET_OFFLINE,
VERSION_DRAFT,
VERSION_LIVE,
)
from main.s3_utils import get_boto3_resource
from main.utils import is_dev
from websites.constants import WEBSITE_CONTENT_FILETYPE
from websites.constants import CONTENT_TYPE_METADATA, WEBSITE_CONTENT_FILETYPE
from websites.models import Website, WebsiteContent
from websites.site_config_api import SiteConfig

Expand Down Expand Up @@ -158,24 +159,42 @@ def get_ocw_studio_api_url():
return "http://10.1.0.102:8043" if is_dev() else settings.SITE_BASE_URL


def get_publishable_sites(version: str):
def get_publishable_sites(version: str = "", *, is_offline: bool = False):
"""
Get a QuerySet of Website objects that are eligible for publishing
Args:
version(str): The version (draft/live) to check publish eligibility with
is_offline(bool): Is the build type offline or not
"""
publish_date_field = (
"publish_date" if version == VERSION_LIVE else "draft_publish_date"
)
publish_date_field_filter = {
f"{PUBLISH_DATE_DRAFT}__isnull": True,
f"{PUBLISH_DATE_LIVE}__isnull": True,
}

if version:
publish_date_field_filter.pop(
f"{PUBLISH_DATE_LIVE}__isnull"
if version == VERSION_DRAFT
else f"{PUBLISH_DATE_DRAFT}__isnull"
)

# Get all sites, minus any sites that have never been successfully published and test sites # noqa: E501
sites = (
Website.objects.exclude(
Q(**{f"{publish_date_field}__isnull": True}) | Q(url_path__isnull=True)
Q(**publish_date_field_filter) | Q(url_path__isnull=True)
)
.exclude(unpublish_status__isnull=False)
.exclude(name__in=settings.OCW_TEST_SITE_SLUGS)
)

if is_offline:
# filter sites with download disabled
download_disabled_sites = WebsiteContent.objects.filter(
type=CONTENT_TYPE_METADATA, metadata__hide_download=True
).values_list("website__short_id", flat=True)
sites = sites.exclude(short_id__in=download_disabled_sites)

return sites.prefetch_related("starter")


Expand Down
19 changes: 16 additions & 3 deletions content_sync/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
)
from main.s3_utils import get_boto3_client
from ocw_import.conftest import MOCK_BUCKET_NAME, setup_s3
from websites.constants import CONTENT_TYPE_METADATA
from websites.factories import WebsiteContentFactory, WebsiteStarterFactory
from websites.site_config_api import ConfigItem, SiteConfig

Expand Down Expand Up @@ -257,7 +258,8 @@ def test_get_ocw_studio_api_url(settings, mocker, is_dev):


@pytest.mark.parametrize("version", [VERSION_DRAFT, VERSION_LIVE])
def test_get_publishable_sites(settings, mocker, mass_build_websites, version):
@pytest.mark.parametrize("offline", [False, True])
def test_get_publishable_sites(settings, mocker, mass_build_websites, version, offline):
"""get_publishable_sites should return a queryset of sites that have been published before"""
unpublished_site = mass_build_websites[0]
if version == VERSION_DRAFT:
Expand All @@ -270,8 +272,19 @@ def test_get_publishable_sites(settings, mocker, mass_build_websites, version):
test_site.name = test_site_slug
test_site.save()
assert len(mass_build_websites) == 7
publishable_sites = get_publishable_sites(version)
assert publishable_sites.count() == 4

publishable_sites_count = 4

if offline:
WebsiteContentFactory.create(
type=CONTENT_TYPE_METADATA,
metadata={"hide_download": True},
website=mass_build_websites[4],
)
publishable_sites_count = 3

publishable_sites = get_publishable_sites(version, is_offline=offline)
assert publishable_sites.count() == publishable_sites_count


@pytest.mark.parametrize("version", [VERSION_DRAFT, VERSION_LIVE])
Expand Down
13 changes: 13 additions & 0 deletions websites/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,19 @@ def pipeline_status(self, request, name=None):
)
return Response(status=200)

@action(detail=True, methods=["get"], permission_classes=[BearerTokenPermission])
def hide_download(self, request, name=None):  # noqa: ARG002
    """
    Report whether the offline (downloadable) build of a website is allowed.

    Responds with an empty JSON object when the site's metadata content has
    a truthy ``hide_download`` value. Otherwise responds with
    ``{"version": "<current UTC timestamp>"}``. A site with no metadata
    content, or with empty metadata, is treated as downloadable.

    Returns a 404 if no Website with the given name exists.
    """
    website = get_object_or_404(Website, name=name)
    # filter().first() yields None when the site has no metadata content,
    # whereas get() would raise DoesNotExist and surface as a 500.
    content = WebsiteContent.objects.filter(
        website=website, type=CONTENT_TYPE_METADATA
    ).first()
    # Guard against both a missing content row and a null metadata value.
    metadata = (content.metadata if content else None) or {}
    hide_download = bool(metadata.get("hide_download"))
    return Response(
        status=200,
        data={} if hide_download else {"version": str(now_in_utc().timestamp())},
    )


class WebsiteMassBuildViewSet(viewsets.ViewSet):
"""Return a list of previously published sites, with the info required by the mass-build-sites pipeline""" # noqa: E501
Expand Down

0 comments on commit 36e0b62

Please sign in to comment.