Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #1472: check attachments bundle #1473

Merged
merged 4 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions checks/remotesettings/attachments_bundles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
"""
Verify freshness and validity of attachment bundles.

For each collection where the attachments bundle is enabled, return the modification timestamp and number of attachments bundled.
"""

import io
import logging
import urllib.parse
import zipfile
from typing import Any

from telescope.typings import CheckResult
from telescope.utils import (
ClientSession,
retry_decorator,
run_parallel,
utcfromhttpdate,
utcfromtimestamp,
)

from .utils import KintoClient, fetch_signed_resources


EXPOSED_PARAMETERS = ["server"]

logger = logging.getLogger(__name__)


@retry_decorator
async def fetch_binary(url: str, **kwargs) -> tuple[int, str, bytes]:
    """Download ``url`` and return ``(status, last_modified, body)``.

    ``last_modified`` is the raw ``Last-Modified`` response header, defaulting
    to the Unix epoch when the server does not send one.
    """
    human_url = urllib.parse.unquote(url)
    logger.debug(f"Fetch binary from '{human_url}'")
    epoch_httpdate = "Mon, 01 Jan 1970 00:00:00 GMT"
    async with ClientSession() as session:
        async with session.get(url, **kwargs) as response:
            last_modified = response.headers.get("Last-Modified", epoch_httpdate)
            body = await response.read()
            return response.status, last_modified, body


async def run(
    server: str, auth: str, margin_publication_hours: int = 12
) -> CheckResult:
    """Verify freshness and validity of attachment bundles.

    For every signed collection whose metadata enables attachment bundling,
    download the published ZIP bundle from the CDN and report one of:
    ``missing`` (HTTP >= 400), ``bad zip`` (corrupt archive), ``outdated``
    (bundle older than the collection records by more than
    ``margin_publication_hours``), or ``ok``.

    :param server: Remote Settings server URL.
    :param auth: credentials used to query the server.
    :param margin_publication_hours: tolerated lag between the collection
        records timestamp and the bundle publication date.
    :returns: ``(success, result)`` where ``result`` maps
        ``"{bucket}/{collection}"`` to a status dict.
    """
    client = KintoClient(server_url=server, auth=auth)
    resources = await fetch_signed_resources(server, auth)

    logger.debug("Fetch metadata of %s collections", len(resources))
    futures = [
        client.get_collection(
            bucket=resource["source"]["bucket"],
            id=resource["source"]["collection"],
        )
        for resource in resources
    ]
    sources_metadata = await run_parallel(*futures)

    # Keep only the collections where attachment bundling is enabled.
    # Note: an empty list is a valid outcome (no collection bundles
    # attachments); the check then succeeds with an empty result.
    metadata_for_bundled = [
        (resource, metadata)
        for resource, metadata in zip(resources, sources_metadata)
        if metadata["data"].get("attachment", {}).get("bundle", False)
    ]
    logger.info("%s collections with attachments bundle", len(metadata_for_bundled))
    records_timestamps = [
        resource["last_modified"] for resource, _ in metadata_for_bundled
    ]

    info = await client.server_info()
    base_url = info["capabilities"]["attachments"]["base_url"]

    # Bundles are published on the CDN under the *destination* bucket id.
    futures_bundles = []
    for resource, metadata in metadata_for_bundled:
        bid = resource["destination"]["bucket"]
        cid = metadata["data"]["id"]
        url = f"{base_url}bundles/{bid}--{cid}.zip"
        futures_bundles.append(fetch_binary(url))
    bundles = await run_parallel(*futures_bundles)

    result: dict[str, dict[str, Any]] = {}
    success = True
    for timestamp, (resource, metadata), bundle in zip(
        records_timestamps, metadata_for_bundled, bundles
    ):
        http_status, modified, binary = bundle
        bid = resource["destination"]["bucket"]
        cid = metadata["data"]["id"]
        if http_status >= 400:
            result[f"{bid}/{cid}"] = {"status": "missing"}
            success = False
            continue

        try:
            z = zipfile.ZipFile(io.BytesIO(binary))
            nfiles = len(z.namelist())
        except zipfile.BadZipFile:
            result[f"{bid}/{cid}"] = {"status": "bad zip"}
            success = False
            continue

        bundle_ts = utcfromhttpdate(modified)
        records_ts = utcfromtimestamp(timestamp)
        # A bundle published within `margin_publication_hours` of the records
        # timestamp is still considered fresh.
        status = (
            "outdated"
            if ((records_ts - bundle_ts).total_seconds() / 3600)
            > margin_publication_hours
            else "ok"
        )
        result[f"{bid}/{cid}"] = {
            "status": status,
            "size": len(binary),
            "attachments": nfiles,
            "publication_timestamp": bundle_ts.isoformat(),
            "collection_timestamp": records_ts.isoformat(),
        }

    return success, result
5 changes: 5 additions & 0 deletions telescope/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
import email.utils
import json
import logging
import textwrap
Expand Down Expand Up @@ -164,6 +165,10 @@ def utcfromisoformat(iso8601):
return datetime.fromisoformat(iso8601_tz).replace(tzinfo=timezone.utc)


def utcfromhttpdate(httpdate):
    """Parse an HTTP date header value (e.g. ``Last-Modified``) into a
    timezone-aware UTC :class:`datetime.datetime`."""
    parsed = email.utils.parsedate_to_datetime(httpdate)
    return parsed.replace(tzinfo=timezone.utc)


def render_checks(func):
async def wrapper(request):
# First, check that client requests supported output format.
Expand Down
150 changes: 150 additions & 0 deletions tests/checks/remotesettings/test_attachments_bundles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import io
import zipfile

from checks.remotesettings.attachments_bundles import run


COLLECTION_URL = "/buckets/{}/collections/{}"
RECORDS_URL = "/buckets/{}/collections/{}/records"
CHANGESET_URL = "/buckets/{}/collections/{}/changeset"


def build_zip(num_files=3):
    """Return the bytes of an in-memory ZIP archive holding ``num_files``
    fake text files of 1 KiB each."""
    buffer = io.BytesIO()
    payload = "x" * 1024
    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as archive:
        for index in range(num_files):
            archive.writestr(f"fake_file_{index}.txt", payload)
    return buffer.getvalue()


async def test_negative(mock_responses, mock_aioresponses):
    """Run the check against a fake server covering every bundle status.

    Six monitored collections are set up, one per expected outcome:
    ``missing`` (CDN 404), ``ok`` (fresh bundle), ``badzip`` (corrupt
    payload), ``outdated`` (records newer than bundle by > 12h margin),
    ``late`` (records newer but within the margin, still "ok"), and
    ``no-bundle`` (bundling disabled, excluded from the result).
    """
    server_url = "http://fake.local/v1"
    # Server info: attachments CDN base URL and the signer resources
    # (a single bucket-wide resource covering all collections).
    mock_responses.get(
        server_url + "/",
        payload={
            "capabilities": {
                "attachments": {"base_url": "http://cdn/"},
                "signer": {
                    "resources": [
                        {
                            "source": {"bucket": "main-workspace", "collection": None},
                            "preview": {"bucket": "main-preview", "collection": None},
                            "destination": {"bucket": "main", "collection": None},
                        }
                    ]
                },
            }
        },
    )
    # Reference instant: 1982-05-08T00:01:01Z as epoch milliseconds,
    # HTTP-date, and ISO-8601.
    may8_ts = 389664061000
    may8_http = "Mon, 08 May 1982 00:01:01 GMT"
    may8_iso = "1982-05-08T00:01:01+00:00"

    # monitor/changes entries: one collection per expected status.
    # "outdated" exceeds the 12h freshness margin (+24h 1min);
    # "late" stays within it (+10min).
    changes_url = server_url + RECORDS_URL.format("monitor", "changes")
    mock_responses.get(
        changes_url,
        payload={
            "data": [
                {
                    "id": "abc",
                    "bucket": "main",
                    "collection": "missing",
                    "last_modified": may8_ts,
                },
                {
                    "id": "efg",
                    "bucket": "main",
                    "collection": "ok",
                    "last_modified": may8_ts,
                },
                {
                    "id": "hij",
                    "bucket": "main",
                    "collection": "badzip",
                    "last_modified": may8_ts,
                },
                {
                    "id": "klm",
                    "bucket": "main",
                    "collection": "outdated",
                    "last_modified": may8_ts + 24 * 3600 * 1000 + 60 * 1000,
                },
                {
                    "id": "nop",
                    "bucket": "main",
                    "collection": "late",
                    "last_modified": may8_ts + 600 * 1000,
                },
                {
                    "id": "qrs",
                    "bucket": "main",
                    "collection": "no-bundle",
                    "last_modified": may8_ts,
                },
            ]
        },
    )

    # Source collection metadata: bundling enabled everywhere except
    # "no-bundle", which should therefore not appear in the result.
    for cid in ("missing", "ok", "badzip", "outdated", "late", "no-bundle"):
        mock_responses.get(
            server_url + COLLECTION_URL.format("main-workspace", cid),
            payload={
                "data": {
                    "id": cid,
                    "bucket": "main-workspace",
                    "attachment": {"bundle": cid != "no-bundle"},
                }
            },
        )

    # CDN responses: a 404, valid ZIPs with a publication date, and a
    # payload that is not a ZIP at all.
    mock_aioresponses.get("http://cdn/bundles/main--missing.zip", status=404)
    mock_aioresponses.get(
        "http://cdn/bundles/main--ok.zip",
        body=build_zip(),
        headers={"Last-Modified": may8_http},
    )
    mock_aioresponses.get(
        "http://cdn/bundles/main--outdated.zip",
        body=build_zip(num_files=6),
        headers={"Last-Modified": may8_http},
    )
    mock_aioresponses.get(
        "http://cdn/bundles/main--late.zip",
        body=build_zip(num_files=6),
        headers={"Last-Modified": may8_http},
    )
    mock_aioresponses.get(
        "http://cdn/bundles/main--badzip.zip",
        body=b"boom",
        headers={"Last-Modified": may8_http},
    )

    status, data = await run(server_url, auth="")

    # Any missing / bad zip / outdated bundle makes the check fail overall.
    assert status is False
    assert data == {
        "main/badzip": {"status": "bad zip"},
        "main/missing": {"status": "missing"},
        "main/ok": {
            "status": "ok",
            "attachments": 3,
            "collection_timestamp": "1982-05-08T00:01:01+00:00",
            "publication_timestamp": may8_iso,
            "size": 373,
        },
        "main/late": {
            "status": "ok",
            "attachments": 6,
            "collection_timestamp": "1982-05-08T00:11:01+00:00",
            "publication_timestamp": may8_iso,
            "size": 724,
        },
        "main/outdated": {
            "attachments": 6,
            "collection_timestamp": "1982-05-09T00:02:01+00:00",
            "publication_timestamp": may8_iso,
            "size": 724,
            "status": "outdated",
        },
    }
Loading