diff --git a/browse/controllers/files/dissemination.py b/browse/controllers/files/dissemination.py index 7ca4a33d..fa681dd8 100644 --- a/browse/controllers/files/dissemination.py +++ b/browse/controllers/files/dissemination.py @@ -110,10 +110,7 @@ def pdf_resp_fn(file: FileObj, resp = default_resp_fn(file, arxiv_id, docmeta, version) filename = f"{arxiv_id.filename}v{version.version}.pdf" resp.headers["Content-Disposition"] = f"inline; filename=\"{filename}\"" - if arxiv_id.has_version: - resp.headers["Link"] = f"; rel='canonical'" - else: - resp.headers["Link"] = f"; rel='canonical'" + resp.headers["Link"] = f"; rel='canonical'" if arxiv_id.has_version: resp.headers=add_surrogate_key(resp.headers,["pdf",f"pdf-{arxiv_id.idv}"]) else: @@ -203,7 +200,7 @@ def _html_response(file_list: Union[List[FileObj],FileObj], # Not a data error since a non-html-source paper might legitimately not have a latexml HTML resp= unavailable(arxiv_id) - if arxiv_id.has_version: + if arxiv_id.has_version: resp.headers=add_surrogate_key(resp.headers,["html",f"html-{arxiv_id.idv}"]) else: resp.headers=add_surrogate_key(resp.headers,["html",f"html-{arxiv_id.id}-current"]) @@ -213,7 +210,9 @@ def _html_response(file_list: Union[List[FileObj],FileObj], def _html_source_single_response(file: FileObj, arxiv_id: Identifier) -> Response: """Produces a `Response`for a single file for a paper with HTML source.""" if _is_html_name(file): # do post_processing - return default_resp_fn( FileTransform(file, post_process_html), arxiv_id) + resp = default_resp_fn( FileTransform(file, post_process_html), arxiv_id) + resp.headers["Link"] = f"; rel='canonical'" + return resp else: return default_resp_fn( file, arxiv_id) diff --git a/tests/dissemination/test_pdf.py b/tests/dissemination/test_pdf.py index 9fbbb750..11056488 100644 --- a/tests/dissemination/test_pdf.py +++ b/tests/dissemination/test_pdf.py @@ -17,7 +17,7 @@ def test_pdf_headers(client_with_test_fs): assert "pdf-cs/0011004v1" in head assert "paper-id-cs/0011004" in head - assert rv.headers["Link"] == "; rel='canonical'", "should not have version" + assert rv.headers["Link"] == "; rel='canonical'", "should not have version" def test_pdf_redirect(client_with_test_fs): diff --git a/tests/test_clickthrough.py b/tests/test_clickthrough.py index 3445b6c9..e32e27b2 100644 --- a/tests/test_clickthrough.py +++ b/tests/test_clickthrough.py @@ -17,7 +17,7 @@ def test_clickthrough(app_with_fake): assert resp.status_code == 404 resp = client.get("/ct?url=http%3A%2F%2Fwww.example.com&v=bogus") - assert resp.status_code == 400 + assert resp.status_code == 404 url = "https://example.com/something?whereis=thecheese" hash = create_hash(app_with_fake.config["CLICKTHROUGH_SECRET"], url) diff --git a/tests/test_html.py b/tests/test_html.py index 920bd64d..ef68c8b6 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -4,6 +4,8 @@ def test_html_paper(client_with_test_fs): assert resp.status_code == 200 headers= resp.headers assert "max-age=31536000" in resp.headers.get("Surrogate-Control") + assert resp.headers.get('Link','') == "; rel='canonical'", "versionless canonical header" + keys= " "+headers["Surrogate-Key"]+" " expected_keys=["html", "paper-id-2403.10561", "paper-id-2403.10561-current", "html-native", "html-2403.10561-current"] assert all(" "+item+" " in keys for item in expected_keys) @@ -23,12 +25,16 @@ def test_html_paper(client_with_test_fs): resp = client_with_test_fs.get("/html/2403.10561/") assert resp.status_code == 200 and b"Human-Centric" in resp.data + assert resp.headers.get('Link','') == "; rel='canonical'", "versionless canonical header" resp = client_with_test_fs.get("/html/2403.10561v1") assert resp.status_code == 200 and b"Human-Centric" in resp.data + assert resp.headers.get('Link','') == "; rel='canonical'", "versionless canonical header" resp = client_with_test_fs.get("/html/2403.10561v1/") assert resp.status_code == 200 and b"Human-Centric" in resp.data + assert resp.headers.get('Link','') == "; rel='canonical'", "versionless canonical header" + def test_html_icon(client_with_test_fs): resp = client_with_test_fs.get("/html/2403.10561") @@ -36,6 +42,7 @@ def test_html_icon(client_with_test_fs): resp = client_with_test_fs.get("/html/2403.10561/icon.png") assert resp.status_code == 200 and resp.headers.get("Content-Type") == "image/png" + assert not("Link" in resp.headers), "assets of html articles don't have a canonical header (yet?)" def test_html_paper_multi_files(client_with_test_fs): @@ -113,6 +120,7 @@ def test_html_headers(client_with_test_fs): assert 'Content-Type' in resp.headers content_type = resp.headers.get('Content-Type', '') assert content_type== "text/html; charset=utf-8" + assert resp.headers.get('Link', '') == "; rel='canonical'", "versionless canonical header" #Surrogate Keys rv=client_with_test_fs.head("/html/2403.10561") @@ -125,6 +133,7 @@ def test_html_headers(client_with_test_fs): assert "html-latexml" not in head rv=client_with_test_fs.head("/html/cs/9904010v1/graph1.gif") + assert not('Link' in rv.headers), "HTML assets do not have a canonical header (yet?)" head=rv.headers["Surrogate-Key"] assert " html " in " "+head+" " assert "html-cs/9904010-current" not in head