From 353798ed8eaf893a104f0e6feb0a3876bb49cf58 Mon Sep 17 00:00:00 2001 From: Joseph Lewis III Date: Sat, 10 Aug 2024 12:32:27 -0700 Subject: [PATCH] Added generator logic. --- Dockerfile | 6 +- pyproject.toml | 3 +- src/devdocs2zim/client.py | 92 ++++- src/devdocs2zim/entrypoint.py | 23 +- src/devdocs2zim/generator.py | 246 +++++++++++- src/devdocs2zim/templates/licenses.txt | 10 + src/devdocs2zim/templates/page.html | 52 +++ src/devdocs2zim/third_party/devdocs/COPYRIGHT | 13 + src/devdocs2zim/third_party/devdocs/LICENSE | 373 ++++++++++++++++++ src/devdocs2zim/third_party/devdocs/README.md | 7 + .../third_party/devdocs/devdocs_48.png | Bin 0 -> 1108 bytes tests/test_client.py | 138 +++++++ tests/test_entrypoint.py | 12 + tests/test_generator.py | 107 ++++- 14 files changed, 1060 insertions(+), 22 deletions(-) create mode 100644 src/devdocs2zim/templates/licenses.txt create mode 100644 src/devdocs2zim/templates/page.html create mode 100644 src/devdocs2zim/third_party/devdocs/COPYRIGHT create mode 100644 src/devdocs2zim/third_party/devdocs/LICENSE create mode 100644 src/devdocs2zim/third_party/devdocs/README.md create mode 100644 src/devdocs2zim/third_party/devdocs/devdocs_48.png create mode 100644 tests/test_entrypoint.py diff --git a/Dockerfile b/Dockerfile index fba3419..e0f8f56 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,11 @@ FROM python:3.12-slim-bookworm LABEL org.opencontainers.image.source https://github.com/openzim/devdocs # Install necessary packages -RUN python -m pip install --no-cache-dir -U \ +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + libmagic1 \ + && rm -rf /var/lib/apt/lists/* \ + && python -m pip install --no-cache-dir -U \ pip # Copy pyproject.toml and its dependencies diff --git a/pyproject.toml b/pyproject.toml index 8e27708..2bbad08 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ dependencies = [ "requests==2.32.3", "pydantic==2.8.2", "zimscraperlib==3.4.0", + "Jinja2==3.1.3", ] dynamic 
= ["authors", "classifiers", "keywords", "license", "version", "urls"] @@ -23,7 +24,7 @@ lint = [ "ruff==0.5.1", ] check = [ - "pyright==1.1.370", + "pyright==1.1.374", ] test = [ "pytest==8.2.2", diff --git a/src/devdocs2zim/client.py b/src/devdocs2zim/client.py index 8ee84a4..44e0085 100644 --- a/src/devdocs2zim/client.py +++ b/src/devdocs2zim/client.py @@ -1,10 +1,24 @@ +import re +from collections import defaultdict +from enum import Enum +from functools import cached_property + import requests -from pydantic import BaseModel, TypeAdapter +from pydantic import BaseModel, TypeAdapter, computed_field from devdocs2zim.constants import logger HTTP_TIMEOUT_SECONDS = 15 +# These regular expressions are extracted from the DevDocs frontend. +# The expression definitions haven't changed in ~8 years as of 2024-07-28: +# https://github.com/freeCodeCamp/devdocs/blob/e28f81d3218bdbad7eac0540c58c11c7fe1d33d3/assets/javascripts/collections/types.js#L3 +BEFORE_CONTENT_PATTERN = re.compile( + r"(^|\()(guides?|tutorials?|reference|book|getting\ started|manual|examples)($|[\):])", # noqa: E501 + re.IGNORECASE, +) +AFTER_CONTENT_PATTERN = re.compile(r"appendix", re.IGNORECASE) + class DevdocsMetadataLinks(BaseModel): """Project links for a specific documentation set.""" @@ -74,7 +88,8 @@ class DevdocsIndexEntry(BaseModel): path: str # Name of the type (section) the entry is located under. - type: str + # If None, the entry is not displayed. + type: str | None @property def path_without_fragment(self) -> str: @@ -82,6 +97,16 @@ def path_without_fragment(self) -> str: return self.path.split("#")[0] +class SortPrecedence(Enum): + """Represents where to place section in the navbar.""" + + # NOTE: Definition order must match display order. + + BEFORE_CONTENT = 0 + CONTENT = 1 + AFTER_CONTENT = 2 + + class DevdocsIndexType(BaseModel): """A section header for documentation.""" @@ -94,6 +119,39 @@ class DevdocsIndexType(BaseModel): # Section slug. This appears to be unused. 
slug: str + def sort_precedence(self) -> SortPrecedence: + """Determines where this section should be displayed in the navigation.""" + if BEFORE_CONTENT_PATTERN.match(self.name): + return SortPrecedence.BEFORE_CONTENT + + if AFTER_CONTENT_PATTERN.match(self.name): + return SortPrecedence.AFTER_CONTENT + + return SortPrecedence.CONTENT + + +class NavigationSection(BaseModel): + """Represents a single section of a devdocs navigation tree.""" + + # Heading information for the group of links. + name: str + # Links to display in the section. + links: list[DevdocsIndexEntry] + + @computed_field + @property + def count(self) -> int: + """Number of links in the section.""" + return len(self.links) + + @cached_property + def _contained_pages(self) -> set[str]: + return {link.path_without_fragment for link in self.links} + + def contains_page(self, page_path: str) -> bool: + """Returns whether this section contains the given page.""" + return page_path in self._contained_pages + class DevdocsIndex(BaseModel): """Represents entries in the //index.json file for each resource.""" @@ -102,10 +160,36 @@ class DevdocsIndex(BaseModel): entries: list[DevdocsIndexEntry] # List of "types" or section headings. - # These are displayed mostly in order, except regular expressions are used to sort: - # https://github.com/freeCodeCamp/devdocs/blob/e28f81d3218bdbad7eac0540c58c11c7fe1d33d3/assets/javascripts/collections/types.js#L3 + # These are displayed in the order they're found grouped by sort_precedence. 
types: list[DevdocsIndexType] + def build_navigation(self) -> list[NavigationSection]: + """Builds a navigation hierarchy that's sorted correctly for rendering.""" + + sections_by_precedence: dict[SortPrecedence, list[DevdocsIndexType]] = ( + defaultdict(list) + ) + for section in self.types: + sections_by_precedence[section.sort_precedence()].append(section) + + links_by_section_name: dict[str, list[DevdocsIndexEntry]] = defaultdict(list) + for entry in self.entries: + if entry.type is None: + continue + links_by_section_name[entry.type].append(entry) + + output: list[NavigationSection] = [] + for precedence in SortPrecedence: + for section in sections_by_precedence[precedence]: + output.append( + NavigationSection( + name=section.name, + links=links_by_section_name[section.name], + ) + ) + + return output + class DevdocsClient: """Utility functions to read data from devdocs.""" diff --git a/src/devdocs2zim/entrypoint.py b/src/devdocs2zim/entrypoint.py index ab79ac4..0725326 100644 --- a/src/devdocs2zim/entrypoint.py +++ b/src/devdocs2zim/entrypoint.py @@ -12,6 +12,19 @@ from devdocs2zim.generator import DocFilter, Generator, ZimConfig +def zim_defaults() -> ZimConfig: + """Returns the default configuration for ZIM generation.""" + return ZimConfig( + name_format="devdocs_{slug_without_version}_{version}", + creator="DevDocs", + publisher="openZIM", + title_format="{full_name} Docs", + description_format="{full_name} docs by DevDocs", + long_description_format=None, + tags="devdocs;{slug_without_version}", + ) + + def main() -> None: parser = argparse.ArgumentParser( prog=NAME, @@ -38,15 +51,7 @@ def main() -> None: # ZIM configuration flags ZimConfig.add_flags( parser, - ZimConfig( - name_format="devdocs_{slug_without_version}_{version}", - creator="DevDocs", - publisher="openZIM", - title_format="{full_name} Docs", - description_format="{full_name} docs by DevDocs", - long_description_format=None, - tags="devdocs;{slug_without_version}", - ), + zim_defaults(), 
) # Document selection flags diff --git a/src/devdocs2zim/generator.py b/src/devdocs2zim/generator.py index c903a7d..fb6d67c 100644 --- a/src/devdocs2zim/generator.py +++ b/src/devdocs2zim/generator.py @@ -1,20 +1,37 @@ import argparse +import datetime import os import re from collections import defaultdict +from pathlib import Path +from jinja2 import Environment, FileSystemLoader, select_autoescape from pydantic import BaseModel from zimscraperlib.constants import ( # pyright: ignore[reportMissingTypeStubs] MAXIMUM_DESCRIPTION_METADATA_LENGTH, MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH, RECOMMENDED_MAX_TITLE_LENGTH, ) +from zimscraperlib.zim import ( # pyright: ignore[reportMissingTypeStubs] + Creator, + StaticItem, +) from devdocs2zim.client import ( DevdocsClient, + DevdocsIndex, + DevdocsIndexEntry, DevdocsMetadata, ) -from devdocs2zim.constants import logger +from devdocs2zim.constants import LANGUAGE_ISO_639_3, NAME, ROOT_DIR, VERSION, logger + +# Content to display for pages missing from DevDocs. +MISSING_PAGE = ( + "

This documentation is missing.

" + "

This is an error with the openZIM DevDocs scraper, not your ZIM reader e.g. " + 'Kiwix. Please ' + "file an issue with the scraper.

" +) class InvalidFormatError(Exception): @@ -274,15 +291,236 @@ def __init__( os.makedirs(self.output_folder, exist_ok=True) - def run(self) -> None: + # jinja2 environment setup + self.env = Environment( # type: ignore + loader=FileSystemLoader(ROOT_DIR.joinpath("templates")), + autoescape=select_autoescape(), + ) + + self.page_template = self.env.get_template("page.html") # type: ignore + self.licenses_template = self.env.get_template("licenses.txt") # type: ignore + + self.logo_path = self.asset_path("devdocs_48.png") + self.copyright_path = self.asset_path("COPYRIGHT") + self.license_path = self.asset_path("LICENSE") + + @staticmethod + def asset_path(name: str) -> Path: + """Returns the path to name in the third_party/devdocs folder. + + Raises ValueError if the resource doesn't exist. + """ + path = ROOT_DIR.joinpath("third_party", "devdocs", name) + if not path.exists(): + raise ValueError(f"File not found at {path}") + return path + + def load_common_files(self) -> list[StaticItem]: + """Loads common assets for the output.""" + static_files: list[StaticItem] = [] + + logger.info("Fetching common CSS...") + app_css = self.devdocs_client.read_application_css() + logger.debug(f" Found app CSS with {len(app_css)} chars.") + static_files.append( + StaticItem( + path="application.css", + content=app_css, + is_front=False, + mimetype="text/css", + ) + ) + + static_files.append( + StaticItem( + # Documentation doesn't have file extensions so this + # file won't conflict with the dynamic content. + path="licenses.txt", + content=self.licenses_template.render( # type: ignore + copyright=self.copyright_path.read_text(), + license=self.license_path.read_text(), + ), + is_front=True, + mimetype="text/plain", + ) + ) + + return static_files + + def run(self) -> list[Path]: """Run the generator to fetch content and produce ZIMs.""" # Load docs first to fail fast before fetching additional resources. 
all_docs = self.devdocs_client.list_docs() selected_doc_metadata = self.doc_filter.filter(all_docs) + # Check formatting early to bail if any templates are invalid. + for doc_metadata in selected_doc_metadata: + self.zim_config.format(doc_metadata.placeholders()) + + common_resources = self.load_common_files() + # List all docs and copy one by one + generated: list[Path] = [] for doc_metadata in selected_doc_metadata: - logger.info(f"Fetching {doc_metadata.slug}") + # TODO(#11): Add progress tracker here. + generated.append( + self.generate_zim( + doc_metadata, + common_resources, + ) + ) + + return generated + + def generate_zim( + self, doc_metadata: DevdocsMetadata, common_resources: list[StaticItem] + ) -> Path: + """Generates a zim for a single document. + + Returns the path to the generated ZIM. + """ + logger.info(f"Generating ZIM for {doc_metadata.slug}") + + formatted_config = self.zim_config.format(doc_metadata.placeholders()) + zim_path = Path(self.output_folder, f"{formatted_config.name_format}.zim") + + # Don't clobber existing files so a user can resume a failed run. + if zim_path.exists(): + logger.warning(f" Skipping, {zim_path} already exists.") + return zim_path + + logger.info(f" Writing to: {zim_path}") + + creator = Creator(zim_path, "index") + creator.config_metadata( + Name=formatted_config.name_format, + Title=formatted_config.title_format, + Publisher=formatted_config.publisher, + Date=datetime.datetime.now(tz=datetime.UTC).date(), + Creator=formatted_config.creator, + Description=formatted_config.description_format, + LongDescription=formatted_config.long_description_format, + # As of 2024-07-28 all documentation is in English. + Language=LANGUAGE_ISO_639_3, + Tags=formatted_config.tags, + Scraper=f"{NAME} v{VERSION}", + Illustration_48x48_at_1=self.logo_path.read_bytes(), + ) + + # Disable indexing because it won't be available in the JS frontend + # and causes significant performance issues with rendered sidebars. 
+ creator.config_indexing(False) + + # Start creator early to detect problems early. + with creator as started_creator: + logger.info(" Fetching the index...") + index = self.devdocs_client.get_index(doc_metadata.slug) + logger.debug(f" The index has {len(index.entries)} entries.") + + logger.info(" Fetching the document database...") + db = self.devdocs_client.get_db(doc_metadata.slug) + logger.debug(f" The database has {len(db)} entries.") + + self.add_zim_contents( + creator=started_creator, + doc_metadata=doc_metadata, + index=index, + db=db, + common_resources=common_resources, + ) + return zim_path + + @staticmethod + def page_titles(pages: list[DevdocsIndexEntry]) -> dict[str, str]: + """Returns a map between page paths in the DB and their "best" title. + + The best title is either the first navigation item that opens the page + to the top (i.e. without a path fragment) or the first navigation item + that opens the page if none open to the top. + """ + + page_to_title: dict[str, str] = {} + for page in pages: + path_without_fragment = page.path_without_fragment + if path_without_fragment == page.path: + page_to_title[path_without_fragment] = page.name + elif path_without_fragment not in page_to_title: + page_to_title[path_without_fragment] = page.name + + return page_to_title + + def add_zim_contents( + self, + creator: Creator, + doc_metadata: DevdocsMetadata, + index: DevdocsIndex, + db: dict[str, str], + common_resources: list[StaticItem], + ): + """Adds the doc contents to the ZIM. + + Parameters: + creator: ZIM writer. + doc_metadata: Document metadata for generating common pages. + index: Documentation index for the navigation bar. + db: Mapping between documentation path and HTML content. + common_resources: Static content to add to the documentation. 
+ """ + + logger.info(" Adding common resources...") + for resource in common_resources: + creator.add_item(resource) # type: ignore + + page_to_title = self.page_titles(index.entries) + # Explicitly inject the index which exists for every documentation set + # but isn't in the dynamic list of pages. + page_to_title["index"] = f"{doc_metadata.name} Documentation" + + nav_sections = index.build_navigation() + + logger.info(f" Rendering {len(page_to_title)} pages...") + counter = 0 + for path, title in page_to_title.items(): + num_slashes = path.count("/") + rel_prefix = "../" * num_slashes + + content = MISSING_PAGE + if path in db: + content = db.get(path) + else: + logger.warning( + f" DevDocs is missing content for {title!r} at {path!r}." + ) - raise NotImplementedError("ZIM creation is not yet implemented") + # NOTE: Profiling indicates Jinja templating takes about twice + # the CPU time as adding items without compression. This appears to + # be because of the navigation bar. + page_content = self.page_template.render( # type: ignore + rel_prefix=rel_prefix, + nav_sections=nav_sections, + devdocs_metadata=doc_metadata, + title=title, + path=path, + # Fill missing DevDocs content with indications that the issue + # isn't with this generator. + content=content, + ) + creator.add_item_for( # type: ignore + path, + title=title, + content=page_content, # type: ignore + is_front=True, + # Compression is needed because navigation is similar across pages. + # Large documentation like Ansible may have ~6000 items in the + # navigation bar. 
+ should_compress=True, + mimetype="text/html", + ) + + # Tracking metadata + counter += 1 + if counter % 100 == 0: + logger.info(f" Progress {counter} / {len(page_to_title)} pages") + + logger.info(" Finished adding contents.") diff --git a/src/devdocs2zim/templates/licenses.txt b/src/devdocs2zim/templates/licenses.txt new file mode 100644 index 0000000..d68c3fa --- /dev/null +++ b/src/devdocs2zim/templates/licenses.txt @@ -0,0 +1,10 @@ +{# The license page is intentionally plain text to ensure it's readable. -#} +This work contains resources from https://DevDocs.io licensed under the following license and copyright notice. + +COPYRIGHT + +{{ copyright | indent(4, true)}} + +LICENSE + +{{ license | indent(4, true)}} diff --git a/src/devdocs2zim/templates/page.html b/src/devdocs2zim/templates/page.html new file mode 100644 index 0000000..f9a680f --- /dev/null +++ b/src/devdocs2zim/templates/page.html @@ -0,0 +1,52 @@ +{# + Devdocs is an SPA so each page has identical structure. + + For performance, as many items are marked "safe" as possible. + + CSS is taken straight from DevDocs so variations in tag usage between + languages/libraries can be normalized. +-#} + + + {{title}} + + + +
+ {# Remove top padding which is usually reserved for the search bar. #} +
+
+ {{devdocs_metadata.name}} +
+ {% for section in nav_sections %} +
+ + + {{ section.count | safe}} + {{ section.name }} + + +
+ {% for link in section.links %} + + {{ link.name }} + + {% endfor %} +
+
+ {% endfor %} + Open-source Licenses +
+
+
+
+
+
{{content | safe}}
+
+
+
+ + + diff --git a/src/devdocs2zim/third_party/devdocs/COPYRIGHT b/src/devdocs2zim/third_party/devdocs/COPYRIGHT new file mode 100644 index 0000000..9c520b8 --- /dev/null +++ b/src/devdocs2zim/third_party/devdocs/COPYRIGHT @@ -0,0 +1,13 @@ +Copyright 2013-2024 Thibaut Courouble and other contributors + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Please do not use the name DevDocs to endorse or promote products +derived from this software without the maintainers' permission, except +as may be necessary to comply with the notice/attribution requirements. + +We also wish that any documentation file generated using this software +be attributed to DevDocs. Let's be fair to all contributors by giving +credit where credit's due. Thanks. diff --git a/src/devdocs2zim/third_party/devdocs/LICENSE b/src/devdocs2zim/third_party/devdocs/LICENSE new file mode 100644 index 0000000..a612ad9 --- /dev/null +++ b/src/devdocs2zim/third_party/devdocs/LICENSE @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. 
"Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. 
For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. 
+Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/src/devdocs2zim/third_party/devdocs/README.md b/src/devdocs2zim/third_party/devdocs/README.md new file mode 100644 index 0000000..41abd43 --- /dev/null +++ b/src/devdocs2zim/third_party/devdocs/README.md @@ -0,0 +1,7 @@ +These files are copied from DevDocs: + + https://github.com/freeCodeCamp/devdocs + +devdocs_48.png is adapted from: + + https://github.com/freeCodeCamp/devdocs/blob/0dd0ad813f81d3c8e3d040095992e61b7398be96/public/images/icon-64.png diff --git a/src/devdocs2zim/third_party/devdocs/devdocs_48.png b/src/devdocs2zim/third_party/devdocs/devdocs_48.png new file mode 100644 index 0000000000000000000000000000000000000000..f8b5b77cb845298a38b22883c0cf27e175b91f81 GIT binary patch literal 1108 zcmX|BcTC$y6#fATqy$SuC@qbm#A(t$3ZaTxDS?!xKunmSKxt7zn>uX-W@9jB8k=GE zGGNRGvxhOpY@2O{F%U4uEVG9(+knzWja2#~OFik{y?grZ-Fx46x+F&jE0p#PZ3ID3 zHr5ztSRZ_xS2Vz4mp^Gk5cM~X_AZv{ut$)AfdM|B-`UyO*4D=1a2gsKs;a6=N=owc z^Dizgz{04mu5NE{2P8!D^72%{va&KRm)qCZ$6zoZQdd_8J~#yh1#C7Oz~HOE5Mi-c zy}i97BO_B&Qws|V8yg#YdwWWyQk4M!04^*nY-wqkoSalB6s4u5fQRFdRw9uc9v;@# z)^>Gu0j#2;VtRV|04x*=kB*M&>+1yq0Ysn?fGsU8LC*iHf?FUVbP9mh z)>g2pd%;;>Uw3zR_wexW_xBGA3yX`3TU}jEO-;?r%$%Q}hqR52jZi9__4W1j_V!*| zTbr4g+27wE8yhPxFAoh3&CSg{K0X$U#Xdeh$;rubxqNqbcW7t`hr?}eZv!EIettwE zkxHdzWn~>49FWQ6pr9a6PtWM+=<)IK+1c5&w6w^`$l~H+C^#Y_A}}y8I5;>bCnr8W zzPY)XKp+GJ1Z-|@f;3xOTR?tCN5}B+a7s!_Wo6~)=%`F41MQlcnwFQBNhA`J$&^Z^ zKoMvc8ylODkkH@XKR7s;k&%JN7?|F%cx7P$)oSc6Rp8&Q3^32s|6mnnt71>2wwOD$#*? 
zAeLZkfr3Y-MsRlc2{Dr|xPxC}gE4op9zB}tu~UD2Q%fmumfSJFa%bA;E#JeWndz;> zD+vL?oNBd|)osx1+>LLp^jV?LS670|i zT9#oRx^GFvj-Toq&k9XDt-I%ZQJy{ZMh!(Fsx~0Q52tQ{X8h%w4%D&E{ zjlOc#*Eu*6^ZkV^q<4@0(nYS%_z(}n~-~H6JP(d{L zz1U_uBD*S*HA?wiWcSccPq5+iyt)Z9#WQ{oEW|LH+$#gLL&Zrn<$l< mlT4iUpS|MSd=SeGMQ4x7Y(x*e?&0YlKL;C22MpT+8}lC~g=c>N literal 0 HcmV?d00001 diff --git a/tests/test_client.py b/tests/test_client.py index cee5dc4..88f6625 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -11,6 +11,8 @@ DevdocsIndexType, DevdocsMetadata, DevdocsMetadataLinks, + NavigationSection, + SortPrecedence, ) # NOTE: Deserializataion tests in this file are performed against the full object @@ -224,6 +226,74 @@ def test_unmarshal(self): index_type, ) + def test_sort_precedence_default(self): + index_type = DevdocsIndexType( + name="ZIM Readers", + count=0, + slug="", + ) + + got = index_type.sort_precedence() + + self.assertEqual(SortPrecedence.CONTENT, got) + + def test_sort_precedence_before(self): + index_type = DevdocsIndexType( + name="(Tutorial) Creating a ZIM", + count=0, + slug="", + ) + + got = index_type.sort_precedence() + + self.assertEqual(SortPrecedence.BEFORE_CONTENT, got) + + def test_sort_precedence_after(self): + index_type = DevdocsIndexType( + name="Appendix A: List of ZIMs", + count=0, + slug="", + ) + + got = index_type.sort_precedence() + + self.assertEqual(SortPrecedence.AFTER_CONTENT, got) + + +class TestNavigationSection(TestCase): + def test_count_empty(self): + section = NavigationSection(name="", links=[]) + + got = section.count + + self.assertEqual(0, got) + + def test_count_non_empty(self): + section = NavigationSection( + name="", + links=[ + DevdocsIndexEntry(name="Foo 1", path="foo#1", type=None), + ], + ) + + got = section.count + + self.assertEqual(1, got) + + def test_contains_page(self): + section = NavigationSection( + name="", + links=[ + DevdocsIndexEntry(name="Foo 1", path="foo#1", type=None), + DevdocsIndexEntry(name="Foo 2", path="foo#2", type=None), 
+ DevdocsIndexEntry(name="Bar", path="bar", type=None), + ], + ) + + self.assertTrue(section.contains_page("foo")) + self.assertTrue(section.contains_page("bar")) + self.assertFalse(section.contains_page("bazz")) + class TestDevdocsIndex(TestCase): def test_unmarshal_minimal(self): @@ -275,6 +345,74 @@ def test_unmarshal(self): index, ) + def test_build_navigation(self): + index = DevdocsIndex( + entries=[ + DevdocsIndexEntry(name="Appendix 1", path="", type="Appendix"), + DevdocsIndexEntry(name="Middle 1", path="", type="Middle"), + DevdocsIndexEntry(name="Appendix 2", path="", type="Appendix"), + DevdocsIndexEntry(name="Tutorial 1", path="", type="Tutorials"), + DevdocsIndexEntry(name="Middle 2", path="", type="Middle"), + DevdocsIndexEntry(name="Tutorial 2", path="", type="Tutorials"), + ], + types=[ + DevdocsIndexType(name="Appendix", count=2, slug=""), + DevdocsIndexType(name="Tutorials", count=2, slug=""), + DevdocsIndexType(name="Middle", count=2, slug=""), + ], + ) + + got = index.build_navigation() + + self.assertEqual( + [ + NavigationSection( + name="Tutorials", + links=[ + DevdocsIndexEntry(name="Tutorial 1", path="", type="Tutorials"), + DevdocsIndexEntry(name="Tutorial 2", path="", type="Tutorials"), + ], + ), + NavigationSection( + name="Middle", + links=[ + DevdocsIndexEntry(name="Middle 1", path="", type="Middle"), + DevdocsIndexEntry(name="Middle 2", path="", type="Middle"), + ], + ), + NavigationSection( + name="Appendix", + links=[ + DevdocsIndexEntry(name="Appendix 1", path="", type="Appendix"), + DevdocsIndexEntry(name="Appendix 2", path="", type="Appendix"), + ], + ), + ], + got, + ) + + def test_build_navigation_ignores_none(self): + index = DevdocsIndex( + entries=[ + DevdocsIndexEntry(name="Appendix 1", path="", type=None), + ], + types=[ + DevdocsIndexType(name="Appendix", count=1, slug=""), + ], + ) + + got = index.build_navigation() + + self.assertEqual( + [ + NavigationSection( + name="Appendix", + links=[], + ), + ], + got, + ) + 
class TestDevdocsClient(TestCase): def setUp(self): diff --git a/tests/test_entrypoint.py b/tests/test_entrypoint.py new file mode 100644 index 0000000..0b18df1 --- /dev/null +++ b/tests/test_entrypoint.py @@ -0,0 +1,12 @@ +import argparse + +from devdocs2zim.entrypoint import zim_defaults +from devdocs2zim.generator import ZimConfig + + +def test_zim_defaults_validity(): + parser = argparse.ArgumentParser() + ZimConfig.add_flags(parser, zim_defaults()) + + # Assert parsing the defaults doesn't raise an error. + ZimConfig.of(parser.parse_args([])) diff --git a/tests/test_generator.py b/tests/test_generator.py index 80a5040..e23f5e1 100644 --- a/tests/test_generator.py +++ b/tests/test_generator.py @@ -1,9 +1,19 @@ import argparse +from tempfile import TemporaryDirectory from unittest import TestCase - -from devdocs2zim.client import DevdocsMetadata +from unittest.mock import create_autospec + +from devdocs2zim.client import ( + DevdocsClient, + DevdocsIndex, + DevdocsIndexEntry, + DevdocsIndexType, + DevdocsMetadata, +) +from devdocs2zim.entrypoint import zim_defaults from devdocs2zim.generator import ( DocFilter, + Generator, InvalidFormatError, MissingDocumentError, ZimConfig, @@ -184,7 +194,7 @@ def test_flags_missing_selector(self): parser = argparse.ArgumentParser(exit_on_error=False) DocFilter.add_flags(parser) - self.assertRaises(SystemExit, parser.parse_args, args=[]) + self.assertRaises(argparse.ArgumentError, parser.parse_args, args=[]) def test_flags_regex(self): parser = argparse.ArgumentParser() @@ -274,3 +284,94 @@ def test_filter_first(self): ], got, ) + + +class TestGenerator(TestCase): + def setUp(self): + self.temp_dir = TemporaryDirectory() + output_folder = self.temp_dir.__enter__() + + self.mock_client = create_autospec(DevdocsClient) + + self.generator = Generator( + devdocs_client=self.mock_client, + doc_filter=DocFilter( + all=True, first=None, slugs=None, skip_slug_regex=None + ), + output_folder=output_folder, + zim_config=zim_defaults(), 
+ ) + + def tearDown(self): + self.temp_dir.__exit__(None, None, None) + + def test_asset_path_missing(self): + self.assertRaises(ValueError, Generator.asset_path, "does_not_exist") + + def test_asset_path_exists(self): + got = Generator.asset_path("README.md") + + self.assertTrue(got.exists()) + + def test_load_common_files(self): + got = self.generator.load_common_files() + + # Check names because they're referenced in templates + self.assertEqual( + {"licenses.txt", "application.css"}, + {f.path for f in got}, # type: ignore + ) + + def test_run_no_documents(self): + got = self.generator.run() + + self.assertEqual([], got) + + def test_run_e2e(self): + self.mock_client.read_application_css.return_value = ".mock_css {}" + self.mock_client.list_docs.return_value = [ + DevdocsMetadata(name="MockDoc", slug="mockdoc") + ] + self.mock_client.get_index.return_value = DevdocsIndex( + entries=[ + DevdocsIndexEntry( + name="Mock Entry", path="mock-entry", type="Mock Header" + ), + DevdocsIndexEntry( + name="Missing Entry", path="missing", type="Mock Header" + ), + ], + types=[ + DevdocsIndexType(name="Mock Header", count=1, slug="headers"), + ], + ) + self.mock_client.get_db.return_value = { + "mock-entry": "Entry Value", + "index": "Index Value", + } + + got = self.generator.run() + + self.assertEqual(1, len(got)) + + def test_page_titles_no_fragment(self): + pages = [ + DevdocsIndexEntry(name="Mock Sub1", path="mock#subheading1", type=None), + DevdocsIndexEntry(name="Mock Top", path="mock", type=None), + DevdocsIndexEntry(name="Mock Sub2", path="mock#subheading2", type=None), + ] + + got = Generator.page_titles(pages) + + self.assertEqual({"mock": "Mock Top"}, got) + + def test_page_titles_only_fragment(self): + pages = [ + DevdocsIndexEntry(name="Mock Sub1", path="mock#subheading1", type=None), + DevdocsIndexEntry(name="Mock Sub2", path="mock#subheading2", type=None), + ] + + got = Generator.page_titles(pages) + + # First fragment wins if no page points to the top + 
self.assertEqual({"mock": "Mock Sub1"}, got)