Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added generator logic #10

Merged
merged 1 commit into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@ FROM python:3.12-slim-bookworm
LABEL org.opencontainers.image.source https://github.com/openzim/devdocs

# Install necessary packages
RUN python -m pip install --no-cache-dir -U \
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
libmagic1 \
&& rm -rf /var/lib/apt/lists/* \
&& python -m pip install --no-cache-dir -U \
pip

# Copy pyproject.toml and its dependencies
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ dependencies = [
"requests==2.32.3",
"pydantic==2.8.2",
"zimscraperlib==3.4.0",
"Jinja2==3.1.3",
]
dynamic = ["authors", "classifiers", "keywords", "license", "version", "urls"]

Expand All @@ -23,7 +24,7 @@ lint = [
"ruff==0.5.1",
]
check = [
"pyright==1.1.370",
"pyright==1.1.374",
]
test = [
"pytest==8.2.2",
Expand Down
92 changes: 88 additions & 4 deletions src/devdocs2zim/client.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,24 @@
import re
from collections import defaultdict
from enum import Enum
from functools import cached_property

import requests
from pydantic import BaseModel, TypeAdapter
from pydantic import BaseModel, TypeAdapter, computed_field

from devdocs2zim.constants import logger

HTTP_TIMEOUT_SECONDS = 15

# These regular expressions are extracted from the DevDocs frontend.
# The expression definitions haven't changed in ~8 years as of 2024-07-28:
# https://github.com/freeCodeCamp/devdocs/blob/e28f81d3218bdbad7eac0540c58c11c7fe1d33d3/assets/javascripts/collections/types.js#L3
josephlewis42 marked this conversation as resolved.
Show resolved Hide resolved
# Case-insensitive pattern for section names such as "Guide", "Tutorials" or
# "Reference". DevdocsIndexType.sort_precedence places sections whose name
# matches it in SortPrecedence.BEFORE_CONTENT.
BEFORE_CONTENT_PATTERN = re.compile(
    r"(^|\()(guides?|tutorials?|reference|book|getting\ started|manual|examples)($|[\):])",  # noqa: E501
    re.IGNORECASE,
)
# Case-insensitive pattern for appendix sections. DevdocsIndexType.sort_precedence
# places sections whose name matches it in SortPrecedence.AFTER_CONTENT.
AFTER_CONTENT_PATTERN = re.compile(r"appendix", re.IGNORECASE)


class DevdocsMetadataLinks(BaseModel):
"""Project links for a specific documentation set."""
Expand Down Expand Up @@ -74,14 +88,25 @@ class DevdocsIndexEntry(BaseModel):
path: str

# Name of the type (section) the entry is located under.
type: str
# If None, the entry is not displayed.
type: str | None
josephlewis42 marked this conversation as resolved.
Show resolved Hide resolved

@property
def path_without_fragment(self) -> str:
    """Path with any ``#fragment`` part removed.

    This is the key under which the file's contents are stored in db.json.
    """
    # Everything before the first "#"; the whole path when there is none.
    page_path, _, _ = self.path.partition("#")
    return page_path


class SortPrecedence(Enum):
    """Position of a section in the navbar relative to the regular content."""

    # NOTE: Members are iterated in definition order, and that order is the
    # display order. Keep the declarations below sorted accordingly.

    # Shown ahead of the regular content.
    BEFORE_CONTENT = 0
    # Regular content sections.
    CONTENT = 1
    # Shown after the regular content.
    AFTER_CONTENT = 2


class DevdocsIndexType(BaseModel):
"""A section header for documentation."""

Expand All @@ -94,6 +119,39 @@ class DevdocsIndexType(BaseModel):
# Section slug. This appears to be unused.
slug: str

def sort_precedence(self) -> SortPrecedence:
    """Determine where this section should be displayed in the navigation.

    The section name is tested against BEFORE_CONTENT_PATTERN first and
    AFTER_CONTENT_PATTERN second; a name matching neither is regular content.

    Returns:
        SortPrecedence.BEFORE_CONTENT, SortPrecedence.AFTER_CONTENT or
        SortPrecedence.CONTENT.
    """
    # Use search(), not match(): match() only tries position 0, so the "\("
    # alternative of BEFORE_CONTENT_PATTERN could only fire for names that
    # begin with "(" (e.g. "Language (Reference)" would be missed), and
    # "appendix" would only be recognised as a prefix of the name.
    # NOTE(review): the patterns come from the DevDocs JavaScript frontend,
    # where RegExp.test() matches anywhere in the string - confirm that the
    # frontend applies them that way.
    if BEFORE_CONTENT_PATTERN.search(self.name):
        return SortPrecedence.BEFORE_CONTENT

    if AFTER_CONTENT_PATTERN.search(self.name):
        return SortPrecedence.AFTER_CONTENT

    return SortPrecedence.CONTENT


class NavigationSection(BaseModel):
    """One named group of links within a devdocs navigation tree."""

    # Heading for the group of links.
    name: str
    # Entries listed under the heading.
    links: list[DevdocsIndexEntry]

    @computed_field
    @property
    def count(self) -> int:
        """How many links the section holds."""
        total = len(self.links)
        return total

    @cached_property
    def _contained_pages(self) -> set[str]:
        # Fragment-free paths of every link, computed once on first access.
        pages: set[str] = set()
        for entry in self.links:
            pages.add(entry.path_without_fragment)
        return pages

    def contains_page(self, page_path: str) -> bool:
        """Tell whether one of this section's links points at ``page_path``.

        The comparison is made against each link's path without its fragment.
        """
        known_pages = self._contained_pages
        return page_path in known_pages


class DevdocsIndex(BaseModel):
"""Represents entries in the /<slug>/index.json file for each resource."""
Expand All @@ -102,10 +160,36 @@ class DevdocsIndex(BaseModel):
entries: list[DevdocsIndexEntry]

# List of "types" or section headings.
# These are displayed mostly in order, except regular expressions are used to sort:
# https://github.com/freeCodeCamp/devdocs/blob/e28f81d3218bdbad7eac0540c58c11c7fe1d33d3/assets/javascripts/collections/types.js#L3
# These are displayed in the order they're found grouped by sort_precedence.
types: list[DevdocsIndexType]

def build_navigation(self) -> list[NavigationSection]:
    """Build the navigation sections in the order they should be rendered.

    Sections are grouped by their sort precedence and, within each group, keep
    the order in which they appear in ``types``. Entries whose ``type`` is None
    are not attached to any section.
    """
    # Bucket the section headings by precedence, preserving their order.
    grouped_types: dict[SortPrecedence, list[DevdocsIndexType]] = defaultdict(list)
    for index_type in self.types:
        grouped_types[index_type.sort_precedence()].append(index_type)

    # Bucket the displayable entries under the name of their section.
    entries_by_type: dict[str, list[DevdocsIndexEntry]] = defaultdict(list)
    for index_entry in self.entries:
        if index_entry.type is not None:
            entries_by_type[index_entry.type].append(index_entry)

    # Iterating the enum walks the precedences in definition order.
    return [
        NavigationSection(
            name=index_type.name,
            links=entries_by_type[index_type.name],
        )
        for precedence in SortPrecedence
        for index_type in grouped_types[precedence]
    ]


class DevdocsClient:
"""Utility functions to read data from devdocs."""
Expand Down
23 changes: 14 additions & 9 deletions src/devdocs2zim/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,19 @@
from devdocs2zim.generator import DocFilter, Generator, ZimConfig


def zim_defaults() -> ZimConfig:
    """Build the ZimConfig holding the default values for ZIM generation."""
    defaults = ZimConfig(
        # Who made and who publishes the ZIM.
        creator="DevDocs",
        publisher="openZIM",
        # Format strings; the {placeholders} are left for later substitution.
        name_format="devdocs_{slug_without_version}_{version}",
        title_format="{full_name} Docs",
        description_format="{full_name} docs by DevDocs",
        long_description_format=None,
        tags="devdocs;{slug_without_version}",
    )
    return defaults


def main() -> None:
parser = argparse.ArgumentParser(
prog=NAME,
Expand All @@ -38,15 +51,7 @@ def main() -> None:
# ZIM configuration flags
ZimConfig.add_flags(
parser,
ZimConfig(
name_format="devdocs_{slug_without_version}_{version}",
creator="DevDocs",
publisher="openZIM",
title_format="{full_name} Docs",
description_format="{full_name} docs by DevDocs",
long_description_format=None,
tags="devdocs;{slug_without_version}",
),
zim_defaults(),
)

# Document selection flags
Expand Down
Loading