Skip to content

Commit

Permalink
fix algolia upload (#820)
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin authored Jan 30, 2025
1 parent b783a02 commit 6c00d1d
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 24 deletions.
12 changes: 10 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,8 @@ jobs:
if: github.event.pull_request.head.repo.full_name == github.repository || github.ref == 'refs/heads/main'
env:
PPPR_TOKEN: ${{ secrets.PPPR_TOKEN }}
ALGOLIA_WRITE_API_KEY: ${{ secrets.ALGOLIA_WRITE_API_KEY }}

- run: tree site
- run: tree -sh site
- uses: actions/setup-node@v4
- run: npm install
working-directory: docs-site
Expand Down Expand Up @@ -243,6 +242,10 @@ jobs:
- run: npm install
working-directory: docs-site

- uses: astral-sh/setup-uv@v5
with:
enable-cache: true

- uses: actions/download-artifact@v4
with:
name: site
Expand All @@ -257,6 +260,11 @@ jobs:
--var GIT_COMMIT_SHA:${{ github.sha }}
--var GIT_BRANCH:main
- run: uv sync --group docs-upload
- run: uv run python docs/.hooks/algolia.py upload
env:
ALGOLIA_WRITE_API_KEY: ${{ secrets.ALGOLIA_WRITE_API_KEY }}

release:
needs: [check]
if: "success() && startsWith(github.ref, 'refs/tags/')"
Expand Down
66 changes: 47 additions & 19 deletions docs/.hooks/algolia.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@
from __future__ import annotations as _annotations

import os
from typing import TypedDict, cast
import sys
from pathlib import Path
from typing import TYPE_CHECKING, TypedDict, cast

from algoliasearch.search.client import SearchClientSync
from bs4 import BeautifulSoup
from mkdocs.config import Config
from mkdocs.structure.files import Files
from mkdocs.structure.pages import Page
from pydantic import TypeAdapter

if TYPE_CHECKING:
from mkdocs.config import Config
from mkdocs.structure.files import Files
from mkdocs.structure.pages import Page


class AlgoliaRecord(TypedDict):
Expand All @@ -20,19 +23,18 @@ class AlgoliaRecord(TypedDict):


records: list[AlgoliaRecord] = []
records_ta = TypeAdapter(list[AlgoliaRecord])
# these values should match docs/javascripts/search-worker.js.
ALGOLIA_APP_ID = 'KPPUDTIAVX'
ALGOLIA_INDEX_NAME = 'pydantic-ai-docs'
ALGOLIA_WRITE_API_KEY = os.environ.get('ALGOLIA_WRITE_API_KEY')

# Algolia has a limit of 100kb per record in the paid plan,
# leave some space for the other fields as well.
MAX_CONTENT_LENGTH = 90_000


def on_page_content(html: str, page: Page, config: Config, files: Files) -> str:
if not ALGOLIA_WRITE_API_KEY:
return html
from bs4 import BeautifulSoup

assert page.title is not None, 'Page title must not be None'
title = cast(str, page.title)
Expand Down Expand Up @@ -93,26 +95,52 @@ def on_page_content(html: str, page: Page, config: Config, files: Files) -> str:
return html


ALGOLIA_RECORDS_FILE = 'algolia_records.json'


def on_post_build(config: Config) -> None:
if not ALGOLIA_WRITE_API_KEY:
return
if records:
algolia_records_path = Path(config['site_dir']) / ALGOLIA_RECORDS_FILE
with algolia_records_path.open('wb') as f:
f.write(records_ta.dump_json(records))


def algolia_upload() -> None:
from algoliasearch.search.client import SearchClientSync

client = SearchClientSync(ALGOLIA_APP_ID, ALGOLIA_WRITE_API_KEY)
algolia_write_api_key = os.environ['ALGOLIA_WRITE_API_KEY']

for record in records:
if len(record['content']) > MAX_CONTENT_LENGTH:
client = SearchClientSync(ALGOLIA_APP_ID, algolia_write_api_key)
filtered_records: list[AlgoliaRecord] = []

algolia_records_path = Path.cwd() / 'site' / ALGOLIA_RECORDS_FILE

with algolia_records_path.open('rb') as f:
all_records = records_ta.validate_json(f.read())

for record in all_records:
content = record['content']
if len(content) > MAX_CONTENT_LENGTH:
print(
f"Record with title '{record['title']}' has more than {MAX_CONTENT_LENGTH} characters, {len(record['content'])}."
f"Record with title '{record['title']}' has more than {MAX_CONTENT_LENGTH} characters, {len(content)}."
)
print(record['content'])
print(content)
else:
filtered_records.append(record)

# Filter the records from the index if the content is bigger than 100kb, Algolia limit
filtered_records = list(filter(lambda record: len(record['content']) < MAX_CONTENT_LENGTH, records))
print(f'Uploading {len(filtered_records)} out of {len(records)} records to Algolia...')
print(f'Uploading {len(filtered_records)} out of {len(all_records)} records to Algolia...')

client.clear_objects(index_name=ALGOLIA_INDEX_NAME)

client.batch(
index_name=ALGOLIA_INDEX_NAME,
batch_write_params={'requests': [{'action': 'addObject', 'body': record} for record in filtered_records]},
)


if __name__ == '__main__':
    # Script entry point: the only supported sub-command is "upload", given as the last CLI argument.
    if sys.argv[-1] == 'upload':
        algolia_upload()
    else:
        print('Run with "upload" argument to upload records to Algolia.')
        # Use `sys.exit` rather than the `exit` builtin: the latter is injected by the `site`
        # module for interactive sessions and is missing under `python -S` or frozen interpreters.
        sys.exit(1)
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ lint = [
"ruff>=0.6.9",
]
docs = [
"algoliasearch>=4.12.0",
"black>=24.10.0",
"bs4>=0.0.2",
"markdownify>=0.14.1",
Expand All @@ -74,6 +73,10 @@ docs = [
"mkdocs-material[imaging]>=9.5.45",
"mkdocstrings-python>=1.12.2",
]
docs-upload = [
"algoliasearch>=4.12.0",
"pydantic>=2.10.1",
]

[tool.hatch.build.targets.wheel]
only-include = ["/README.md"]
Expand Down
10 changes: 8 additions & 2 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 6c00d1d

Please sign in to comment.