Skip to content

Commit

Permalink
chore(python): WIP backport of changes from skalt/pagefind_python
Browse files Browse the repository at this point in the history
  • Loading branch information
SKalt committed Aug 20, 2024
1 parent 6185637 commit ecdb22c
Show file tree
Hide file tree
Showing 30 changed files with 756 additions and 62 deletions.
4 changes: 4 additions & 0 deletions .shellcheckrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
external-sources=true
source-path=SCRIPTDIR
disable=SC2002
# SC2002: ignore "useless cat" warning: starting pipes with `cat` improves composability
4 changes: 2 additions & 2 deletions test_ci.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env bash

set -eu
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
cd $SCRIPT_DIR
cd "$SCRIPT_DIR"

PAGEFIND=$(realpath "$SCRIPT_DIR/target/$1/pagefind")
REPO_WD=$(realpath "$SCRIPT_DIR")
Expand Down
2 changes: 2 additions & 0 deletions wrappers/python/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
output
# ^ from src/tests/integration.py
2 changes: 2 additions & 0 deletions wrappers/python/poetry.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[virtualenvs]
in-project = true
37 changes: 31 additions & 6 deletions wrappers/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
[tool.poetry]
name = "pagefind"
version = "0.1.0"
name = "pagefind_python"
version = "0.0.0a0"
# note that ^this^ is the version number of the python API, not the version of
# the pagefind executable.
description = "Python API for Pagefind"
authors = ["CloudCannon"]
license = "MIT"
readme = "README.md"
include = []
exclude = [
"dist",
"output",
"*.whl",
"*.egg-info",
"*.log",
".venv",
"pagefind_python_bin"
]
classifiers = [
"License :: OSI Approved :: MIT License",
"Topic :: Text Processing :: Indexing",
Expand All @@ -19,10 +30,22 @@ classifiers = [

[tool.poetry.dependencies]
python = ">=3.9"
# TODO: uncomment
# FIXME: update bin-package names once those stabilize
# experimental_pagefind_python_bin = { version = "*", optional = true }
# experimental_pagefind_python_bin_extended = { version = "*", optional = true }

# While the pagefind_python package is being built, the pagefind binary packages
# are not yet published, so `poetry lock` fails if they are listed here.
# However, `poetry build` omits the binary-package extras from
# `pagefind_python`'s distribution info when these lines are commented out.
# As a workaround, the lines ending in `#!!opt` are temporarily uncommented for
# the build and restored afterwards (see scripts/build/api_package.py).

# [[[cog
# version = open("pagefind_version.txt").read().strip()
# print(f"# pagefind_bin = {{ version = \"~={version}\", optional = true }} #!!opt")
# print(f"# pagefind_bin_extended = {{ version = \"~={version}\", optional = true }} #!!opt")
# ]]]
# pagefind_bin = { version = "~=1.1.0", optional = true } #!!opt
# pagefind_bin_extended = { version = "~=1.1.0", optional = true } #!!opt
# [[[end]]]

[tool.poetry.extras]
bin = ["pagefind_bin"]
Expand All @@ -33,6 +56,8 @@ ruff = "^0.5.0"
mypy = "^1.10.1"
wheel = "^0.43.0"
cogapp = "^3.4.1"
twine = "^5.1.1"
docutils = "^0.21.2"

[build-system]
requires = ["poetry-core"]
Expand Down
Empty file.
16 changes: 16 additions & 0 deletions wrappers/python/scripts/build/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import logging
import os
from pathlib import Path

# Paths shared by the build scripts, all derived from this file's location.
this_file = Path(__file__)
this_dir = Path(__file__).parent
# Two directory levels above this package, resolved to an absolute path.
python_root = this_dir.parent.parent.resolve().absolute()
# Well-known files and directories addressed relative to python_root.
upstream_version_file = python_root / "pagefind_version.txt"
dist_dir = python_root / "dist"
vendor_dir = python_root / "vendor"


def setup_logging() -> None:
    """Configure root logging for the build scripts.

    The level is taken from the ``PAGEFIND_PYTHON_LOG_LEVEL`` environment
    variable; when that variable is unset or empty, ``logging.INFO`` is used.
    """
    env_level = os.environ.get("PAGEFIND_PYTHON_LOG_LEVEL")
    chosen_level = env_level if env_level else logging.INFO
    logging.basicConfig(level=chosen_level)
70 changes: 70 additions & 0 deletions wrappers/python/scripts/build/all.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import tarfile
import tempfile
from pathlib import Path
from typing import List

from . import dist_dir, setup_logging
from .binary_only_wheel import (
LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS,
write_pagefind_bin_only_wheel,
)
from .get_pagefind_release import download

# File names that find_bin accepts as a pagefind executable.
__candidates = (
"pagefind",
"pagefind.exe",
"pagefind_extended",
"pagefind_extended.exe",
)


def find_bin(dir: Path) -> Path:
    """Return the first pagefind executable found directly inside ``dir``.

    Only regular files whose name is listed in ``__candidates`` are
    considered; subdirectories are not searched.

    Raises:
        FileNotFoundError: if no entry of ``dir`` matches a candidate name.
    """
    matches = (
        entry
        for entry in dir.iterdir()
        if entry.is_file() and entry.name in __candidates
    )
    found = next(matches, None)
    if found is None:
        raise FileNotFoundError(f"Could not find any of {__candidates} in {dir}")
    return found


def get_llvm_triple(tar_gz: Path, tag: str = "") -> str:
    """Parse the LLVM target triple out of a pagefind release archive name.

    The archive name is expected to look like
    ``pagefind-<tag>-<triple>.tar.gz`` or
    ``pagefind_extended-<tag>-<triple>.tar.gz``; the suffix and whichever
    prefix is present are stripped and the remainder is returned.

    Args:
        tar_gz: path of the release archive; only its file name is inspected.
        tag: release tag embedded in the archive name (e.g. ``"v1.1.0"``).
            When empty, the module-level ``tag_name`` assigned by the
            ``__main__`` block is used, which keeps one-argument calls working.

    Returns:
        The archive name with the ``.tar.gz`` suffix and the
        ``pagefind[-_extended]-<tag>-`` prefix removed.

    Raises:
        ValueError: if the file name does not end in ``.tar.gz``.
        NameError: if ``tag`` is empty and ``tag_name`` has not been assigned.
    """
    # An explicit check instead of ``assert``: asserts are stripped under -O.
    if not tar_gz.name.endswith(".tar.gz"):
        raise ValueError(f"Expected a .tar.gz archive, got: {tar_gz.name}")
    if not tag:
        # Fall back to the global set when this module is run as a script.
        tag = tag_name
    triple = tar_gz.name.removesuffix(".tar.gz")
    for prefix in (f"pagefind-{tag}-", f"pagefind_extended-{tag}-"):
        triple = triple.removeprefix(prefix)
    return triple


def check_platforms(certified: List[Path]) -> None:
    """Verify that every archive targets a platform with a known wheel tag.

    Each archive's LLVM triple is looked up in
    ``LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS``.

    Raises:
        ValueError: for the first archive whose triple has no mapping.
    """
    for archive in certified:
        triple = get_llvm_triple(archive)
        if LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS.get(triple) is None:
            raise ValueError(f"Unsupported platform: {triple}")


if __name__ == "__main__":
    setup_logging()
    # NOTE: ``tag_name`` must remain a module-level name, because
    # get_llvm_triple reads it as a global.
    certified, tag_name = download("latest", dry_run=False)
    # Reject unsupported platforms up front, before any wheel is written.
    check_platforms(certified)
    dist_dir.mkdir(exist_ok=True)
    for tar_gz in certified:
        llvm_triple = get_llvm_triple(tar_gz)
        platform = LLVM_TRIPLES_TO_PYTHON_WHEEL_PLATFORMS.get(llvm_triple)
        if platform is None:
            raise ValueError(f"Unsupported platform: {llvm_triple}")

        # FIXME: avoid writing the extracted bin to disk
        # Unpack the archive into a scratch directory that is removed once
        # the wheel has been written.
        archive_stem = tar_gz.name.removesuffix(".tar.gz")
        with tempfile.TemporaryDirectory(prefix=archive_stem + "~") as scratch:
            with tarfile.open(tar_gz, "r:gz") as archive:
                archive.extractall(scratch)
            wheel_version = tag_name.removeprefix("v")
            write_pagefind_bin_only_wheel(
                executable=find_bin(Path(scratch)),
                output_dir=dist_dir,
                version=wheel_version,
                platform=platform,
            )
28 changes: 28 additions & 0 deletions wrappers/python/scripts/build/api_package.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# HACK: This script is a hack to build the API package without using poetry to lock the
# optional dependencies. It might be preferable to use setuptools directly rather than
# work around poetry.

from . import python_root, setup_logging
import subprocess

# The pyproject.toml that main() temporarily rewrites around `poetry build`.
pyproject_toml = python_root / "pyproject.toml"


def main() -> None:
    """Run ``poetry build`` with the optional binary dependencies enabled.

    Every line of ``pyproject.toml`` that ends with the ``#!!opt`` marker has
    its leading ``"# "`` removed, which uncomments it for the duration of the
    build. The original file contents are written back afterwards, whether
    the build succeeds or fails.

    Raises:
        subprocess.CalledProcessError: if ``poetry build`` exits non-zero.
    """
    original = pyproject_toml.read_text()
    enabled = "".join(
        (line.removeprefix("# ") if line.endswith("#!!opt") else line) + "\n"
        for line in original.splitlines()
    )
    try:
        pyproject_toml.write_text(enabled)
        subprocess.run(["poetry", "build"], check=True)
    finally:
        # Always restore: a failed build must not leave the temporarily
        # uncommented dependencies behind in pyproject.toml.
        pyproject_toml.write_text(original)


# Script entry point: configure logging, then run the build.
if __name__ == "__main__":
setup_logging()
main()
Loading

0 comments on commit ecdb22c

Please sign in to comment.