diff --git a/.gitignore b/.gitignore index 001f5bd..0570cb8 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ __pycache__ node_modules/ boredcharts/static/plotlyjs.min.js +*.pdf diff --git a/README.md b/README.md index 0b49704..24391a7 100644 --- a/README.md +++ b/README.md @@ -20,11 +20,13 @@ Build easy, minimal, PDF-able data reports with markdown and python. - [ ] matplotlib figures as svg? - [ ] support python 3.10, 3.11 - [ ] make plotting libraries optional -- [ ] pdf exports with selenium in headless mode +- [x] pdf exports with selenium in headless mode +- [ ] cli (`boredcharts init`, `boredcharts export [report]`, `boredcharts list`, `boredcharts dev`, `boredcharts run`) + - [x] list/export/dev/run + - [ ] init - [ ] ability to archive reports (export to static html, move to archive directory, still serve from archive directory, but can get rid of analysis—could just be archive endpoints for figures?) -- [ ] cli? (`boredcharts init`, `boredcharts export [report]`, `boredcharts dev`, `boredcharts serve`) - [ ] deploy to [bored-charts-example.oliverlambson.com](https://bored-charts-example.oliverlambson.com) - [ ] dashboard layout with tighter grid layout - [ ] example with database diff --git a/bored-charts/boredcharts/cli/__init__.py b/bored-charts/boredcharts/cli/__init__.py new file mode 100644 index 0000000..267a8d6 --- /dev/null +++ b/bored-charts/boredcharts/cli/__init__.py @@ -0,0 +1,5 @@ +from boredcharts.cli.cli import main + +__all__ = [ + "main", +] diff --git a/bored-charts/boredcharts/cli/cli.py b/bored-charts/boredcharts/cli/cli.py new file mode 100644 index 0000000..ccf0cec --- /dev/null +++ b/bored-charts/boredcharts/cli/cli.py @@ -0,0 +1,233 @@ +import argparse +import asyncio +import importlib +import multiprocessing +import time +from pathlib import Path +from typing import Literal, NamedTuple +from urllib.error import URLError +from urllib.request import urlopen + +import uvicorn +from fastapi import FastAPI +from 
def get_reports(
    path: Path | None,
    app_name: str | None,
) -> list[Report]:
    """Enumerate the report endpoints of the discovered FastAPI app.

    The app is located with ``get_import_string`` and imported, then its
    OpenAPI schema is walked. An operation counts as a report when it is a
    ``get`` operation, carries at least one tag starting with ``"report"``
    and its derived name does not start with ``"index"``.

    Args:
        path: Path to the module containing the app, or ``None`` to let
            ``get_import_string`` pick a default.
        app_name: Name of the FastAPI object inside the module, or ``None``
            to let ``get_import_string`` discover it.

    Returns:
        One ``Report`` per (operation, matching tag) pair, in schema order.
    """
    # get_import_string mutates sys.path, which is what makes the import
    # below resolvable.
    target = get_import_string(path=path, app_name=app_name)
    pieces = target.split(":")
    module = importlib.import_module(pieces[0])
    app = getattr(module, pieces[1])
    assert isinstance(app, FastAPI)
    schema_paths = app.openapi()["paths"]
    assert isinstance(schema_paths, dict)

    found: list[Report] = []
    for urlpath, operations in schema_paths.items():
        assert isinstance(urlpath, str)
        assert isinstance(operations, dict)
        for verb, operation in operations.items():
            assert isinstance(verb, str)
            assert isinstance(operation, dict)
            if verb != "get":
                continue

            all_tags = operation.get("tags")
            if all_tags is None:
                continue
            assert isinstance(all_tags, list)
            # boredcharts convention: report routes are tagged "report..."
            report_tags = [t for t in all_tags if t.startswith("report")]
            if not report_tags:
                continue

            summary = operation.get("summary")
            assert isinstance(summary, str)
            # undo fastapi's route-name -> summary transformation
            slug = summary.lower().replace(" ", "_")
            if slug.startswith("index"):  # boredcharts convention
                continue

            found.extend(
                Report(name=slug, urlpath=urlpath, tag=t) for t in report_tags
            )

    return found
def export(
    path: Path | None,
    app_name: str | None,
    report: str,
    *,
    exporter: UrlToPdfFile = print_to_pdf_manual,
) -> None:
    """Export a single report to a PDF file in the current directory.

    The app is served by uvicorn in a child process on a dedicated port,
    polled on ``/healthz`` until it answers 200, and then ``exporter`` is
    pointed at the report's URL. The output file is named after the report
    (dots replaced by dashes) with a ``.pdf`` suffix.

    The child process is always shut down before this function returns or
    raises. Without that, a failed health check or exporter error would
    leave the non-daemon server process running and the CLI would block at
    interpreter exit waiting for it.

    Args:
        path: Path to the module containing the app, or ``None`` for default
            discovery.
        app_name: Name of the FastAPI object, or ``None`` for discovery.
        report: Route name of the report to export.
        exporter: Coroutine function that renders a URL into a PDF file.

    Raises:
        SystemExit: If no route named ``report`` exists in the app.
        Exception: If the app does not pass its health check in time.
    """
    try:
        route = get_report_url(path, app_name, report)
    except NoMatchFound:
        print(f'Report "{report}" not found!')
        print("Use `boredcharts list` to see available reports.")
        raise SystemExit(1)

    host = "127.0.0.1"
    port = 4001  # different port just for exports
    base_url = f"http://{host}:{port}"
    process = multiprocessing.Process(
        target=_run_uvicorn,
        kwargs=dict(
            path=path,
            app_name=app_name,
            reload=False,
            host=host,
            port=port,
            log_level="warning",
        ),
    )

    print("Spinning up boredcharts app", end="", flush=True)
    process.start()
    try:
        for _ in range(10):
            print(".", end="", flush=True)
            time.sleep(0.1)
            try:
                with urlopen(f"{base_url}/healthz") as response:
                    status = response.status
            except (URLError, ConnectionError):
                # Server not accepting connections yet, or it dropped the
                # connection mid-startup: treat as "not ready" and retry.
                continue
            if status == 200:
                print(" started!")
                break
        else:
            print(" health check failed!")
            raise Exception("Couldn't start app!")

        url = f"{base_url}{route}"
        file = Path(report.replace(".", "-")).absolute().with_suffix(".pdf")
        asyncio.run(exporter(url, file))
        print(f"Exported {report} to {file}")
    finally:
        # Always stop the server, including on failure paths.
        process.terminate()
        process.join(timeout=5)
        if process.is_alive():
            # SIGTERM was ignored or shutdown is stuck; force it.
            process.kill()
            process.join()
def get_app_name(*, mod_data: ModuleData, app_name: Union[str, None] = None) -> str:
    """Return the attribute name of the FastAPI app inside a module.

    The module named by ``mod_data.module_import_str`` is imported. If
    ``app_name`` is given it must exist in the module and be a ``FastAPI``
    instance. Otherwise the names ``app`` and ``api`` are tried first,
    followed by every other name in ``dir(module)`` order.

    Args:
        mod_data: Import information for the module to inspect.
        app_name: Explicit attribute name to validate, or ``None`` to
            search for one.

    Returns:
        The attribute name under which a ``FastAPI`` instance was found.

    Raises:
        ImportError, ValueError: Propagated unchanged from the import
            (a likely cause is a missing ``__init__.py``).
        Exception: If ``app_name`` is missing or not a FastAPI app, or if
            no FastAPI app can be found in the module.
    """
    # Import errors propagate as-is; a missing __init__.py is a likely cause.
    mod = importlib.import_module(mod_data.module_import_str)
    object_names = dir(mod)
    object_names_set = set(object_names)

    if app_name:
        if app_name not in object_names_set:
            raise Exception(
                f"Could not find app name {app_name} in {mod_data.module_import_str}"
            )
        app = getattr(mod, app_name)
        if not isinstance(app, FastAPI):
            raise Exception(
                f"The app name {app_name} in {mod_data.module_import_str} doesn't seem to be a FastAPI app"
            )
        return app_name

    # Conventional names win over whatever else happens to be in the module.
    for preferred_name in ("app", "api"):
        if preferred_name in object_names_set and isinstance(
            getattr(mod, preferred_name), FastAPI
        ):
            return preferred_name

    for name in object_names:
        if isinstance(getattr(mod, name), FastAPI):
            return name

    # The boredcharts CLI exposes this option as --app-name (not --app as in
    # the fastapi_cli code this module was forked from).
    raise Exception("Could not find FastAPI app in module, try using --app-name")
async def _print_to_pdf_pw_basic(url: str, file: Path) -> None:
    """Render ``url`` to ``file`` using playwright's built-in PDF export.

    The page is loaded in a fresh chromium context, waiting until the
    network is idle, and printed as A4 with 1cm margins on every side.

    Note: playwright's built-in pdf export results in text that can't be
    selected well.

    Args:
        url: Address of the page to print.
        file: Destination path of the PDF.
    """
    page_margin = dict.fromkeys(("top", "right", "bottom", "left"), "1cm")

    async with async_playwright() as pw:
        chromium = await pw.chromium.launch()
        ctx = await chromium.new_context()
        tab = await ctx.new_page()

        await tab.goto(url, wait_until="networkidle")
        await tab.pdf(path=file, format="A4", margin=page_margin)

        await ctx.close()
        await chromium.close()
--git a/bored-charts/pyproject.toml b/bored-charts/pyproject.toml index f919cc1..c20a208 100644 --- a/bored-charts/pyproject.toml +++ b/bored-charts/pyproject.toml @@ -4,14 +4,18 @@ dynamic = ["version"] description = "Easy, minimal, PDF-able data reports with python and markdown." authors = [{ name = "Oliver Lambson", email = "oliverlambson@gmail.com" }] dependencies = [ + # core "fastapi>=0.112.0", "jinja2>=3.1.4", "markdown>=3.6", "markupsafe>=2.1.5", + # charts "matplotlib>=3.9.2", "plotly>=5.23.0", "altair>=5.4.0", "seaborn>=0.13.2", + # cli + "playwright>=1.46.0", ] readme = "README.md" license = "MIT" @@ -47,6 +51,9 @@ classifiers = [ Repository = "https://github.com/oliverlambson/bored-charts.git" Issues = "https://github.com/oliverlambson/bored-charts/issues" +[project.scripts] +boredcharts = "boredcharts.cli:main" + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" diff --git a/uv.lock b/uv.lock index 4c6bd83..4b51016 100644 --- a/uv.lock +++ b/uv.lock @@ -130,6 +130,7 @@ dependencies = [ { name = "markdown" }, { name = "markupsafe" }, { name = "matplotlib" }, + { name = "playwright" }, { name = "plotly" }, { name = "seaborn" }, ] @@ -142,6 +143,7 @@ requires-dist = [ { name = "markdown", specifier = ">=3.6" }, { name = "markupsafe", specifier = ">=2.1.5" }, { name = "matplotlib", specifier = ">=3.9.2" }, + { name = "playwright", specifier = ">=1.46.0" }, { name = "plotly", specifier = ">=5.23.0" }, { name = "seaborn", specifier = ">=0.13.2" }, ] @@ -292,6 +294,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e4/b9/0394d67056d4ad36a3807b439571934b318f1df925593a95e9ec0516b1a7/fonttools-4.53.1-py3-none-any.whl", hash = "sha256:f1f8758a2ad110bd6432203a344269f445a2907dc24ef6bccfd0ac4e14e0d71d", size = 1090472 }, ] +[[package]] +name = "greenlet" +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/17/14/3bddb1298b9a6786539ac609ba4b7c9c0842e12aa73aaa4d8d73ec8f8185/greenlet-3.0.3.tar.gz", hash = "sha256:43374442353259554ce33599da8b692d5aa96f8976d567d4badf263371fbe491", size = 182013 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/2f/461615adc53ba81e99471303b15ac6b2a6daa8d2a0f7f77fd15605e16d5b/greenlet-3.0.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:70fb482fdf2c707765ab5f0b6655e9cfcf3780d8d87355a063547b41177599be", size = 273085 }, + { url = "https://files.pythonhosted.org/packages/e9/55/2c3cfa3cdbb940cf7321fbcf544f0e9c74898eed43bf678abf416812d132/greenlet-3.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4d1ac74f5c0c0524e4a24335350edad7e5f03b9532da7ea4d3c54d527784f2e", size = 660514 }, + { url = "https://files.pythonhosted.org/packages/38/77/efb21ab402651896c74f24a172eb4d7479f9f53898bd5e56b9e20bb24ffd/greenlet-3.0.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:149e94a2dd82d19838fe4b2259f1b6b9957d5ba1b25640d2380bea9c5df37676", size = 674295 }, + { url = "https://files.pythonhosted.org/packages/74/3a/92f188ace0190f0066dca3636cf1b09481d0854c46e92ec5e29c7cefe5b1/greenlet-3.0.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15d79dd26056573940fcb8c7413d84118086f2ec1a8acdfa854631084393efcc", size = 669395 }, + { url = "https://files.pythonhosted.org/packages/63/0f/847ed02cdfce10f0e6e3425cd054296bddb11a17ef1b34681fa01a055187/greenlet-3.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b7db1ebff4ba09aaaeae6aa491daeb226c8150fc20e836ad00041bcb11230", size = 670455 }, + { url = "https://files.pythonhosted.org/packages/bd/37/56b0da468a85e7704f3b2bc045015301bdf4be2184a44868c71f6dca6fe2/greenlet-3.0.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fcd2469d6a2cf298f198f0487e0a5b1a47a42ca0fa4dfd1b6862c999f018ebbf", size = 625692 }, + { url = 
"https://files.pythonhosted.org/packages/7c/68/b5f4084c0a252d7e9c0d95fc1cfc845d08622037adb74e05be3a49831186/greenlet-3.0.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1f672519db1796ca0d8753f9e78ec02355e862d0998193038c7073045899f305", size = 1152597 }, + { url = "https://files.pythonhosted.org/packages/a4/fa/31e22345518adcd69d1d6ab5087a12c178aa7f3c51103f6d5d702199d243/greenlet-3.0.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2516a9957eed41dd8f1ec0c604f1cdc86758b587d964668b5b196a9db5bfcde6", size = 1181043 }, + { url = "https://files.pythonhosted.org/packages/53/80/3d94d5999b4179d91bcc93745d1b0815b073d61be79dd546b840d17adb18/greenlet-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:bba5387a6975598857d86de9eac14210a49d554a77eb8261cc68b7d082f78ce2", size = 293635 }, +] + [[package]] name = "h11" version = "0.14.0" @@ -735,6 +754,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/2c/2e0a52890f269435eee38b21c8218e102c621fe8d8df8b9dd06fabf879ba/pillow-10.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:5b001114dd152cfd6b23befeb28d7aee43553e2402c9f159807bf55f33af8a8d", size = 2243375 }, ] +[[package]] +name = "playwright" +version = "1.46.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "greenlet" }, + { name = "pyee" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/89/8f/cf024e7cd4f1f365fea772b7fdde21e421fcd5c0c206bc7cb1c4866cdfbe/playwright-1.46.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:fa60b95c16f6ce954636229a6c9dd885485326bca52d5ba20d02c0bc731a2bbb", size = 34799014 }, + { url = "https://files.pythonhosted.org/packages/98/d2/50db19ce9b25c2033a6836b5a4eacb7f4be1adff63cfb4c58b46a9eb04ab/playwright-1.46.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:73dcfc24834f4d004bc862ed0d74b4c1406793a8164734238ad035356fddc8ac", size = 33117618 }, + { url = 
"https://files.pythonhosted.org/packages/9f/c9/8d0381489d082f86246579a4d51b20ccd6b5b6e570e809fd103b63d1b9bd/playwright-1.46.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:f5acfec1dbdc84d02dc696a17a344227e66c91413eab2036428dab405f195b82", size = 34799011 }, + { url = "https://files.pythonhosted.org/packages/75/4f/0a410deb48a0ff93107884a6cf06bbdbc97571f41b49e06cf7673c192264/playwright-1.46.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:3b418509f45879f1403d070858657a39bd0b333b23d92c37355682b671726df9", size = 37946374 }, + { url = "https://files.pythonhosted.org/packages/1f/ac/4df6b6c12bbfbcfd2d2f1c59645ff99732852e920027b877c7c775341ca0/playwright-1.46.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23580f6a3f99757bb9779d29be37144cb9328cd9bafa178e6db5b3ab4b7faf4c", size = 37693981 }, + { url = "https://files.pythonhosted.org/packages/55/cc/3de814e8e7540d9c6d1b131c5e4457d5a3a56880b3a20235cfe94bbdfef7/playwright-1.46.0-py3-none-win32.whl", hash = "sha256:85f44dd32a23d02850f0ff4dafe51580e5199531fff5121a62489d9838707782", size = 29819013 }, + { url = "https://files.pythonhosted.org/packages/ba/27/b5f21695ee2ea32fdf826e531066e5633e1056171e217bac3daeefa46017/playwright-1.46.0-py3-none-win_amd64.whl", hash = "sha256:f14a7fd7e24e954eec6ce61d787d499e41937ade811a0818e9a088aabe28ebb6", size = 29819024 }, +] + [[package]] name = "plotly" version = "5.23.0" @@ -898,6 +935,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/13/63/b95781763e8d84207025071c0cec16d921c0163c7a9033ae4b9a0e020dc7/pydantic_core-2.20.1-cp313-none-win_amd64.whl", hash = "sha256:65db0f2eefcaad1a3950f498aabb4875c8890438bc80b19362cf633b87a8ab20", size = 1898013 }, ] +[[package]] +name = "pyee" +version = "11.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/f7/22/b4c7f3d9579204a014c4eda0e019e6bfe56af52a96cacc82004b60eec079/pyee-11.1.0.tar.gz", hash = "sha256:b53af98f6990c810edd9b56b87791021a8f54fd13db4edd1142438d44ba2263f", size = 29806 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/cc/5cea8a0a0d3deb90b5a0d39ad1a6a1ccaa40a9ea86d793eb8a49d32a6ed0/pyee-11.1.0-py3-none-any.whl", hash = "sha256:5d346a7d0f861a4b2e6c47960295bd895f816725b27d656181947346be98d7c1", size = 15263 }, +] + [[package]] name = "pygments" version = "2.18.0"