Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sqlcache #3

Merged
merged 9 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
name: Deploy
on: workflow_dispatch
on:
workflow_dispatch:
push:
branches: [ main ]

jobs:
deploy:
Expand Down
15 changes: 9 additions & 6 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Check with linter
Expand All @@ -25,12 +25,15 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Install dependencies
run: pip install -r requirements.txt
run: |
pip install poetry==1.8.3
poetry export --without-hashes --with=dev --format=requirements.txt > requirements.txt
pip install --no-cache-dir -r requirements.txt
- name: Run tests
run: pytest
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ build/
*.egg

# Heavy
*.sqlite
*.pdf
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.2
hooks:
- id: ruff
- id: ruff-format
17 changes: 17 additions & 0 deletions DEVELOPMENT.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Developer notes

## Version bumping

Currently, the project version is tracked in two locations: `pyproject.toml` and `muckraker/__init__.py`.
Those two MUST be synchronised. To do that automatically, use Poetry with
[this plugin](https://pypi.org/project/poetry-bumpversion/).

## Linting

Currently, the project uses Ruff to perform static checks. To make sure that your commits will be accepted:

- Install `ruff` and `pre-commit` tools.
- Run `pre-commit install` to install our preconfigured hook in your local repo.
- Run `pre-commit run --all-files` to check the whole repo.

Now, each time you commit, your changes will be checked against the Ruff rules.
17 changes: 14 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
FROM python:3.11-slim
FROM python:3.12-slim AS builder

WORKDIR /build

COPY ./pyproject.toml ./poetry.lock /build

RUN pip install poetry==1.8.3
RUN poetry export --without-hashes --only=main --format=requirements.txt > requirements.txt



FROM python:3.12-slim AS runner

WORKDIR /app

RUN apt-get update
RUN apt-get install -y libpango-1.0-0 libpangoft2-1.0-0

COPY ./requirements.txt /app
RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
COPY --from=builder /build/requirements.txt /app
RUN pip install --no-cache-dir -r /app/requirements.txt

COPY ./muckraker /app/muckraker

Expand Down
1 change: 0 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
version: '3'
services:

app:
Expand Down
2 changes: 1 addition & 1 deletion muckraker/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1"
__version__ = "0.1.0"
140 changes: 63 additions & 77 deletions muckraker/main.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
import json
import uuid
from asyncio import gather
from io import BytesIO
from pathlib import Path
from shutil import rmtree
from tempfile import gettempdir, mkdtemp
from tempfile import TemporaryDirectory
from typing import List

from fastapi import Depends, FastAPI, File, Response, UploadFile
from fastapi.encoders import jsonable_encoder
from fastapi.exceptions import HTTPException, RequestValidationError
from fastapi import FastAPI, File, Response, UploadFile, status
from fastapi.exceptions import HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse

from . import __version__
from .models import Issue
from .render import render_issue
from .sqlcache import CacheError, SQLCache

CACHE_PATH = "cache.sqlite"
MAX_IMAGE_NUM = 4
MAX_IMAGE_SIZE = 2 * 1024 * 1024 # 2 MB
IMAGE_BATCH = 1024
Expand All @@ -28,109 +29,94 @@
root_path="/api",
version=__version__,
summary="A vintage gazette generator for your creative projects.",
openapi_tags=tags_metadata
openapi_tags=tags_metadata,
)

# Configure CORS policy
origins = ["*"]
app.add_middleware(
CORSMiddleware,
allow_origins=origins,
allow_methods=["POST", "PATCH", "GET"]
allow_methods=["POST", "PATCH", "GET"],
)


def get_dir_path(issue_id: str):
dir_path = Path(gettempdir()) / f"muckraker{issue_id}"
if not (dir_path.exists() and dir_path.is_dir()):
raise HTTPException(status_code=404, detail="No data")
return dir_path


@app.exception_handler(RequestValidationError)
def clear_tempdir_handler(request, exc):
issue_id = request.path_params.get("issue_id")
if issue_id:
rmtree(get_dir_path(issue_id))
return JSONResponse(
status_code=422,
content=jsonable_encoder({"detail": exc.errors()}),
)
cache = SQLCache(CACHE_PATH)


@app.post("/issue/", tags=["issue"])
async def upload_issue_data(issue: Issue) -> dict:
    """Create a new issue entry in the cache.

    Args:
        issue: Validated issue payload (page, header, body, fonts).

    Returns:
        dict with the generated ``issue_id`` (hex UUID) that subsequent
        image-upload and PDF-download requests must reference.
    """
    # uuid4 hex gives an unguessable, filesystem-safe identifier.
    issue_id = uuid.uuid4().hex
    await cache.put_issue(issue_id, issue.model_dump())
    return {"issue_id": issue_id}


@app.patch("/issue/{issue_id}", tags=["issue"])
async def upload_images(
    issue_id: str,
    images: List[UploadFile] = File(),
):
    """Attach uploaded images to a cached issue.

    Args:
        issue_id: Identifier returned by ``upload_issue_data``.
        images: Multipart image files.

    Raises:
        HTTPException: 404 if the issue is unknown, 413 if the count or a
            file size exceeds the limits, 415 on a disallowed content type,
            400 if the cache rejects a write.
    """
    issue = await cache.get_issue(issue_id)
    if issue is None:
        raise HTTPException(status.HTTP_404_NOT_FOUND, detail="Issue not found")

    # Validate number of images: the total across all uploads is capped.
    image_num = await cache.count_images(issue_id)
    if image_num + len(images) > MAX_IMAGE_NUM:
        raise HTTPException(status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, detail="Too many images")

    # Validate each image before storing anything, so a bad batch is rejected whole.
    for image in images:
        if image.content_type not in ACCEPTED_FILE_TYPES:
            detail = f"Invalid file type: {image.filename}"
            raise HTTPException(status.HTTP_415_UNSUPPORTED_MEDIA_TYPE, detail=detail)
        if image.size > MAX_IMAGE_SIZE:
            detail = f"File is too large: {image.filename}"
            raise HTTPException(status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, detail=detail)

    # Save images concurrently; cache errors surface as a single 400.
    tasks = [cache.put_image(issue_id, image.filename, image.file.read()) for image in images]
    try:
        await gather(*tasks)
    except CacheError as err:
        # Chain the cause so the original cache failure stays in tracebacks.
        raise HTTPException(status.HTTP_400_BAD_REQUEST, detail=str(err)) from err
    # NOTE(review): only the LAST filename is reported (loop-variable leak in the
    # original); kept for response compatibility — consider returning all names.
    return JSONResponse(content={"filename": image.filename})


@app.get("/issue/{issue_id}", tags=["issue"])
async def get_issue(issue_id: str):
    """Render a cached issue to PDF, return it, and delete the cache entry.

    Args:
        issue_id: Identifier returned by ``upload_issue_data``.

    Returns:
        ``application/pdf`` response with a download disposition.

    Raises:
        HTTPException: 404 if the issue is unknown.
    """
    # Read issue data
    issue_dict = await cache.get_issue(issue_id)
    if issue_dict is None:
        raise HTTPException(status.HTTP_404_NOT_FOUND, detail="Issue not found")

    with TemporaryDirectory() as tmp_dir_name:
        dir_path = Path(tmp_dir_name)

        # Extract cached images to disk so the renderer can resolve their paths.
        async for filename, image in cache.load_images(issue_id):
            image_path = dir_path / filename
            with open(image_path, "wb") as fd:
                fd.write(image)

        # Render PDF into the temp dir, then pull the bytes into memory
        # before the directory is cleaned up.
        pdf_path = dir_path / "out.pdf"
        render_issue(
            page=issue_dict["page"],
            header=issue_dict["header"],
            body=issue_dict["body"],
            fonts=issue_dict["fonts"],
            output=pdf_path,
            image_dir=dir_path,
        )
        pdf_bytes = pdf_path.read_bytes()

    # Issues are single-use: drop the cached data once the PDF is produced.
    await cache.delete_issue(issue_id)

    headers = {"Content-Disposition": 'attachment; filename="out.pdf"'}
    return Response(pdf_bytes, headers=headers, media_type="application/pdf")
20 changes: 7 additions & 13 deletions muckraker/md_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


class FilterExtension(Extension):
""" Ignore some tags """
"""Ignore some tags"""

def extendMarkdown(self, md: Markdown) -> None:
md.inlinePatterns.deregister("link")
Expand All @@ -26,16 +26,14 @@ def extendMarkdown(self, md: Markdown) -> None:


class ImagePathProcessor(ImageInlineProcessor):
""" Return an `img` element from the given match. """
"""Return an `img` element from the given match."""

def __init__(self, pattern: str, md: Markdown, image_dir: str = ""):
def __init__(self, pattern: str, md: Markdown, image_dir: str = "") -> None:
super().__init__(pattern, md)
self.image_dir = image_dir

def handleMatch(
self,
m: re.Match[str],
data: str
self, m: re.Match[str], data: str
) -> tuple[etree.Element | None, int | None, int | None]:
el, start, ind = super().handleMatch(m, data)
src_path = Path(el.get("src"))
Expand All @@ -45,9 +43,9 @@ def handleMatch(


class ImagePathExtension(Extension):
""" Modify image paths so that Weasyprint could handle them """
"""Modify image paths so that Weasyprint could handle them"""

def __init__(self, **kwargs):
def __init__(self, **kwargs) -> None:
self.config = {"image_dir": ["", "Images root directory"]}
super().__init__(**kwargs)

Expand All @@ -56,9 +54,5 @@ def extendMarkdown(self, md: Markdown) -> None:
md.inlinePatterns.deregister("image_reference")
md.inlinePatterns.deregister("short_image_ref")

processor = ImagePathProcessor(
IMAGE_LINK_RE,
md,
self.getConfig("image_dir")
)
processor = ImagePathProcessor(IMAGE_LINK_RE, md, self.getConfig("image_dir"))
md.inlinePatterns.register(processor, "image_path", 140)
2 changes: 1 addition & 1 deletion muckraker/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class Issue(BaseModel):
body: str = Field(max_length=MAX_BODY_LEN)
fonts: Optional[IssueFonts] = IssueFonts()

@model_validator(mode='before')
@model_validator(mode="before")
@classmethod
def validate_to_json(cls, value):
if isinstance(value, str):
Expand Down
Loading