Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sqlcache #3

Merged
merged 9 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
name: Deploy
on: workflow_dispatch
on:
workflow_dispatch:
push:
branches: [ main ]

jobs:
deploy:
Expand Down
15 changes: 9 additions & 6 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Check with linter
Expand All @@ -25,12 +25,15 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Install dependencies
run: pip install -r requirements.txt
run: |
pip install poetry==1.8.3
poetry export --without-hashes --with=dev --format=requirements.txt > requirements.txt
pip install --no-cache-dir -r requirements.txt
- name: Run tests
run: pytest
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ build/
*.egg

# Heavy
*.sqlite
*.pdf
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.2
hooks:
- id: ruff
- id: ruff-format
17 changes: 17 additions & 0 deletions DEVELOPMENT.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Developer notes

## Version bumping

Currently, the project version is tracked in two locations: `pyproject.toml` and `muckraker/__init__.py`.
Those two MUST be synchronised. To do that automatically, use Poetry with
[this plugin](https://pypi.org/project/poetry-bumpversion/).

## Linting

Currently, the project uses Ruff to perform static checks. To make sure that your commits will be accepted:

- Install `ruff` and `pre-commit` tools.
- Run `pre-commit install` to install our preconfigured hook in your local repo.
- Run `pre-commit run --all-files` to check the whole repo.

Now, each time you commit, your changes will be checked against the Ruff rules.
17 changes: 14 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
FROM python:3.11-slim
FROM python:3.12-slim AS builder

WORKDIR /build

COPY ./pyproject.toml ./poetry.lock /build

RUN pip install poetry==1.8.3
RUN poetry export --without-hashes --only=main --format=requirements.txt > requirements.txt



FROM python:3.12-slim AS runner

WORKDIR /app

RUN apt-get update
RUN apt-get install -y libpango-1.0-0 libpangoft2-1.0-0

COPY ./requirements.txt /app
RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt
COPY --from=builder /build/requirements.txt /app
RUN pip install --no-cache-dir -r /app/requirements.txt

COPY ./muckraker /app/muckraker

Expand Down
1 change: 0 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
version: '3'
services:

app:
Expand Down
2 changes: 1 addition & 1 deletion muckraker/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1"
__version__ = "0.1.0"
140 changes: 63 additions & 77 deletions muckraker/main.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
import json
import uuid
from asyncio import gather
from io import BytesIO
from pathlib import Path
from shutil import rmtree
from tempfile import gettempdir, mkdtemp
from tempfile import TemporaryDirectory
from typing import List

from fastapi import Depends, FastAPI, File, Response, UploadFile
from fastapi.encoders import jsonable_encoder
from fastapi.exceptions import HTTPException, RequestValidationError
from fastapi import FastAPI, File, Response, UploadFile, status
from fastapi.exceptions import HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse

from . import __version__
from .models import Issue
from .render import render_issue
from .sqlcache import CacheError, SQLCache

CACHE_PATH = "cache.sqlite"
MAX_IMAGE_NUM = 4
MAX_IMAGE_SIZE = 2 * 1024 * 1024 # 2 MB
IMAGE_BATCH = 1024
Expand All @@ -28,109 +29,94 @@
root_path="/api",
version=__version__,
summary="A vintage gazette generator for your creative projects.",
openapi_tags=tags_metadata
openapi_tags=tags_metadata,
)

# Configure CORS policy
origins = ["*"]
app.add_middleware(
CORSMiddleware,
allow_origins=origins,
allow_methods=["POST", "PATCH", "GET"]
allow_methods=["POST", "PATCH", "GET"],
)


def get_dir_path(issue_id: str):
dir_path = Path(gettempdir()) / f"muckraker{issue_id}"
if not (dir_path.exists() and dir_path.is_dir()):
raise HTTPException(status_code=404, detail="No data")
return dir_path


@app.exception_handler(RequestValidationError)
def clear_tempdir_handler(request, exc):
issue_id = request.path_params.get("issue_id")
if issue_id:
rmtree(get_dir_path(issue_id))
return JSONResponse(
status_code=422,
content=jsonable_encoder({"detail": exc.errors()}),
)
cache = SQLCache(CACHE_PATH)


@app.post("/issue/", tags=["issue"])
async def upload_issue_data(issue: Issue) -> dict:
    """Create a new issue entry in the cache.

    Args:
        issue: Validated issue payload (page, header, body, fonts).

    Returns:
        dict with the generated ``issue_id`` (hex UUID) that subsequent
        image-upload and PDF-download requests must reference.
    """
    # uuid4 hex gives an unguessable, filesystem-safe identifier.
    issue_id = uuid.uuid4().hex
    await cache.put_issue(issue_id, issue.model_dump())
    return {"issue_id": issue_id}


@app.patch("/issue/{issue_id}", tags=["issue"])
async def upload_images(
    issue_id: str,
    images: List[UploadFile] = File(),
):
    """Attach uploaded images to a cached issue.

    Args:
        issue_id: Identifier returned by ``upload_issue_data``.
        images: Multipart image files.

    Raises:
        HTTPException: 404 if the issue is unknown, 413 if the count or a
            file size exceeds the limits, 415 on a disallowed content type,
            400 if the cache rejects a write.
    """
    issue = await cache.get_issue(issue_id)
    if issue is None:
        raise HTTPException(status.HTTP_404_NOT_FOUND, detail="Issue not found")

    # Validate number of images: the total across all uploads is capped.
    image_num = await cache.count_images(issue_id)
    if image_num + len(images) > MAX_IMAGE_NUM:
        raise HTTPException(status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, detail="Too many images")

    # Validate each image before storing anything, so a bad batch is rejected whole.
    for image in images:
        if image.content_type not in ACCEPTED_FILE_TYPES:
            detail = f"Invalid file type: {image.filename}"
            raise HTTPException(status.HTTP_415_UNSUPPORTED_MEDIA_TYPE, detail=detail)
        if image.size > MAX_IMAGE_SIZE:
            detail = f"File is too large: {image.filename}"
            raise HTTPException(status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, detail=detail)

    # Save images concurrently; cache errors surface as a single 400.
    tasks = [cache.put_image(issue_id, image.filename, image.file.read()) for image in images]
    try:
        await gather(*tasks)
    except CacheError as err:
        # Chain the cause so the original cache failure stays in tracebacks.
        raise HTTPException(status.HTTP_400_BAD_REQUEST, detail=str(err)) from err
    # NOTE(review): only the LAST filename is reported (loop-variable leak in the
    # original); kept for response compatibility — consider returning all names.
    return JSONResponse(content={"filename": image.filename})


@app.get("/issue/{issue_id}", tags=["issue"])
async def get_issue(issue_id: str):
    """Render a cached issue to PDF, return it, and delete the cache entry.

    Args:
        issue_id: Identifier returned by ``upload_issue_data``.

    Returns:
        ``application/pdf`` response with a download disposition.

    Raises:
        HTTPException: 404 if the issue is unknown.
    """
    # Read issue data
    issue_dict = await cache.get_issue(issue_id)
    if issue_dict is None:
        raise HTTPException(status.HTTP_404_NOT_FOUND, detail="Issue not found")

    with TemporaryDirectory() as tmp_dir_name:
        dir_path = Path(tmp_dir_name)

        # Extract cached images to disk so the renderer can resolve their paths.
        async for filename, image in cache.load_images(issue_id):
            image_path = dir_path / filename
            with open(image_path, "wb") as fd:
                fd.write(image)

        # Render PDF into the temp dir, then pull the bytes into memory
        # before the directory is cleaned up.
        pdf_path = dir_path / "out.pdf"
        render_issue(
            page=issue_dict["page"],
            header=issue_dict["header"],
            body=issue_dict["body"],
            fonts=issue_dict["fonts"],
            output=pdf_path,
            image_dir=dir_path,
        )
        pdf_bytes = pdf_path.read_bytes()

    # Issues are single-use: drop the cached data once the PDF is produced.
    await cache.delete_issue(issue_id)

    headers = {"Content-Disposition": 'attachment; filename="out.pdf"'}
    return Response(pdf_bytes, headers=headers, media_type="application/pdf")
20 changes: 7 additions & 13 deletions muckraker/md_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


class FilterExtension(Extension):
""" Ignore some tags """
"""Ignore some tags"""

def extendMarkdown(self, md: Markdown) -> None:
md.inlinePatterns.deregister("link")
Expand All @@ -26,16 +26,14 @@ def extendMarkdown(self, md: Markdown) -> None:


class ImagePathProcessor(ImageInlineProcessor):
""" Return an `img` element from the given match. """
"""Return an `img` element from the given match."""

def __init__(self, pattern: str, md: Markdown, image_dir: str = ""):
def __init__(self, pattern: str, md: Markdown, image_dir: str = "") -> None:
super().__init__(pattern, md)
self.image_dir = image_dir

def handleMatch(
self,
m: re.Match[str],
data: str
self, m: re.Match[str], data: str
) -> tuple[etree.Element | None, int | None, int | None]:
el, start, ind = super().handleMatch(m, data)
src_path = Path(el.get("src"))
Expand All @@ -45,9 +43,9 @@ def handleMatch(


class ImagePathExtension(Extension):
""" Modify image paths so that Weasyprint could handle them """
"""Modify image paths so that Weasyprint could handle them"""

def __init__(self, **kwargs):
def __init__(self, **kwargs) -> None:
self.config = {"image_dir": ["", "Images root directory"]}
super().__init__(**kwargs)

Expand All @@ -56,9 +54,5 @@ def extendMarkdown(self, md: Markdown) -> None:
md.inlinePatterns.deregister("image_reference")
md.inlinePatterns.deregister("short_image_ref")

processor = ImagePathProcessor(
IMAGE_LINK_RE,
md,
self.getConfig("image_dir")
)
processor = ImagePathProcessor(IMAGE_LINK_RE, md, self.getConfig("image_dir"))
md.inlinePatterns.register(processor, "image_path", 140)
2 changes: 1 addition & 1 deletion muckraker/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class Issue(BaseModel):
body: str = Field(max_length=MAX_BODY_LEN)
fonts: Optional[IssueFonts] = IssueFonts()

@model_validator(mode='before')
@model_validator(mode="before")
@classmethod
def validate_to_json(cls, value):
if isinstance(value, str):
Expand Down
Loading