Skip to content

Commit

Permalink
Make PyMuPDF always log to stderr
Browse files Browse the repository at this point in the history
PyMUPDF logs to stdout by default, which is problematic because we use
the stdout of the conversion process to read the pixel stream of a
document.

Make PyMuPDF always log to stderr, by setting the following environment
variables: PYMUPDF_MESSAGE and PYMUPDF_LOG.

Fixes #877
  • Loading branch information
apyrgio committed Aug 9, 2024
1 parent 08f03b4 commit 3f86e7b
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 8 deletions.
11 changes: 11 additions & 0 deletions dangerzone/conversion/doc_to_pixels.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,17 @@
import sys
from typing import Dict, Optional

# XXX: PyMUPDF logs to stdout by default [1]. The PyMuPDF devs provide a way [2] to log to
# stderr, but it's based on environment variables. These envvars are consulted at import
# time [3], so we have to set them here, before we import `fitz`.
#
# [1] https://github.com/freedomofpress/dangerzone/issues/877
# [2] https://github.com/pymupdf/PyMuPDF/issues/3135#issuecomment-1992625724
# [3] https://github.com/pymupdf/PyMuPDF/blob/9717935eeb2d50d15440d62575878214226795f9/src/__init__.py#L62-L63
os.environ["PYMUPDF_MESSAGE"] = "fd:2"
os.environ["PYMUPDF_LOG"] = "fd:2"


import fitz
import magic

Expand Down
25 changes: 17 additions & 8 deletions dangerzone/conversion/pixels_to_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,16 @@

from .common import DEFAULT_DPI, DangerzoneConverter, get_tessdata_dir, running_on_qubes

# XXX: PyMUPDF logs to stdout by default [1]. The PyMuPDF devs provide a way [2] to log to
# stderr, but it's based on environment variables. These envvars are consulted at import
# time [3], so we have to set them here, before we import `fitz`.
#
# [1] https://github.com/freedomofpress/dangerzone/issues/877
# [2] https://github.com/pymupdf/PyMuPDF/issues/3135#issuecomment-1992625724
# [3] https://github.com/pymupdf/PyMuPDF/blob/9717935eeb2d50d15440d62575878214226795f9/src/__init__.py#L62-L63
os.environ["PYMUPDF_MESSAGE"] = "fd:2"
os.environ["PYMUPDF_LOG"] = "fd:2"


class PixelsToPDF(DangerzoneConverter):
async def convert(
Expand Down Expand Up @@ -50,14 +60,13 @@ async def convert(
# The first few operations happen on a per-page basis.
page_size = len(untrusted_rgb_data)
total_size += page_size
with contextlib.redirect_stdout(io.StringIO()):
pixmap = fitz.Pixmap(
fitz.Colorspace(fitz.CS_RGB),
width,
height,
untrusted_rgb_data,
False,
)
pixmap = fitz.Pixmap(
fitz.Colorspace(fitz.CS_RGB),
width,
height,
untrusted_rgb_data,
False,
)
pixmap.set_dpi(DEFAULT_DPI, DEFAULT_DPI)
if ocr_lang: # OCR the document
self.update_progress(
Expand Down

0 comments on commit 3f86e7b

Please sign in to comment.