Skip to content

Commit

Permalink
WIP: Timeouts
Browse files Browse the repository at this point in the history
  • Loading branch information
apyrgio committed Apr 3, 2024
1 parent b284a55 commit f0d39bd
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 22 deletions.
90 changes: 79 additions & 11 deletions dangerzone/isolation_provider/base.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import contextlib
import logging
import os
import subprocess
import sys
import tempfile
from abc import ABC, abstractmethod
from pathlib import Path
from typing import IO, Callable, Optional
from typing import IO, Callable, Iterator, Optional

from colorama import Fore, Style

Expand Down Expand Up @@ -69,20 +70,14 @@ def convert(
self.progress_callback = progress_callback
document.mark_as_converting()
try:
conversion_proc = self.start_doc_to_pixels_proc()
with tempfile.TemporaryDirectory() as t:
Path(f"{t}/pixels").mkdir()
self.doc_to_pixels(document, t, conversion_proc)
conversion_proc.wait(3)
# TODO: validate convert to pixels output
with self.doc_to_pixels_proc(document) as conversion_proc:
self.doc_to_pixels(document, t, conversion_proc)
self.pixels_to_pdf(document, t, ocr_lang)
document.mark_as_safe()
if document.archive_after_conversion:
document.archive()
except errors.ConverterProcException as e:
exception = self.get_proc_exception(conversion_proc)
self.print_progress(document, True, str(exception), 0)
document.mark_as_failed()
except errors.ConversionException as e:
self.print_progress(document, True, str(e), 0)
document.mark_as_failed()
Expand Down Expand Up @@ -176,7 +171,15 @@ def print_progress(

def get_proc_exception(self, p: subprocess.Popen) -> Exception:
"""Returns an exception associated with a process exit code"""
error_code = p.wait(3)
timeout = 3
try:
error_code = p.wait(timeout)
except subprocess.TimeoutExpired:
return errors.UnexpectedConversionError(
"Encountered an I/O error during document to pixels conversion,"
f" but the conversion process is still running after {timeout} seconds"
f" (PID: {p.pid})"
)
return errors.exception_from_error_code(error_code)

@abstractmethod
Expand All @@ -192,9 +195,74 @@ def sanitize_conversion_str(self, untrusted_conversion_str: str) -> str:
return armor_start + conversion_string + armor_end

@abstractmethod
def start_doc_to_pixels_proc(self) -> subprocess.Popen:
def start_doc_to_pixels_proc(self, document: Document) -> subprocess.Popen:
pass

@abstractmethod
def terminate_doc_to_pixels_proc(
self, document: Document, p: subprocess.Popen
) -> None:
pass

def ensure_stop_doc_to_pixels_proc(
self, document: Document, p: subprocess.Popen
) -> None:
# At this point, we have gained what we want from the process, and we only want
# to free resources.
timeout_grace = 3
timeout_force = 2

# Check if the process has already been waited, probably by the error handling
# logic of self.get_proc_exception().
log.info("Checking")
if p.returncode is not None:
return

# Check if the process completed successfully.
log.debug("Polling")
ret = p.poll()
if ret is not None:
return

# At this point, the process is still running. This may be benign, as we haven't
# waited for it yet. Terminate it gracefully.
# FIXME: This does not work for Qubes.
log.debug("Terminating")
self.terminate_doc_to_pixels_proc(document, p)
try:
p.wait(timeout_grace)
log.debug("Terminated")
except subprocess.TimeoutExpired:
log.warning(
f"Conversion process did not terminate gracefully after {timeout_grace}"
" seconds. Killing it forcefully..."
)

# Forcefully kill the running process.
p.kill()
try:
p.wait(timeout_force)
except subprocess.TimeoutExpired:
log.warning(
"Conversion process did not terminate forcefully after"
f" {timeout_force} seconds. Resources may linger..."
)

@contextlib.contextmanager
def doc_to_pixels_proc(self, document: Document) -> Iterator[subprocess.Popen]:
p = self.start_doc_to_pixels_proc(document)
try:
yield p
except errors.ConverterProcException as e:
exception = self.get_proc_exception(p)
self.print_progress(document, True, str(exception), 0)
document.mark_as_failed()
finally:
import time

time.sleep(3)
self.ensure_stop_doc_to_pixels_proc(document, p)


# From global_common:

Expand Down
26 changes: 23 additions & 3 deletions dangerzone/isolation_provider/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,12 @@ def is_container_installed() -> bool:

return installed

def doc_to_pixels_container_name(self, document: Document) -> str:
return f"doc-to-pixels-{document.id}"

def pixels_to_pdf_container_name(self, document: Document) -> str:
return f"pixels-to-pdf-{document.id}"

def assert_field_type(self, val: Any, _type: object) -> None:
# XXX: Use a stricter check than isinstance because `bool` is a subclass of
# `int`.
Expand Down Expand Up @@ -172,6 +178,7 @@ def exec(
def exec_container(
self,
command: List[str],
name: str,
extra_args: List[str] = [],
) -> subprocess.Popen:
container_runtime = self.get_runtime()
Expand All @@ -187,6 +194,7 @@ def exec_container(
security_args += ["--cap-drop", "all"]
user_args = ["-u", "dangerzone"]
enable_stdin = ["-i"]
set_name = ["--name", name]

prevent_leakage_args = ["--rm"]

Expand All @@ -196,6 +204,7 @@ def exec_container(
+ security_args
+ prevent_leakage_args
+ enable_stdin
+ set_name
+ extra_args
+ [self.CONTAINER_NAME]
+ command
Expand All @@ -204,6 +213,10 @@ def exec_container(
args = [container_runtime] + args
return self.exec(args)

def kill_container(self, name: str) -> None:
container_runtime = self.get_runtime()
self.exec([container_runtime, "kill", name])

def pixels_to_pdf(
self, document: Document, tempdir: str, ocr_lang: Optional[str]
) -> None:
Expand All @@ -222,7 +235,8 @@ def pixels_to_pdf(
f"OCR_LANGUAGE={ocr_lang}",
]

pixels_to_pdf_proc = self.exec_container(command, extra_args)
name = self.pixels_to_pdf_container_name(document)
pixels_to_pdf_proc = self.exec_container(command, name, extra_args)
if pixels_to_pdf_proc.stdout:
for line in pixels_to_pdf_proc.stdout:
self.parse_progress_trusted(document, line.decode())
Expand Down Expand Up @@ -251,14 +265,20 @@ def pixels_to_pdf(
)
shutil.move(container_output_filename, document.output_filename)

def start_doc_to_pixels_proc(self) -> subprocess.Popen:
def start_doc_to_pixels_proc(self, document: Document) -> subprocess.Popen:
# Convert document to pixels
command = [
"/usr/bin/python3",
"-m",
"dangerzone.conversion.doc_to_pixels",
]
return self.exec_container(command)
name = self.doc_to_pixels_container_name(document)
return self.exec_container(command, name=name)

def terminate_doc_to_pixels_proc(
self, document: Document, p: subprocess.Popen
) -> None:
self.kill_container(self.doc_to_pixels_container_name(document))

def get_max_parallel_conversions(self) -> int:
# FIXME hardcoded 1 until length conversions are better handled
Expand Down
8 changes: 7 additions & 1 deletion dangerzone/isolation_provider/dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,14 @@ def pixels_to_pdf(
) -> None:
pass

def start_doc_to_pixels_proc(self) -> subprocess.Popen:
def start_doc_to_pixels_proc(self, document: Document) -> subprocess.Popen:
return subprocess.Popen("True")

def terminate_doc_to_pixels_proc(
self, document: Document, p: subprocess.Popen
) -> None:
# FIXME: What to do here?
pass

def get_max_parallel_conversions(self) -> int:
return 1
9 changes: 8 additions & 1 deletion dangerzone/isolation_provider/qubes.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def print_progress_wrapper(error: bool, text: str, percentage: float) -> None:
def get_max_parallel_conversions(self) -> int:
return 1

def start_doc_to_pixels_proc(self) -> subprocess.Popen:
def start_doc_to_pixels_proc(self, document: Document) -> subprocess.Popen:
dev_mode = getattr(sys, "dangerzone_dev", False) == True
if dev_mode:
# Use dz.ConvertDev RPC call instead, if we are in development mode.
Expand All @@ -77,6 +77,13 @@ def start_doc_to_pixels_proc(self) -> subprocess.Popen:

return p

def terminate_doc_to_pixels_proc(
self, document: Document, p: subprocess.Popen
) -> None:
# TODO: Make sure to close the standard streams as well. See:
# https://github.com/freedomofpress/dangerzone/issues/563#issuecomment-2034803232
p.terminate()

def teleport_dz_module(self, wpipe: IO[bytes]) -> None:
"""Send the dangerzone module to another qube, as a zipfile."""
# Grab the absolute file path of the dangerzone module.
Expand Down
21 changes: 15 additions & 6 deletions tests/isolation_provider/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def test_max_pages_server_enforcement(
provider.progress_callback = mocker.MagicMock()
doc = Document(pdf_11k_pages)

p = provider.start_doc_to_pixels_proc()
p = provider.start_doc_to_pixels_proc(doc)
with pytest.raises(errors.ConverterProcException):
provider.doc_to_pixels(doc, tmpdir, p)
assert provider.get_proc_exception(p) == errors.MaxPagesException
Expand All @@ -54,7 +54,7 @@ def test_max_pages_client_enforcement(
"dangerzone.conversion.errors.MAX_PAGES", 1
) # sample_doc has 4 pages > 1
doc = Document(sample_doc)
p = provider.start_doc_to_pixels_proc()
p = provider.start_doc_to_pixels_proc(doc)
with pytest.raises(errors.MaxPagesException):
provider.doc_to_pixels(doc, tmpdir, p)

Expand All @@ -67,10 +67,19 @@ def test_max_dimensions(
tmpdir: str,
) -> None:
provider.progress_callback = mocker.MagicMock()
p = provider.start_doc_to_pixels_proc()
doc = Document(sample_bad_width)
p = provider.start_doc_to_pixels_proc(doc)
with pytest.raises(errors.MaxPageWidthException):
provider.doc_to_pixels(Document(sample_bad_width), tmpdir, p)
provider.doc_to_pixels(doc, tmpdir, p)

p = provider.start_doc_to_pixels_proc()
doc = Document(sample_bad_height)
p = provider.start_doc_to_pixels_proc(doc)
with pytest.raises(errors.MaxPageHeightException):
provider.doc_to_pixels(Document(sample_bad_height), tmpdir, p)
provider.doc_to_pixels(doc, tmpdir, p)

def test_graceful_termination(
self,
provider: base.IsolationProvider,
mocker: MockerFixture,
) -> None:
pass

0 comments on commit f0d39bd

Please sign in to comment.