Harmonizing typer usage (#74)

Co-authored-by: Cimon Lucas (LCM) <[email protected]>
py-pdf · Nov 12, 2024 · commit fc94271
1 parent: 1d4e256

Showing 3 changed files with 114 additions and 69 deletions.
diff --git a/pdfly/cli.py b/pdfly/cli.py
@@ -52,11 +52,8 @@ def extract_images(
     pdf: Annotated[
         Path,
         typer.Argument(
-            exists=True,
-            file_okay=True,
             dir_okay=False,
-            writable=False,
-            readable=True,
+            exists=True,
             resolve_path=True,
         ),
     ]
@@ -69,11 +66,8 @@ def up2(
     pdf: Annotated[
         Path,
         typer.Argument(
-            exists=True,
-            file_okay=True,
             dir_okay=False,
-            writable=False,
-            readable=True,
+            exists=True,
             resolve_path=True,
         ),
     ],
@@ -87,11 +81,8 @@ def cat(
     filename: Annotated[
         Path,
         typer.Argument(
-            exists=True,
-            file_okay=True,
             dir_okay=False,
-            writable=False,
-            readable=True,
+            exists=True,
             resolve_path=True,
         ),
     ],
@@ -111,11 +102,8 @@ def rm(
     filename: Annotated[
         Path,
         typer.Argument(
-            exists=True,
-            file_okay=True,
             dir_okay=False,
-            writable=False,
-            readable=True,
+            exists=True,
             resolve_path=True,
         ),
     ],
@@ -135,11 +123,8 @@ def metadata(
     pdf: Annotated[
         Path,
         typer.Argument(
-            exists=True,
-            file_okay=True,
             dir_okay=False,
-            writable=False,
-            readable=True,
+            exists=True,
             resolve_path=True,
         ),
     ],
@@ -159,11 +144,8 @@ def pagemeta(
     pdf: Annotated[
         Path,
         typer.Argument(
-            exists=True,
-            file_okay=True,
             dir_okay=False,
-            writable=False,
-            readable=True,
+            exists=True,
             resolve_path=True,
         ),
     ],
@@ -188,11 +170,8 @@ def extract_text(
     pdf: Annotated[
         Path,
         typer.Argument(
-            exists=True,
-            file_okay=True,
             dir_okay=False,
-            writable=False,
-            readable=True,
+            exists=True,
             resolve_path=True,
         ),
     ]
@@ -202,19 +181,16 @@ def extract_text(
 
     reader = PdfReader(str(pdf))
     for page in reader.pages:
-        print(page.extract_text())
+        typer.echo(page.extract_text())
 
 
 @entry_point.command(name="compress", help=pdfly.compress.__doc__)  # type: ignore[misc]
 def compress(
     pdf: Annotated[
         Path,
         typer.Argument(
-            exists=True,
-            file_okay=True,
             dir_okay=False,
-            writable=False,
-            readable=True,
+            exists=True,
             resolve_path=True,
         ),
     ],
@@ -231,7 +207,14 @@ def compress(
 
 @entry_point.command(name="update-offsets", help=pdfly.update_offsets.__doc__)  # type: ignore[misc]
 def update_offsets(
-    file_in: Path,
+    file_in: Annotated[
+        Path,
+        typer.Argument(
+            dir_okay=False,
+            exists=True,
+            resolve_path=True,
+        ),
+    ],
     file_out: Path,
     encoding: str = typer.Option(
         "ISO-8859-1",
@@ -246,15 +229,25 @@ def update_offsets(
 
 @entry_point.command(name="x2pdf", help=pdfly.x2pdf.__doc__)  # type: ignore[misc]
 def x2pdf(
-    x: List[Path],
+    x: List[
+        Annotated[
+            Path,
+            typer.Argument(
+                dir_okay=False,
+                exists=True,
+                resolve_path=True,
+            ),
+        ]
+    ],
     output: Annotated[
         Path,
         typer.Option(
             "-o",
             "--output",
-            exists=False,
             writable=True,
         ),
     ],
-) -> int:
-    return pdfly.x2pdf.main(x, output)
+) -> None:
+    exit_code = pdfly.x2pdf.main(x, output)
+    if exit_code:
+        raise typer.Exit(code=exit_code)
diff --git a/pdfly/x2pdf.py b/pdfly/x2pdf.py
@@ -1,10 +1,12 @@
 """Convert one or more files to PDF. Each file is a page."""
 
+from io import BytesIO
 from pathlib import Path
 from typing import List
 
 from fpdf import FPDF
 from PIL import Image
+from pypdf import PdfReader, PdfWriter
 from rich.console import Console
 
 
@@ -16,39 +18,34 @@ def px_to_mm(px: float) -> float:
     return mm
 
 
-def image_to_pdf(pdf: FPDF, x: Path) -> None:
-    cover = Image.open(x)
-    width: float
-    height: float
-    width, height = cover.size
-    cover.close()
+def image_to_pdf(filepath: Path) -> BytesIO:
+    with Image.open(filepath) as cover:
+        width, height = cover.size
     width, height = px_to_mm(width), px_to_mm(height)
-
+    pdf = FPDF(unit="mm")
     pdf.add_page(format=(width, height))
-    pdf.image(x, x=0, y=0)
+    pdf.image(filepath, x=0, y=0)
+    return BytesIO(pdf.output())
 
 
-def main(xs: List[Path], output: Path) -> int:
+def main(in_filepaths: List[Path], out_filepath: Path) -> int:
     console = Console()
-    for x in xs:
-        path_str = str(x).lower()
-        if path_str.endswith(("doc", "docx", "odt")):
-            console.print("[red]Error: Cannot convert Word documents to PDF")
-            return 1
-        if not x.exists():
-            console.print(f"[red]Error: File '{x}' does not exist.")
-            return 2
-    if output.exists():
-        console.print(f"[red]Error: Output file '{output}' exist.")
-        return 3
-    pdf = FPDF(
-        unit="mm",
-    )
-    for x in xs:
-        path_str = str(x).lower()
+    exit_code = 0  # number of inputs that failed to convert
+    writer = PdfWriter()
+    for filepath in in_filepaths:
+        if filepath.name.lower().endswith(".pdf"):  # also accept ".PDF"
+            for page in PdfReader(filepath).pages:
+                writer.add_page(page)  # append; insert_page() prepends
+            continue
         try:
-            image_to_pdf(pdf, x)
+            pdf_bytes = image_to_pdf(filepath)
+            new_page = PdfReader(pdf_bytes).pages[0]
+            writer.add_page(new_page)  # keep inputs in command-line order
         except Exception:
-            console.print(f"[red]Error: Could not convert '{x}' to a PDF.")
-    pdf.output(str(output))
-    return 0
+            console.print(
+                f"[red]Error: Could not convert '{filepath}' to a PDF."
+            )
+            console.print_exception(extra_lines=1, max_frames=1)
+            exit_code += 1
+    writer.write(out_filepath)
+    return exit_code
diff --git a/tests/test_x2pdf.py b/tests/test_x2pdf.py
@@ -9,10 +9,9 @@
 from .conftest import run_cli
 
 
-def test_x2pdf(capsys, tmp_path: Path) -> None:
+def test_x2pdf_succeed_to_convert_jpg(capsys, tmp_path: Path):
     # Arrange
     output = tmp_path / "out.pdf"
-    assert not output.exists()
 
     # Act
     exit_code = run_cli(
@@ -29,3 +28,59 @@ def test_x2pdf(capsys, tmp_path: Path) -> None:
     assert exit_code == 0, captured
     assert captured.out == ""
     assert output.exists()
+
+
+def test_x2pdf_succeed_to_embed_pdfs(capsys, tmp_path: Path):
+    # Arrange
+    output = tmp_path / "out.pdf"
+
+    # Act
+    exit_code = run_cli(
+        [
+            "x2pdf",
+            "sample-files/001-trivial/minimal-document.pdf",
+            "sample-files/002-trivial-libre-office-writer/002-trivial-libre-office-writer.pdf",
+            "--output",
+            str(output),
+        ]
+    )
+
+    # Assert
+    captured = capsys.readouterr()
+    assert exit_code == 0, captured
+    assert captured.out == ""
+    assert output.exists()
+
+
+def test_x2pdf_fail_to_open_file(capsys, tmp_path: Path):
+    # Arrange & Act
+    exit_code = run_cli(
+        [
+            "x2pdf",
+            "NonExistingFile",
+            "--output",
+            str(tmp_path / "out.pdf"),
+        ]
+    )
+
+    # Assert
+    captured = capsys.readouterr()
+    assert exit_code == 1, captured
+    assert "No such file or directory" in captured.out
+
+
+def test_x2pdf_fail_to_convert(capsys, tmp_path: Path):
+    # Arrange & Act
+    exit_code = run_cli(
+        [
+            "x2pdf",
+            "README.md",
+            "--output",
+            str(tmp_path / "out.pdf"),
+        ]
+    )
+
+    # Assert
+    captured = capsys.readouterr()
+    assert exit_code == 1, captured
+    assert "Error: Could not convert 'README.md' to a PDF" in captured.out