Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add --format Option for Custom Page Sizes in x2pdf Command #65

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
10 changes: 7 additions & 3 deletions pdfly/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,7 @@ def compress(
) -> None:
pdfly.compress.main(pdf, output)


@entry_point.command(name="x2pdf", help=pdfly.x2pdf.__doc__) # type: ignore[misc]
@entry_point.command(name="x2pdf", help=pdfly.x2pdf.__doc__)
def x2pdf(
x: List[Path],
output: Annotated[
Expand All @@ -240,5 +239,10 @@ def x2pdf(
writable=True,
),
],
format: str = typer.Option(
None,
"--format",
help="Optional page format for output PDF: Letter, A4-portrait, A4-landscape, or custom dimensions (e.g., 210x297). If omitted, no format is enforced."
),
) -> int:
return pdfly.x2pdf.main(x, output)
return pdfly.x2pdf.main(x, output, format)
1 change: 1 addition & 0 deletions pdfly/up2.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ def main(pdf: Path, output: Path) -> None:
with open(output, "wb") as fp:
writer.write(fp)
print("done.")

81 changes: 58 additions & 23 deletions pdfly/x2pdf.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,35 @@
"""Convert one or more files to PDF. Each file is a page."""

import re

from pathlib import Path
from typing import List

from fpdf import FPDF
from PIL import Image
from rich.console import Console

def get_page_size(format: str):
    """Return the ``(width, height)`` page dimensions for *format*.

    Accepted values (matched case-insensitively):

    * a named size -- ``A0`` to ``A4``, ``Letter`` or ``Legal`` -- optionally
      followed by ``-portrait`` or ``-landscape`` (portrait is the default);
    * custom dimensions written as ``WIDTHxHEIGHT``, e.g. ``210x297`` or
      ``215.9x279.4``.

    The named sizes are the standard paper dimensions in millimetres; custom
    dimensions are returned as given, as floats.

    Raises:
        ValueError: if *format* is not a known name, has an unknown
            orientation suffix, or has a non-positive custom dimension.
    """
    # NOTE: the parameter shadows the ``format`` builtin, but its name is part
    # of the public signature and is therefore kept.
    # Keys are upper-case because the lookup key is upper-cased below; with
    # mixed-case keys "Letter" and "Legal" could never be found.
    sizes = {
        "A0": (841, 1189),
        "A1": (594, 841),
        "A2": (420, 594),
        "A3": (297, 420),
        "A4": (210, 297),
        "LETTER": (215.9, 279.4),
        "LEGAL": (215.9, 355.6),
    }
    error_message = f"Invalid or unsupported page format provided: {format}"

    # Custom dimensions, e.g. "210x297".
    custom = re.fullmatch(
        r"(\d+(?:\.\d+)?)x(\d+(?:\.\d+)?)", format, re.IGNORECASE
    )
    if custom:
        width, height = float(custom.group(1)), float(custom.group(2))
        if width <= 0 or height <= 0:
            raise ValueError(error_message)
        return (width, height)

    # Named size with an optional orientation suffix, e.g. "A4-landscape".
    named = re.fullmatch(
        r"(A\d|B\d|C\d|Letter|Legal)(?:-(landscape|portrait))?",
        format,
        re.IGNORECASE,
    )
    if named is None:
        raise ValueError(error_message)
    size_key = named.group(1).upper()
    if size_key not in sizes:
        # The pattern accepts B- and C-series names, but no dimensions are
        # defined for them.
        raise ValueError(error_message)

    width, height = sizes[size_key]
    # Lower-case the suffix so "A4-LANDSCAPE" is honoured like "A4-landscape".
    orientation = (named.group(2) or "portrait").lower()
    if orientation == "landscape":
        return (height, width)
    return (width, height)


def px_to_mm(px: float) -> float:
px_in_inch = 72
Expand All @@ -16,39 +39,51 @@ def px_to_mm(px: float) -> float:
return mm


def image_to_pdf(pdf: FPDF, x: Path, page_size: tuple = None) -> None:
    """Append the image at *x* to *pdf* as a new page.

    Args:
        pdf: document the page is added to.
        x: path of the image file.
        page_size: ``(width, height)`` of the page, in the same unit that
            ``px_to_mm`` returns. When ``None`` the page takes the size of
            the image itself, so the image fills it exactly.

    The image is scaled to fit the page while keeping its aspect ratio and is
    centred on the page.
    """
    # The context manager closes the file even if reading the size fails.
    with Image.open(x) as cover:
        width_px, height_px = cover.size

    # Convert the pixel dimensions with the module's px_to_mm helper.
    width, height = px_to_mm(width_px), px_to_mm(height_px)

    # Without an explicit format, fall back to a page sized like the image
    # instead of failing to unpack ``None``.
    if page_size is None:
        page_size = (width, height)
    page_width, page_height = page_size

    # Scale the image to fit the page while maintaining its aspect ratio.
    scale_factor = min(page_width / width, page_height / height)
    scaled_width, scaled_height = width * scale_factor, height * scale_factor

    # Centre the scaled image on the page.
    x_offset = (page_width - scaled_width) / 2
    y_offset = (page_height - scaled_height) / 2

    pdf.add_page(format=page_size)
    pdf.image(str(x), x=x_offset, y=y_offset, w=scaled_width, h=scaled_height)


def main(xs: List[Path], output: Path, format: str = None) -> int:
    """Convert the given files into a single PDF, one file per page.

    Args:
        xs: input files; each successfully converted file becomes one page.
        output: path the resulting PDF is written to.
        format: optional page format understood by ``get_page_size``. When it
            is omitted or empty, ``None`` is passed to ``image_to_pdf`` as the
            page size.

    Returns:
        0 when every processed file was converted, 1 when the page format is
        invalid or at least one file could not be converted.
    """
    console = Console()

    # Validate the format before doing any work, so a bad value is reported
    # as an error with a non-zero exit code rather than an uncaught traceback.
    try:
        page_size = get_page_size(format) if format else None
    except ValueError as error:
        console.print(f"Error: {error}", style="red")
        return 1

    pdf = FPDF(unit="mm")
    error_occurred = False  # Set as soon as any input fails to convert.

    # Compare real file extensions so a name that merely ends in "doc"
    # (e.g. "mydoc") is not mistaken for a Word document.
    word_suffixes = {".doc", ".docx", ".odt"}

    for x in xs:
        if x.suffix.lower() in word_suffixes:
            console.print(f"Skipping unsupported file format: {x}", style="yellow")
            continue
        try:
            image_to_pdf(pdf, x, page_size)
        except Exception as e:
            # Deliberately broad: one unreadable input must not abort the
            # whole run; the failure is reported and reflected in the exit code.
            console.print(f"Error processing {x}: {e}", style="red")
            error_occurred = True

    pdf.output(str(output))

    if error_occurred:
        console.print(f"PDF created at {output}, but some files encountered errors.", style="yellow")
        return 1
    console.print(f"PDF created successfully at {output}", style="green")
    return 0
40 changes: 40 additions & 0 deletions tests/test_x2pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,43 @@ def test_x2pdf(capsys, tmp_path: Path) -> None:
assert exit_code == 0, captured
assert captured.out == ""
assert output.exists()

def test_x2pdf_with_format(capsys, tmp_path: Path) -> None:
# Arrange
output = tmp_path / "out.pdf"
assert not output.exists()

formats_to_test = [
"Letter",
"A4-portrait",
"A4-landscape",
"210x297",
"invalid-format"
]

for format_option in formats_to_test:
# Act
exit_code = run_cli(
[
"x2pdf",
"sample-files/003-pdflatex-image/page-0-Im1.jpg",
Lucas-C marked this conversation as resolved.
Show resolved Hide resolved
"--output",
str(output),
"--format",
format_option,
]
)

# Assert
captured = capsys.readouterr()

# For valid formats, we expect a successful exit code and the output file to exist
if format_option != "invalid-format":
assert exit_code == 0, captured
assert captured.out == ""
assert output.exists()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be interesting to also validate the dimensions of the resulting pages.

This can be checked with pdfly pagemeta $pdf_filepath $page_index

Or using the underlying pypdf library: PdfReader(pdf_filepath).pages[page_index].mediabox (or .cropbox/.artbox/.bleedbox)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you did not handle that feedback comment @mulla028 🙂

else:
# For an invalid format, we expect a non-zero exit code (indicating failure)
assert exit_code != 0
assert "Invalid format" in captured.err # Check for expected error message
output.unlink(missing_ok=True) # Clean up for the next test iteration
Loading