Skip to content

Commit

Permalink
Added pymupdf/fitz open to get page count (#225)
Browse files Browse the repository at this point in the history
  • Loading branch information
ekcomputer authored Jan 22, 2024
1 parent 3cb16f2 commit 68e92e8
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions paperqa/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pathlib import Path
from typing import Any, BinaryIO, Coroutine, Iterator, Union

import fitz
import pypdf

StrPath = Union[str, Path]
Expand Down Expand Up @@ -63,8 +64,12 @@ def strings_similarity(s1: str, s2: str) -> float:

def count_pdf_pages(file_path: StrPath) -> int:
    """Return the number of pages in the PDF at *file_path*.

    PyMuPDF (``fitz``) is tried first. If it fails for any reason, the file
    is read with ``pypdf`` instead.

    Args:
        file_path: Location of the PDF file on disk.

    Returns:
        The page count reported by whichever library succeeded.

    Raises:
        Exception: Whatever ``open`` or ``pypdf`` raises when the fallback
            path fails as well (for example, the file does not exist).
    """
    try:
        # The context manager closes the fitz document once it is counted.
        with fitz.open(file_path) as doc:
            return len(doc)
    except Exception:
        # Deliberate best-effort fallback: any fitz failure is retried with
        # pypdf. ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        with open(file_path, "rb") as pdf_file:
            return len(pypdf.PdfReader(pdf_file).pages)


Expand Down

0 comments on commit 68e92e8

Please sign in to comment.