Skip to content

Commit

Permalink
Small table fix
Browse files: browse the repository at this point in the history
  • Loading branch information
VikParuchuri committed May 1, 2024
1 parent d22c5a5 commit ba0df58
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 4 deletions.
4 changes: 2 additions & 2 deletions benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from collections import defaultdict

from tqdm import tqdm
import pypdfium2 as pdfium

from marker.convert import convert_single_pdf
from marker.logger import configure_logging
Expand All @@ -14,7 +15,6 @@
import os
import subprocess
import shutil
import fitz as pymupdf
from tabulate import tabulate

configure_logging()
Expand Down Expand Up @@ -62,7 +62,7 @@ def main():
reference = f.read()

pdf_filename = os.path.join(args.in_folder, fname)
doc = pymupdf.open(pdf_filename)
doc = pdfium.PdfDocument(pdf_filename)
pages[fname] = len(doc)

for method in methods:
Expand Down
2 changes: 1 addition & 1 deletion marker/benchmark/scoring.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import math

from rapidfuzz import fuzz, distance
from rapidfuzz import fuzz
import re

CHUNK_MIN_CHARS = 25
Expand Down
2 changes: 1 addition & 1 deletion marker/cleaners/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def create_new_tables(blocks: List[Page]):
table_rows.append(row)

# Don't render tables if they will be too large
if max([len("".join(r)) for r in table_rows]) > 300 or len(table_rows[0]) > 8 or len(table_rows[0]) < 2:
if max([len("".join(r)) for r in table_rows]) > 300 or len(table_rows[0]) > 8:
continue

new_text = tabulate(table_rows, headers="firstrow", tablefmt="github")
Expand Down

0 comments on commit ba0df58

Please sign in to comment.