From 4ce025c55352ca0df6ef5638169fa15d70f15dff Mon Sep 17 00:00:00 2001
From: Vik Paruchuri <vik.paruchuri@gmail.com>
Date: Fri, 22 Dec 2023 16:02:10 -0800
Subject: [PATCH] Clarify purpose in README

---
 README.md                          | 19 +++++++++++++------
 benchmark.py                       |  7 ++++---
 pyproject.toml                     |  2 +-
 scripts/verify_benchmark_scores.py |  2 +-
 4 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 65574fa..d87dfd7 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,17 @@
 # Texify
 
-Texify converts equations and surrounding text into markdown and LaTeX that can be rendered by MathJax ($$ and $ are delimiters).  It will work with images or pdfs, and can run on CPU, GPU, or MPS.
+Texify is an OCR model that converts images or pdfs containing math into markdown and LaTeX that can be rendered by MathJax ($$ and $ are delimiters).  It can run on CPU, GPU, or MPS.
 
 https://github.com/VikParuchuri/texify/assets/913340/882022a6-020d-4796-af02-67cb77bc084c
 
+Texify can work with block equations, or equations mixed with text (inline).  It will convert both the equations and the text.
+
 The closest open source comparisons to texify are [pix2tex](https://github.com/lukas-blecher/LaTeX-OCR) and [nougat](https://github.com/facebookresearch/nougat), although they're designed for different purposes:
 
-- Pix2tex is designed for block LaTeX equations, and hallucinates more on text.  Texify can work with inline equations and text.
-- Nougat is designed to OCR entire pages, and hallucinates more on small images. Texify is optimized for equations and small page regions.
+- Pix2tex is designed only for block LaTeX equations, and hallucinates more on text.
+- Nougat is designed to OCR entire pages, and hallucinates more on small images containing only math.
 
-Pix2tex is trained on im2latex, and nougat is trained on arxiv.  Texify is trained on a broader set of web data, and works on a range of images.
+Pix2tex is trained on im2latex, and nougat is trained on arxiv.  Texify is trained on a more diverse set of web data, and works on a range of images.
 
 See more details in the [benchmarks](#benchmarks) section.
 
@@ -37,9 +39,13 @@ where the integral over the surface of cell $\mathcal{C}_ {j}$ only depends on $
 
 # Installation
 
-This has been tested on Mac and Linux (Ubuntu and Debian).  You'll need python 3.10+ and PyTorch. You may need to install the CPU version of torch first if you're not using a Mac or a GPU machine.  See [here](https://pytorch.org/get-started/locally/) for more details.
+You'll need Python 3.10+ and PyTorch. You may need to install the CPU version of torch first if you're not using a Mac or a GPU machine.  See [here](https://pytorch.org/get-started/locally/) for more details.
+
+Install with:
 
-`pip install texify`
+```
+pip install texify
+```
 
 Model weights will automatically download the first time you run it.
 
@@ -97,6 +103,7 @@ If you want to develop texify, you can install it manually:
 
 OCR is complicated, and texify is not perfect.  Here are some known limitations:
 
+- The OCR is dependent on how you crop the image.  If you get bad results, try a different selection/crop, or try changing the `TEMPERATURE` setting.
 - Texify will OCR equations and surrounding text, but is not good for general purpose OCR.  Think sections of a page instead of a whole page.
 - Texify was mostly trained with 96 DPI images, and only at a max 420x420 resolution.  Very wide or very tall images may not work well.
 - It works best with English, although it should support other languages with similar character sets.
@@ -120,7 +127,7 @@ Although this makes the benchmark results biased, it does seem like a good compr
 |---------|--------------|--------------|-----------------|
 | pix2tex | 0.382659     | 0.543363     | 0.352533        |
 | nougat  | 0.697667     | 0.668331     | 0.288159        |
-| texify  | **0.837895** | **0.865492** | **0.0842209**   |
+| texify  | **0.842349** | **0.885731** | **0.0651534**   |
 
 ## Running your own benchmarks
 
diff --git a/benchmark.py b/benchmark.py
index 519119d..7b2b824 100644
--- a/benchmark.py
+++ b/benchmark.py
@@ -6,6 +6,7 @@
 
 import evaluate
 from tabulate import tabulate
+from tqdm import tqdm
 
 from texify.inference import batch_inference
 from texify.model.model import load_model
@@ -62,7 +63,7 @@ def inference_texify(source_data, model, processor):
     images = load_images(source_data)
 
     write_data = []
-    for i in range(0, len(images), settings.BATCH_SIZE):
+    for i in tqdm(range(0, len(images), settings.BATCH_SIZE), desc="Texify inference"):
         batch = images[i:i+settings.BATCH_SIZE]
         text = batch_inference(batch, model, processor)
         for j, t in enumerate(text):
@@ -78,7 +79,7 @@ def inference_pix2tex(source_data):
 
     images = load_images(source_data)
     write_data = []
-    for i in range(len(images)):
+    for i in tqdm(range(len(images)), desc="Pix2tex inference"):
         try:
             text = model(images[i])
         except ValueError:
@@ -127,7 +128,7 @@ def inference_nougat(source_data, batch_size=1):
         shuffle=False,
     )
 
-    for idx, sample in enumerate(dataloader):
+    for idx, sample in tqdm(enumerate(dataloader), desc="Nougat inference", total=len(dataloader)):
         model.config.max_length = settings.MAX_TOKENS
         model_output = model.inference(image_tensors=sample, early_stopping=False)
         output = [markdown_compatible(o) for o in model_output["predictions"]]
diff --git a/pyproject.toml b/pyproject.toml
index 870948e..a77aebb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "texify"
-version = "0.1.4"
+version = "0.1.6"
 description = "OCR for latex images"
 authors = ["Vik Paruchuri <vik.paruchuri@gmail.com>"]
 readme = "README.md"
diff --git a/scripts/verify_benchmark_scores.py b/scripts/verify_benchmark_scores.py
index 494ba8c..c5f7868 100644
--- a/scripts/verify_benchmark_scores.py
+++ b/scripts/verify_benchmark_scores.py
@@ -8,7 +8,7 @@ def verify_scores(file_path):
 
     scores = data["texify"]["scores"]
 
-    if scores["bleu"] <= 0.7 or scores["meteor"] <= 0.7 or scores["edit"] > 0.2:
+    if scores["bleu"] <= 0.6 or scores["meteor"] <= 0.6 or scores["edit"] > 0.2:
         print(scores)
         raise ValueError("Scores do not meet the required threshold")