From dfe09ebda9f0155f3c80dcee79fe4eff91a812f6 Mon Sep 17 00:00:00 2001
From: felix <felixdittrich92@gmail.com>
Date: Mon, 2 Oct 2023 16:50:57 +0200
Subject: [PATCH 01/13] [skip ci] compute orientation on seq map

---
 doctr/models/_utils.py                        | 12 ++++------
 doctr/models/detection/predictor/pytorch.py   | 17 +++++++++----
 .../models/detection/predictor/tensorflow.py  | 15 ++++++++----
 doctr/models/kie_predictor/pytorch.py         | 24 ++++++++++++++-----
 doctr/models/kie_predictor/tensorflow.py      | 22 +++++++++++------
 doctr/models/predictor/pytorch.py             | 21 +++++++++++-----
 doctr/models/predictor/tensorflow.py          | 21 ++++++++++------
 doctr/models/zoo.py                           | 14 +++++++++++
 tests/common/test_models.py                   | 17 ++++---------
 9 files changed, 109 insertions(+), 54 deletions(-)

diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py
index 1021afdc0c..304fa9a7e8 100644
--- a/doctr/models/_utils.py
+++ b/doctr/models/_utils.py
@@ -27,13 +27,12 @@ def get_max_width_length_ratio(contour: np.ndarray) -> float:
     return max(w / h, h / w)
 
 
-def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_lines: float = 5) -> float:
+def estimate_orientation(seq_map: np.ndarray, n_ct: int = 50, ratio_threshold_for_lines: float = 5) -> float:
     """Estimate the angle of the general document orientation based on the
      lines of the document and the assumption that they should be horizontal.
 
     Args:
-    ----
-        img: the img to analyze
+        seq_map: the binarized image of the document
         n_ct: the number of contours used for the orientation estimation
         ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines
 
@@ -41,16 +40,13 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li
     -------
         the angle of the general document orientation
     """
-    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    gray_img = cv2.medianBlur(gray_img, 5)
-    thresh = cv2.threshold(gray_img, thresh=0, maxval=255, type=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
 
     # try to merge words in lines
-    (h, w) = img.shape[:2]
+    (h, w) = seq_map.shape[:2]
     k_x = max(1, (floor(w / 100)))
     k_y = max(1, (floor(h / 100)))
     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (k_x, k_y))
-    thresh = cv2.dilate(thresh, kernel, iterations=1)
+    thresh = cv2.dilate(seq_map, kernel, iterations=1)
 
     # extract contours
     contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
diff --git a/doctr/models/detection/predictor/pytorch.py b/doctr/models/detection/predictor/pytorch.py
index 8202abca8d..34f26f03e4 100644
--- a/doctr/models/detection/predictor/pytorch.py
+++ b/doctr/models/detection/predictor/pytorch.py
@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any, List, Union
+from typing import Any, Dict, List, Tuple, Union
 
 import numpy as np
 import torch
@@ -37,8 +37,9 @@ def __init__(
     def forward(
         self,
         pages: List[Union[np.ndarray, torch.Tensor]],
+        return_maps: bool = False,
         **kwargs: Any,
-    ) -> List[np.ndarray]:
+    ) -> Union[List[Dict[str, np.ndarray]], Tuple[List[Dict[str, np.ndarray]], List[np.ndarray]]]:
         # Dimension check
         if any(page.ndim != 3 for page in pages):
             raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.")
@@ -48,5 +49,13 @@ def forward(
         self.model, processed_batches = set_device_and_dtype(
             self.model, processed_batches, _params.device, _params.dtype
         )
-        predicted_batches = [self.model(batch, return_preds=True, **kwargs)["preds"] for batch in processed_batches]
-        return [pred for batch in predicted_batches for pred in batch]
+        predicted_batches = [
+            self.model(batch, return_preds=True, return_model_output=True, **kwargs) for batch in processed_batches
+        ]
+        preds = [pred for batch in predicted_batches for pred in batch["preds"]]
+        if not return_maps:  # skip the map conversion when the caller only needs the predictions
+            return preds
+        seq_maps = [
+            pred.permute(1, 2, 0).detach().cpu().numpy() for batch in predicted_batches for pred in batch["out_map"]
+        ]
+        return preds, seq_maps
diff --git a/doctr/models/detection/predictor/tensorflow.py b/doctr/models/detection/predictor/tensorflow.py
index 80251ff96e..6317a61874 100644
--- a/doctr/models/detection/predictor/tensorflow.py
+++ b/doctr/models/detection/predictor/tensorflow.py
@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any, Dict, List, Union
+from typing import Any, Dict, List, Tuple, Union
 
 import numpy as np
 import tensorflow as tf
@@ -37,14 +37,21 @@ def __init__(
     def __call__(
         self,
         pages: List[Union[np.ndarray, tf.Tensor]],
+        return_maps: bool = False,
         **kwargs: Any,
-    ) -> List[Dict[str, np.ndarray]]:
+    ) -> Union[List[Dict[str, np.ndarray]], Tuple[List[Dict[str, np.ndarray]], List[np.ndarray]]]:
         # Dimension check
         if any(page.ndim != 3 for page in pages):
             raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.")
 
         processed_batches = self.pre_processor(pages)
         predicted_batches = [
-            self.model(batch, return_preds=True, training=False, **kwargs)["preds"] for batch in processed_batches
+            self.model(batch, return_preds=True, return_model_output=True, training=False, **kwargs)
+            for batch in processed_batches
         ]
-        return [pred for batch in predicted_batches for pred in batch]
+
+        preds = [pred for batch in predicted_batches for pred in batch["preds"]]
+        if not return_maps:  # skip the map conversion when the caller only needs the predictions
+            return preds
+        seq_maps = [pred.numpy() for batch in predicted_batches for pred in batch["out_map"]]
+        return preds, seq_maps
diff --git a/doctr/models/kie_predictor/pytorch.py b/doctr/models/kie_predictor/pytorch.py
index 520dcdaf0e..9d7c0ef088 100644
--- a/doctr/models/kie_predictor/pytorch.py
+++ b/doctr/models/kie_predictor/pytorch.py
@@ -72,22 +72,34 @@ def forward(
 
         origin_page_shapes = [page.shape[:2] if isinstance(page, np.ndarray) else page.shape[-2:] for page in pages]
 
+        # Localize text elements
+        loc_preds, out_maps = self.det_predictor(pages, return_maps=True, **kwargs)
+
         # Detect document rotation and rotate pages
+        seq_maps = [
+            np.sum(np.where(out_map > kwargs.get("bin_thresh", 0.3), 255, 0), axis=-1, keepdims=True).astype(np.uint8)
+            for out_map in out_maps
+        ]
         if self.detect_orientation:
-            origin_page_orientations = [estimate_orientation(page) for page in pages]
+            origin_page_orientations = [estimate_orientation(seq_map) for seq_map in seq_maps]
             orientations = [
-                {"value": orientation_page, "confidence": 1.0} for orientation_page in origin_page_orientations
+                {"value": orientation_page, "confidence": None} for orientation_page in origin_page_orientations
             ]
         else:
             orientations = None
         if self.straighten_pages:
             origin_page_orientations = (
-                origin_page_orientations if self.detect_orientation else [estimate_orientation(page) for page in pages]
+                origin_page_orientations
+                if self.detect_orientation
+                else [estimate_orientation(seq_map) for seq_map in seq_maps]
             )
-            pages = [rotate_image(page, -angle, expand=True) for page, angle in zip(pages, origin_page_orientations)]
+            pages = [
+                rotate_image(page, -angle, expand=True)  # type: ignore[arg-type]
+                for page, angle in zip(pages, origin_page_orientations)
+            ]
+            # forward again to get predictions on straight pages
+            loc_preds = self.det_predictor(pages, **kwargs)
 
-        # Localize text elements
-        loc_preds = self.det_predictor(pages, **kwargs)
         dict_loc_preds: Dict[str, List[np.ndarray]] = invert_data_structure(loc_preds)  # type: ignore[assignment]
         # Check whether crop mode should be switched to channels first
         channels_last = len(pages) == 0 or isinstance(pages[0], np.ndarray)
diff --git a/doctr/models/kie_predictor/tensorflow.py b/doctr/models/kie_predictor/tensorflow.py
index d6dca51520..a1c1f8d2c3 100644
--- a/doctr/models/kie_predictor/tensorflow.py
+++ b/doctr/models/kie_predictor/tensorflow.py
@@ -72,24 +72,32 @@ def __call__(
 
         origin_page_shapes = [page.shape[:2] for page in pages]
 
+        # Localize text elements
+        loc_preds, out_maps = self.det_predictor(pages, return_maps=True, **kwargs)
+
         # Detect document rotation and rotate pages
+        seq_maps = [
+            np.sum(np.where(out_map > kwargs.get("bin_thresh", 0.3), 255, 0), axis=-1, keepdims=True).astype(np.uint8)
+            for out_map in out_maps
+        ]
         if self.detect_orientation:
-            origin_page_orientations = [estimate_orientation(page) for page in pages]
+            origin_page_orientations = [estimate_orientation(seq_map) for seq_map in seq_maps]
             orientations = [
-                {"value": orientation_page, "confidence": 1.0} for orientation_page in origin_page_orientations
+                {"value": orientation_page, "confidence": None} for orientation_page in origin_page_orientations
             ]
         else:
             orientations = None
         if self.straighten_pages:
             origin_page_orientations = (
-                origin_page_orientations if self.detect_orientation else [estimate_orientation(page) for page in pages]
+                origin_page_orientations
+                if self.detect_orientation
+                else [estimate_orientation(seq_map) for seq_map in seq_maps]
             )
             pages = [rotate_image(page, -angle, expand=True) for page, angle in zip(pages, origin_page_orientations)]
+            # forward again to get predictions on straight pages
+            loc_preds = self.det_predictor(pages, **kwargs)  # type: ignore[assignment]
 
-        # Localize text elements
-        loc_preds = self.det_predictor(pages, **kwargs)
-
-        dict_loc_preds: Dict[str, List[np.ndarray]] = invert_data_structure(loc_preds)  # type: ignore[assignment]
+        dict_loc_preds: Dict[str, List[np.ndarray]] = invert_data_structure(loc_preds)  # type: ignore
         # Rectify crops if aspect ratio
         dict_loc_preds = {k: self._remove_padding(pages, loc_pred) for k, loc_pred in dict_loc_preds.items()}
 
diff --git a/doctr/models/predictor/pytorch.py b/doctr/models/predictor/pytorch.py
index 59b34c8dca..c16d404166 100644
--- a/doctr/models/predictor/pytorch.py
+++ b/doctr/models/predictor/pytorch.py
@@ -72,22 +72,31 @@ def forward(
 
         origin_page_shapes = [page.shape[:2] if isinstance(page, np.ndarray) else page.shape[-2:] for page in pages]
 
+        # Localize text elements
+        loc_preds, out_maps = self.det_predictor(pages, return_maps=True, **kwargs)
+
         # Detect document rotation and rotate pages
+        seq_maps = [np.where(out_map > kwargs.get("bin_thresh", 0.3), 255, 0).astype(np.uint8) for out_map in out_maps]
         if self.detect_orientation:
-            origin_page_orientations = [estimate_orientation(page) for page in pages]
+            origin_page_orientations = [estimate_orientation(seq_map) for seq_map in seq_maps]
             orientations = [
-                {"value": orientation_page, "confidence": 1.0} for orientation_page in origin_page_orientations
+                {"value": orientation_page, "confidence": None} for orientation_page in origin_page_orientations
             ]
         else:
             orientations = None
         if self.straighten_pages:
             origin_page_orientations = (
-                origin_page_orientations if self.detect_orientation else [estimate_orientation(page) for page in pages]
+                origin_page_orientations
+                if self.detect_orientation
+                else [estimate_orientation(seq_map) for seq_map in seq_maps]
             )
-            pages = [rotate_image(page, -angle, expand=True) for page, angle in zip(pages, origin_page_orientations)]
+            pages = [
+                rotate_image(page, -angle, expand=True)  # type: ignore[arg-type]
+                for page, angle in zip(pages, origin_page_orientations)
+            ]
+            # forward again to get predictions on straight pages
+            loc_preds = self.det_predictor(pages, **kwargs)
 
-        # Localize text elements
-        loc_preds = self.det_predictor(pages, **kwargs)
         assert all(
             len(loc_pred) == 1 for loc_pred in loc_preds
         ), "Detection Model in ocr_predictor should output only one class"
diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py
index 9ac31b3957..a1d66cdb66 100644
--- a/doctr/models/predictor/tensorflow.py
+++ b/doctr/models/predictor/tensorflow.py
@@ -72,27 +72,34 @@ def __call__(
 
         origin_page_shapes = [page.shape[:2] for page in pages]
 
+        # Localize text elements
+        loc_preds_dict, out_maps = self.det_predictor(pages, return_maps=True, **kwargs)
+
         # Detect document rotation and rotate pages
+        seq_maps = [np.where(out_map > kwargs.get("bin_thresh", 0.3), 255, 0).astype(np.uint8) for out_map in out_maps]
         if self.detect_orientation:
-            origin_page_orientations = [estimate_orientation(page) for page in pages]
+            origin_page_orientations = [estimate_orientation(seq_map) for seq_map in seq_maps]
             orientations = [
-                {"value": orientation_page, "confidence": 1.0} for orientation_page in origin_page_orientations
+                {"value": orientation_page, "confidence": None} for orientation_page in origin_page_orientations
             ]
         else:
             orientations = None
         if self.straighten_pages:
             origin_page_orientations = (
-                origin_page_orientations if self.detect_orientation else [estimate_orientation(page) for page in pages]
+                origin_page_orientations
+                if self.detect_orientation
+                else [estimate_orientation(seq_map) for seq_map in seq_maps]
             )
             pages = [rotate_image(page, -angle, expand=True) for page, angle in zip(pages, origin_page_orientations)]
+            # forward again to get predictions on straight pages
+            loc_preds_dict = self.det_predictor(pages, **kwargs)  # type: ignore[assignment]
 
-        # Localize text elements
-        loc_preds_dict = self.det_predictor(pages, **kwargs)
         assert all(
             len(loc_pred) == 1 for loc_pred in loc_preds_dict
         ), "Detection Model in ocr_predictor should output only one class"
-
-        loc_preds: List[np.ndarray] = [list(loc_pred.values())[0] for loc_pred in loc_preds_dict]
+        loc_preds: List[np.ndarray] = [
+            list(loc_pred.values())[0] for loc_pred in loc_preds_dict  # type: ignore[union-attr]
+        ]
 
         # Rectify crops if aspect ratio
         loc_preds = self._remove_padding(pages, loc_preds)
diff --git a/doctr/models/zoo.py b/doctr/models/zoo.py
index c7842124e6..da806227c0 100644
--- a/doctr/models/zoo.py
+++ b/doctr/models/zoo.py
@@ -24,6 +24,7 @@ def _predictor(
     det_bs: int = 2,
     reco_bs: int = 128,
     detect_orientation: bool = False,
+    straighten_pages: bool = False,
     detect_language: bool = False,
     **kwargs,
 ) -> OCRPredictor:
@@ -53,6 +54,7 @@ def _predictor(
         preserve_aspect_ratio=preserve_aspect_ratio,
         symmetric_pad=symmetric_pad,
         detect_orientation=detect_orientation,
+        straighten_pages=straighten_pages,
         detect_language=detect_language,
         **kwargs,
     )
@@ -68,6 +70,7 @@ def ocr_predictor(
     symmetric_pad: bool = True,
     export_as_straight_boxes: bool = False,
     detect_orientation: bool = False,
+    straighten_pages: bool = False,
     detect_language: bool = False,
     **kwargs: Any,
 ) -> OCRPredictor:
@@ -96,6 +99,9 @@ def ocr_predictor(
             (potentially rotated) as straight bounding boxes.
         detect_orientation: if True, the estimated general page orientation will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
+        straighten_pages: if True, estimates the page general orientation based on the median line orientation.
+            Then, rotates page before passing it to the deep learning modules. The final predictions will be remapped
+            accordingly. Doing so will improve performances for documents with page-uniform rotations.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
         kwargs: keyword args of `OCRPredictor`
@@ -114,6 +120,7 @@ def ocr_predictor(
         symmetric_pad=symmetric_pad,
         export_as_straight_boxes=export_as_straight_boxes,
         detect_orientation=detect_orientation,
+        straighten_pages=straighten_pages,
         detect_language=detect_language,
         **kwargs,
     )
@@ -130,6 +137,7 @@ def _kie_predictor(
     det_bs: int = 2,
     reco_bs: int = 128,
     detect_orientation: bool = False,
+    straighten_pages: bool = False,
     detect_language: bool = False,
     **kwargs,
 ) -> KIEPredictor:
@@ -159,6 +167,7 @@ def _kie_predictor(
         preserve_aspect_ratio=preserve_aspect_ratio,
         symmetric_pad=symmetric_pad,
         detect_orientation=detect_orientation,
+        straighten_pages=straighten_pages,
         detect_language=detect_language,
         **kwargs,
     )
@@ -174,6 +183,7 @@ def kie_predictor(
     symmetric_pad: bool = True,
     export_as_straight_boxes: bool = False,
     detect_orientation: bool = False,
+    straighten_pages: bool = False,
     detect_language: bool = False,
     **kwargs: Any,
 ) -> KIEPredictor:
@@ -202,6 +212,9 @@ def kie_predictor(
             (potentially rotated) as straight bounding boxes.
         detect_orientation: if True, the estimated general page orientation will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
+        straighten_pages: if True, estimates the page general orientation based on the median line orientation.
+            Then, rotates page before passing it to the deep learning modules. The final predictions will be remapped
+            accordingly. Doing so will improve performances for documents with page-uniform rotations.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
         kwargs: keyword args of `OCRPredictor`
@@ -220,6 +233,7 @@ def kie_predictor(
         symmetric_pad=symmetric_pad,
         export_as_straight_boxes=export_as_straight_boxes,
         detect_orientation=detect_orientation,
+        straighten_pages=straighten_pages,
         detect_language=detect_language,
         **kwargs,
     )
diff --git a/tests/common/test_models.py b/tests/common/test_models.py
index fea26024b1..ab1dc35323 100644
--- a/tests/common/test_models.py
+++ b/tests/common/test_models.py
@@ -23,7 +23,7 @@ def mock_image(tmpdir_factory):
 
 @pytest.fixture(scope="function")
 def mock_bitmap(mock_image):
-    bitmap = np.squeeze(cv2.cvtColor(mock_image, cv2.COLOR_BGR2GRAY) / 255.0)
+    bitmap = np.squeeze(cv2.cvtColor(mock_image, cv2.COLOR_BGR2GRAY))
     return bitmap
 
 
@@ -32,20 +32,13 @@ def test_get_bitmap_angle(mock_bitmap):
     assert abs(angle - 30.0) < 1.0
 
 
-def test_estimate_orientation(mock_image, mock_tilted_payslip):
-    assert estimate_orientation(mock_image * 0) == 0
+def test_estimate_orientation(mock_bitmap):
+    assert estimate_orientation(mock_bitmap * 0) == 0
 
-    angle = estimate_orientation(mock_image)
+    angle = estimate_orientation(mock_bitmap)
     assert abs(angle - 30.0) < 1.0
 
-    rotated = geometry.rotate_image(mock_image, -angle)
-    angle_rotated = estimate_orientation(rotated)
-    assert abs(angle_rotated) < 1.0
-
-    mock_tilted_payslip = reader.read_img_as_numpy(mock_tilted_payslip)
-    assert (estimate_orientation(mock_tilted_payslip) - 30.0) < 1.0
-
-    rotated = geometry.rotate_image(mock_tilted_payslip, -30, expand=True)
+    rotated = geometry.rotate_image(mock_bitmap, -angle)
     angle_rotated = estimate_orientation(rotated)
     assert abs(angle_rotated) < 1.0
 

From eeb63eca6c18e3457950da68881508eda7f4abf8 Mon Sep 17 00:00:00 2001
From: felix <felixdittrich92@gmail.com>
Date: Wed, 4 Oct 2023 11:03:10 +0200
Subject: [PATCH 02/13] [skip ci] some updates and fixes

---
 README.md                                |  2 +-
 doctr/io/elements.py                     | 35 +++++++++++-------------
 doctr/models/builder.py                  | 16 +++++++----
 doctr/models/kie_predictor/pytorch.py    | 29 ++++++--------------
 doctr/models/kie_predictor/tensorflow.py | 29 ++++++--------------
 doctr/models/predictor/pytorch.py        | 20 ++++----------
 doctr/models/predictor/tensorflow.py     | 16 ++---------
 tests/common/test_io_elements.py         | 20 ++++++++++----
 tests/common/test_models_builder.py      | 22 +++++++++------
 9 files changed, 79 insertions(+), 110 deletions(-)

diff --git a/README.md b/README.md
index 242cff4d59..93cc877b70 100644
--- a/README.md
+++ b/README.md
@@ -75,7 +75,7 @@ If both options are set to False, the predictor will always fit and return rotat
 To interpret your model's predictions, you can visualize them interactively as follows:
 
 ```python
-result.show(doc)
+result.show()
 ```
 
 ![Visualization sample](docs/images/doctr_example_script.gif)
diff --git a/doctr/io/elements.py b/doctr/io/elements.py
index 4e92a4043f..d94ade587e 100644
--- a/doctr/io/elements.py
+++ b/doctr/io/elements.py
@@ -234,7 +234,7 @@ class Page(Element):
     """Implements a page element as a collection of blocks
 
     Args:
-    ----
+        page: image encoded as a numpy array in uint8
         blocks: list of block elements
         page_idx: the index of the page in the input raw document
         dimensions: the page size in pixels in format (height, width)
@@ -248,6 +248,7 @@ class Page(Element):
 
     def __init__(
         self,
+        page: np.ndarray,
         blocks: List[Block],
         page_idx: int,
         dimensions: Tuple[int, int],
@@ -255,6 +256,7 @@ def __init__(
         language: Optional[Dict[str, Any]] = None,
     ) -> None:
         super().__init__(blocks=blocks)
+        self.page = page
         self.page_idx = page_idx
         self.dimensions = dimensions
         self.orientation = orientation if isinstance(orientation, dict) else dict(value=None, confidence=None)
@@ -267,17 +269,15 @@ def render(self, block_break: str = "\n\n") -> str:
     def extra_repr(self) -> str:
         return f"dimensions={self.dimensions}"
 
-    def show(self, page: np.ndarray, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
+    def show(self, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
         """Overlay the result on a given image
 
         Args:
-        ----
-            page: image encoded as a numpy array in uint8
             interactive: whether the display should be interactive
             preserve_aspect_ratio: pass True if you passed True to the predictor
             **kwargs: additional keyword arguments passed to the matplotlib.pyplot.show method
         """
-        visualize_page(self.export(), page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio)
+        visualize_page(self.export(), self.page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio)
         plt.show(**kwargs)
 
     def synthesize(self, **kwargs) -> np.ndarray:
@@ -408,6 +408,7 @@ class KIEPage(Element):
     Args:
     ----
         predictions: Dictionary with list of block elements for each detection class
+        page: image encoded as a numpy array in uint8
         page_idx: the index of the page in the input raw document
         dimensions: the page size in pixels in format (height, width)
         orientation: a dictionary with the value of the rotation angle in degress and confidence of the prediction
@@ -420,6 +421,7 @@ class KIEPage(Element):
 
     def __init__(
         self,
+        page: np.ndarray,
         predictions: Dict[str, List[Prediction]],
         page_idx: int,
         dimensions: Tuple[int, int],
@@ -427,6 +429,7 @@ def __init__(
         language: Optional[Dict[str, Any]] = None,
     ) -> None:
         super().__init__(predictions=predictions)
+        self.page = page
         self.page_idx = page_idx
         self.dimensions = dimensions
         self.orientation = orientation if isinstance(orientation, dict) else dict(value=None, confidence=None)
@@ -441,17 +444,17 @@ def render(self, prediction_break: str = "\n\n") -> str:
     def extra_repr(self) -> str:
         return f"dimensions={self.dimensions}"
 
-    def show(self, page: np.ndarray, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
+    def show(self, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
         """Overlay the result on a given image
 
         Args:
-        ----
-            page: image encoded as a numpy array in uint8
             interactive: whether the display should be interactive
             preserve_aspect_ratio: pass True if you passed True to the predictor
             **kwargs: keyword arguments passed to the matplotlib.pyplot.show method
         """
-        visualize_kie_page(self.export(), page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio)
+        visualize_kie_page(
+            self.export(), self.page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio
+        )
         plt.show(**kwargs)
 
     def synthesize(self, **kwargs) -> np.ndarray:
@@ -561,16 +564,10 @@ def render(self, page_break: str = "\n\n\n\n") -> str:
         """Renders the full text of the element"""
         return page_break.join(p.render() for p in self.pages)
 
-    def show(self, pages: List[np.ndarray], **kwargs) -> None:
-        """Overlay the result on a given image
-
-        Args:
-        ----
-            pages: list of images encoded as numpy arrays in uint8
-            **kwargs: keyword arguments passed to the Page.show method
-        """
-        for img, result in zip(pages, self.pages):
-            result.show(img, **kwargs)
+    def show(self, **kwargs) -> None:
+        """Display every page of the document by calling its own show method, forwarding **kwargs"""
+        for result in self.pages:
+            result.show(**kwargs)
 
     def synthesize(self, **kwargs) -> List[np.ndarray]:
         """Synthesize all pages from their predictions
diff --git a/doctr/models/builder.py b/doctr/models/builder.py
index 820689bbac..b974f7c0db 100644
--- a/doctr/models/builder.py
+++ b/doctr/models/builder.py
@@ -287,6 +287,7 @@ def extra_repr(self) -> str:
 
     def __call__(
         self,
+        pages: List[np.ndarray],
         boxes: List[np.ndarray],
         text_preds: List[List[Tuple[str, float]]],
         page_shapes: List[Tuple[int, int]],
@@ -296,7 +297,7 @@ def __call__(
         """Re-arrange detected words into structured blocks
 
         Args:
-        ----
+            pages: list of N elements, where each element represents the page image
             boxes: list of N elements, where each element represents the localization predictions, of shape (*, 5)
                 or (*, 6) for all words for a given page
             text_preds: list of N elements, where each element is the list of all word prediction (text + confidence)
@@ -325,6 +326,7 @@ def __call__(
 
         _pages = [
             Page(
+                page,
                 self._build_blocks(
                     page_boxes,
                     word_preds,
@@ -334,8 +336,8 @@ def __call__(
                 orientation,
                 language,
             )
-            for _idx, shape, page_boxes, word_preds, orientation, language in zip(
-                range(len(boxes)), page_shapes, boxes, text_preds, _orientations, _languages
+            for page, _idx, shape, page_boxes, word_preds, orientation, language in zip(
+                pages, range(len(boxes)), page_shapes, boxes, text_preds, _orientations, _languages
             )
         ]
 
@@ -356,6 +358,7 @@ class KIEDocumentBuilder(DocumentBuilder):
 
     def __call__(  # type: ignore[override]
         self,
+        pages: List[np.ndarray],
         boxes: List[Dict[str, np.ndarray]],
         text_preds: List[Dict[str, List[Tuple[str, float]]]],
         page_shapes: List[Tuple[int, int]],
@@ -365,7 +368,7 @@ def __call__(  # type: ignore[override]
         """Re-arrange detected words into structured predictions
 
         Args:
-        ----
+            pages: list of N elements, where each element represents the page image
             boxes: list of N dictionaries, where each element represents the localization predictions for a class,
                 of shape (*, 5) or (*, 6) for all predictions
             text_preds: list of N dictionaries, where each element is the list of all word prediction
@@ -400,6 +403,7 @@ def __call__(  # type: ignore[override]
 
         _pages = [
             KIEPage(
+                page,
                 {
                     k: self._build_blocks(
                         page_boxes[k],
@@ -412,8 +416,8 @@ def __call__(  # type: ignore[override]
                 orientation,
                 language,
             )
-            for _idx, shape, page_boxes, word_preds, orientation, language in zip(
-                range(len(boxes)), page_shapes, boxes, text_preds, _orientations, _languages
+            for page, _idx, shape, page_boxes, word_preds, orientation, language in zip(
+                pages, range(len(boxes)), page_shapes, boxes, text_preds, _orientations, _languages
             )
         ]
 
diff --git a/doctr/models/kie_predictor/pytorch.py b/doctr/models/kie_predictor/pytorch.py
index 9d7c0ef088..957a7270b3 100644
--- a/doctr/models/kie_predictor/pytorch.py
+++ b/doctr/models/kie_predictor/pytorch.py
@@ -13,7 +13,7 @@
 from doctr.models._utils import estimate_orientation, get_language, invert_data_structure
 from doctr.models.detection.predictor import DetectionPredictor
 from doctr.models.recognition.predictor import RecognitionPredictor
-from doctr.utils.geometry import rotate_boxes, rotate_image
+from doctr.utils.geometry import rotate_image
 
 from .base import _KIEPredictor
 
@@ -77,7 +77,9 @@ def forward(
 
         # Detect document rotation and rotate pages
         seq_maps = [
-            np.sum(np.where(out_map > kwargs.get("bin_thresh", 0.3), 255, 0), axis=-1, keepdims=True).astype(np.uint8)
+            np.where(np.expand_dims(np.amax(out_map, axis=-1), axis=-1) > kwargs.get("bin_thresh", 0.3), 255, 0).astype(
+                np.uint8
+            )
             for out_map in out_maps
         ]
         if self.detect_orientation:
@@ -94,10 +96,10 @@ def forward(
                 else [estimate_orientation(seq_map) for seq_map in seq_maps]
             )
             pages = [
-                rotate_image(page, -angle, expand=True)  # type: ignore[arg-type]
+                rotate_image(page, -angle, expand=False)  # type: ignore[arg-type]
                 for page, angle in zip(pages, origin_page_orientations)
             ]
-            # forward again to get predictions on straight pages
+            # Forward again to get predictions on straight pages
             loc_preds = self.det_predictor(pages, **kwargs)
 
         dict_loc_preds: Dict[str, List[np.ndarray]] = invert_data_structure(loc_preds)  # type: ignore[assignment]
@@ -142,27 +144,12 @@ def forward(
             languages_dict = [{"value": lang[0], "confidence": lang[1]} for lang in languages]
         else:
             languages_dict = None
-        # Rotate back pages and boxes while keeping original image size
-        if self.straighten_pages:
-            boxes_per_page = [
-                {
-                    k: rotate_boxes(
-                        page_boxes,
-                        angle,
-                        orig_shape=page.shape[:2] if isinstance(page, np.ndarray) else page.shape[1:],
-                        target_shape=mask,
-                    )
-                    for k, page_boxes in page_boxes_dict.items()
-                }
-                for page_boxes_dict, page, angle, mask in zip(
-                    boxes_per_page, pages, origin_page_orientations, origin_page_shapes
-                )
-            ]
 
         out = self.doc_builder(
+            pages,  # type: ignore[arg-type]
             boxes_per_page,
             text_preds_per_page,
-            [page.shape[:2] if channels_last else page.shape[-2:] for page in pages],  # type: ignore[misc]
+            origin_page_shapes,  # type: ignore[arg-type]
             orientations,
             languages_dict,
         )
diff --git a/doctr/models/kie_predictor/tensorflow.py b/doctr/models/kie_predictor/tensorflow.py
index a1c1f8d2c3..26c22c66c2 100644
--- a/doctr/models/kie_predictor/tensorflow.py
+++ b/doctr/models/kie_predictor/tensorflow.py
@@ -12,7 +12,7 @@
 from doctr.models._utils import estimate_orientation, get_language, invert_data_structure
 from doctr.models.detection.predictor import DetectionPredictor
 from doctr.models.recognition.predictor import RecognitionPredictor
-from doctr.utils.geometry import rotate_boxes, rotate_image
+from doctr.utils.geometry import rotate_image
 from doctr.utils.repr import NestedObject
 
 from .base import _KIEPredictor
@@ -73,11 +73,13 @@ def __call__(
         origin_page_shapes = [page.shape[:2] for page in pages]
 
         # Localize text elements
-        loc_preds, out_maps = self.det_predictor(pages, return_preds=True, **kwargs)
+        loc_preds, out_maps = self.det_predictor(pages, return_maps=True, **kwargs)
 
         # Detect document rotation and rotate pages
         seq_maps = [
-            np.sum(np.where(out_map > kwargs.get("bin_thresh", 0.3), 255, 0), axis=-1, keepdims=True).astype(np.uint8)
+            np.where(np.expand_dims(np.amax(out_map, axis=-1), axis=-1) > kwargs.get("bin_thresh", 0.3), 255, 0).astype(
+                np.uint8
+            )
             for out_map in out_maps
         ]
         if self.detect_orientation:
@@ -93,8 +95,8 @@ def __call__(
                 if self.detect_orientation
                 else [estimate_orientation(seq_map) for seq_map in seq_maps]
             )
-            pages = [rotate_image(page, -angle, expand=True) for page, angle in zip(pages, origin_page_orientations)]
-            # forward again to get predictions on straight pages
+            pages = [rotate_image(page, -angle, expand=False) for page, angle in zip(pages, origin_page_orientations)]
+            # Forward again to get predictions on straight pages
             loc_preds = self.det_predictor(pages, **kwargs)  # type: ignore[assignment]
 
         dict_loc_preds: Dict[str, List[np.ndarray]] = invert_data_structure(loc_preds)  # type: ignore
@@ -135,24 +137,9 @@ def __call__(
             languages_dict = [{"value": lang[0], "confidence": lang[1]} for lang in languages]
         else:
             languages_dict = None
-        # Rotate back pages and boxes while keeping original image size
-        if self.straighten_pages:
-            boxes_per_page = [
-                {
-                    k: rotate_boxes(
-                        page_boxes,
-                        angle,
-                        orig_shape=page.shape[:2] if isinstance(page, np.ndarray) else page.shape[-2:],
-                        target_shape=mask,  # type: ignore[arg-type]
-                    )
-                    for k, page_boxes in page_boxes_dict.items()
-                }
-                for page_boxes_dict, page, angle, mask in zip(
-                    boxes_per_page, pages, origin_page_orientations, origin_page_shapes
-                )
-            ]
 
         out = self.doc_builder(
+            pages,
             boxes_per_page,
             text_preds_per_page,
             origin_page_shapes,  # type: ignore[arg-type]
diff --git a/doctr/models/predictor/pytorch.py b/doctr/models/predictor/pytorch.py
index c16d404166..a55e4c3cc4 100644
--- a/doctr/models/predictor/pytorch.py
+++ b/doctr/models/predictor/pytorch.py
@@ -13,7 +13,7 @@
 from doctr.models._utils import estimate_orientation, get_language
 from doctr.models.detection.predictor import DetectionPredictor
 from doctr.models.recognition.predictor import RecognitionPredictor
-from doctr.utils.geometry import rotate_boxes, rotate_image
+from doctr.utils.geometry import rotate_image
 
 from .base import _OCRPredictor
 
@@ -91,10 +91,10 @@ def forward(
                 else [estimate_orientation(seq_map) for seq_map in seq_maps]
             )
             pages = [
-                rotate_image(page, -angle, expand=True)  # type: ignore[arg-type]
+                rotate_image(page, -angle, expand=False)  # type: ignore[arg-type]
                 for page, angle in zip(pages, origin_page_orientations)
             ]
-            # forward again to get predictions on straight pages
+            # Forward again to get predictions on straight pages
             loc_preds = self.det_predictor(pages, **kwargs)
 
         assert all(
@@ -128,22 +128,12 @@ def forward(
             languages_dict = [{"value": lang[0], "confidence": lang[1]} for lang in languages]
         else:
             languages_dict = None
-        # Rotate back pages and boxes while keeping original image size
-        if self.straighten_pages:
-            boxes = [
-                rotate_boxes(
-                    page_boxes,
-                    angle,
-                    orig_shape=page.shape[:2] if isinstance(page, np.ndarray) else page.shape[1:],
-                    target_shape=mask,
-                )
-                for page_boxes, page, angle, mask in zip(boxes, pages, origin_page_orientations, origin_page_shapes)
-            ]
 
         out = self.doc_builder(
+            pages,  # type: ignore[arg-type]
             boxes,
             text_preds,
-            [page.shape[:2] if channels_last else page.shape[-2:] for page in pages],  # type: ignore[misc]
+            origin_page_shapes,  # type: ignore[arg-type]
             orientations,
             languages_dict,
         )
diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py
index a1d66cdb66..14a80ecfd8 100644
--- a/doctr/models/predictor/tensorflow.py
+++ b/doctr/models/predictor/tensorflow.py
@@ -12,7 +12,7 @@
 from doctr.models._utils import estimate_orientation, get_language
 from doctr.models.detection.predictor import DetectionPredictor
 from doctr.models.recognition.predictor import RecognitionPredictor
-from doctr.utils.geometry import rotate_boxes, rotate_image
+from doctr.utils.geometry import rotate_image
 from doctr.utils.repr import NestedObject
 
 from .base import _OCRPredictor
@@ -90,7 +90,7 @@ def __call__(
                 if self.detect_orientation
                 else [estimate_orientation(seq_map) for seq_map in seq_maps]
             )
-            pages = [rotate_image(page, -angle, expand=True) for page, angle in zip(pages, origin_page_orientations)]
+            pages = [rotate_image(page, -angle, expand=False) for page, angle in zip(pages, origin_page_orientations)]
             # forward again to get predictions on straight pages
             loc_preds_dict = self.det_predictor(pages, **kwargs)  # type: ignore[assignment]
 
@@ -122,19 +122,9 @@ def __call__(
             languages_dict = [{"value": lang[0], "confidence": lang[1]} for lang in languages]
         else:
             languages_dict = None
-        # Rotate back pages and boxes while keeping original image size
-        if self.straighten_pages:
-            boxes = [
-                rotate_boxes(
-                    page_boxes,
-                    angle,
-                    orig_shape=page.shape[:2] if isinstance(page, np.ndarray) else page.shape[-2:],
-                    target_shape=mask,  # type: ignore[arg-type]
-                )
-                for page_boxes, page, angle, mask in zip(boxes, pages, origin_page_orientations, origin_page_shapes)
-            ]
 
         out = self.doc_builder(
+            pages,
             boxes,
             text_preds,
             origin_page_shapes,  # type: ignore[arg-type]
diff --git a/tests/common/test_io_elements.py b/tests/common/test_io_elements.py
index 965033290a..af982c6e04 100644
--- a/tests/common/test_io_elements.py
+++ b/tests/common/test_io_elements.py
@@ -72,6 +72,7 @@ def _mock_blocks(size=(1, 1), offset=(0, 0)):
 def _mock_pages(block_size=(1, 1), block_offset=(0, 0)):
     return [
         elements.Page(
+            np.random.randint(0, 255, (300, 200, 3), dtype=np.uint8),
             _mock_blocks(block_size, block_offset),
             0,
             (300, 200),
@@ -79,6 +80,7 @@ def _mock_pages(block_size=(1, 1), block_offset=(0, 0)):
             {"value": "EN", "confidence": 0.8},
         ),
         elements.Page(
+            np.random.randint(0, 255, (500, 1000, 3), dtype=np.uint8),
             _mock_blocks(block_size, block_offset),
             1,
             (500, 1000),
@@ -91,6 +93,7 @@ def _mock_pages(block_size=(1, 1), block_offset=(0, 0)):
 def _mock_kie_pages(prediction_size=(1, 1), prediction_offset=(0, 0)):
     return [
         elements.KIEPage(
+            np.random.randint(0, 255, (300, 200, 3), dtype=np.uint8),
             {CLASS_NAME: _mock_prediction(prediction_size, prediction_offset)},
             0,
             (300, 200),
@@ -98,6 +101,7 @@ def _mock_kie_pages(prediction_size=(1, 1), prediction_offset=(0, 0)):
             {"value": "EN", "confidence": 0.8},
         ),
         elements.KIEPage(
+            np.random.randint(0, 255, (500, 1000, 3), dtype=np.uint8),
             {CLASS_NAME: _mock_prediction(prediction_size, prediction_offset)},
             1,
             (500, 1000),
@@ -243,16 +247,18 @@ def test_block():
 
 
 def test_page():
+    page = np.zeros((300, 200, 3), dtype=np.uint8)
     page_idx = 0
     page_size = (300, 200)
     orientation = {"value": 0.0, "confidence": 0.0}
     language = {"value": "EN", "confidence": 0.8}
     blocks = _mock_blocks()
-    page = elements.Page(blocks, page_idx, page_size, orientation, language)
+    page = elements.Page(page, blocks, page_idx, page_size, orientation, language)
 
     # Attribute checks
     assert len(page.blocks) == len(blocks)
     assert all(isinstance(b, elements.Block) for b in page.blocks)
+    assert isinstance(page.page, np.ndarray)
     assert page.page_idx == page_idx
     assert page.dimensions == page_size
     assert page.orientation == orientation
@@ -281,7 +287,7 @@ def test_page():
     assert "\n".join(repr(page).split("\n")[:2]) == f"Page(\n  dimensions={page_size!r}"
 
     # Show
-    page.show(np.zeros((256, 256, 3), dtype=np.uint8), block=False)
+    page.show(block=False)
 
     # Synthesize
     img = page.synthesize()
@@ -290,16 +296,18 @@ def test_page():
 
 
 def test_kiepage():
+    page = np.zeros((300, 200, 3), dtype=np.uint8)
     page_idx = 0
     page_size = (300, 200)
     orientation = {"value": 0.0, "confidence": 0.0}
     language = {"value": "EN", "confidence": 0.8}
     predictions = {CLASS_NAME: _mock_prediction()}
-    kie_page = elements.KIEPage(predictions, page_idx, page_size, orientation, language)
+    kie_page = elements.KIEPage(page, predictions, page_idx, page_size, orientation, language)
 
     # Attribute checks
     assert len(kie_page.predictions) == len(predictions)
     assert all(isinstance(b, elements.Prediction) for b in kie_page.predictions[CLASS_NAME])
+    assert isinstance(kie_page.page, np.ndarray)
     assert kie_page.page_idx == page_idx
     assert kie_page.dimensions == page_size
     assert kie_page.orientation == orientation
@@ -328,7 +336,7 @@ def test_kiepage():
     assert "\n".join(repr(kie_page).split("\n")[:2]) == f"KIEPage(\n  dimensions={page_size!r}"
 
     # Show
-    kie_page.show(np.zeros((256, 256, 3), dtype=np.uint8), block=False)
+    kie_page.show(block=False)
 
     # Synthesize
     img = kie_page.synthesize()
@@ -355,7 +363,7 @@ def test_document():
     assert isinstance(doc.export_as_xml(), list) and len(doc.export_as_xml()) == len(pages)
 
     # Show
-    doc.show([np.zeros((256, 256, 3), dtype=np.uint8) for _ in range(len(pages))], block=False)
+    doc.show(block=False)
 
     # Synthesize
     img_list = doc.synthesize()
@@ -381,7 +389,7 @@ def test_kie_document():
     assert isinstance(doc.export_as_xml(), list) and len(doc.export_as_xml()) == len(pages)
 
     # Show
-    doc.show([np.zeros((256, 256, 3), dtype=np.uint8) for _ in range(len(pages))], block=False)
+    doc.show(block=False)
 
     # Synthesize
     img_list = doc.synthesize()
diff --git a/tests/common/test_models_builder.py b/tests/common/test_models_builder.py
index 7940bf8a5d..90c681b5f6 100644
--- a/tests/common/test_models_builder.py
+++ b/tests/common/test_models_builder.py
@@ -20,12 +20,13 @@ def test_documentbuilder():
 
     # Don't resolve lines
     doc_builder = builder.DocumentBuilder(resolve_lines=False, resolve_blocks=False)
+    pages = [np.zeros((100, 200, 3))] * num_pages
     boxes = np.random.rand(words_per_page, 6)  # array format
     boxes[:2] *= boxes[2:4]
     # Arg consistency check
     with pytest.raises(ValueError):
-        doc_builder([boxes, boxes], [("hello", 1.0)] * 3, [(100, 200), (100, 200)])
-    out = doc_builder([boxes, boxes], [[("hello", 1.0)] * words_per_page] * num_pages, [(100, 200), (100, 200)])
+        doc_builder(pages, [boxes, boxes], [("hello", 1.0)] * 3, [(100, 200), (100, 200)])
+    out = doc_builder(pages, [boxes, boxes], [[("hello", 1.0)] * words_per_page] * num_pages, [(100, 200), (100, 200)])
     assert isinstance(out, Document)
     assert len(out.pages) == num_pages
     # 1 Block & 1 line per page
@@ -34,11 +35,11 @@ def test_documentbuilder():
 
     # Resolve lines
     doc_builder = builder.DocumentBuilder(resolve_lines=True, resolve_blocks=True)
-    out = doc_builder([boxes, boxes], [[("hello", 1.0)] * words_per_page] * num_pages, [(100, 200), (100, 200)])
+    out = doc_builder(pages, [boxes, boxes], [[("hello", 1.0)] * words_per_page] * num_pages, [(100, 200), (100, 200)])
 
     # No detection
     boxes = np.zeros((0, 5))
-    out = doc_builder([boxes, boxes], [[], []], [(100, 200), (100, 200)])
+    out = doc_builder(pages, [boxes, boxes], [[], []], [(100, 200), (100, 200)])
     assert len(out.pages[0].blocks) == 0
 
     # Rotated boxes to export as straight boxes
@@ -49,7 +50,7 @@ def test_documentbuilder():
         ]
     )
     doc_builder_2 = builder.DocumentBuilder(resolve_blocks=False, resolve_lines=False, export_as_straight_boxes=True)
-    out = doc_builder_2([boxes], [[("hello", 0.99), ("word", 0.99)]], [(100, 100)])
+    out = doc_builder_2([np.zeros((100, 100, 3))], [boxes], [[("hello", 0.99), ("word", 0.99)]], [(100, 100)])
     assert out.pages[0].blocks[0].lines[0].words[-1].geometry == ((0.45, 0.5), (0.6, 0.65))
 
     # Repr
@@ -64,12 +65,14 @@ def test_kiedocumentbuilder():
 
     # Don't resolve lines
     doc_builder = builder.KIEDocumentBuilder(resolve_lines=False, resolve_blocks=False)
+    pages = [np.zeros((100, 200, 3))] * num_pages
     predictions = {CLASS_NAME: np.random.rand(words_per_page, 6)}  # dict format
     predictions[CLASS_NAME][:2] *= predictions[CLASS_NAME][2:4]
     # Arg consistency check
     with pytest.raises(ValueError):
-        doc_builder([predictions, predictions], [{CLASS_NAME: ("hello", 1.0)}] * 3, [(100, 200), (100, 200)])
+        doc_builder(pages, [predictions, predictions], [{CLASS_NAME: ("hello", 1.0)}] * 3, [(100, 200), (100, 200)])
     out = doc_builder(
+        pages,
         [predictions, predictions],
         [{CLASS_NAME: [("hello", 1.0)] * words_per_page}] * num_pages,
         [(100, 200), (100, 200)],
@@ -83,6 +86,7 @@ def test_kiedocumentbuilder():
     # Resolve lines
     doc_builder = builder.KIEDocumentBuilder(resolve_lines=True, resolve_blocks=True)
     out = doc_builder(
+        pages,
         [predictions, predictions],
         [{CLASS_NAME: [("hello", 1.0)] * words_per_page}] * num_pages,
         [(100, 200), (100, 200)],
@@ -90,7 +94,7 @@ def test_kiedocumentbuilder():
 
     # No detection
     predictions = {CLASS_NAME: np.zeros((0, 5))}
-    out = doc_builder([predictions, predictions], [{CLASS_NAME: []}, {CLASS_NAME: []}], [(100, 200), (100, 200)])
+    out = doc_builder(pages, [predictions, predictions], [{CLASS_NAME: []}, {CLASS_NAME: []}], [(100, 200), (100, 200)])
     assert len(out.pages[0].predictions[CLASS_NAME]) == 0
 
     # Rotated boxes to export as straight boxes
@@ -103,7 +107,9 @@ def test_kiedocumentbuilder():
         )
     }
     doc_builder_2 = builder.KIEDocumentBuilder(resolve_blocks=False, resolve_lines=False, export_as_straight_boxes=True)
-    out = doc_builder_2([predictions], [{CLASS_NAME: [("hello", 0.99), ("word", 0.99)]}], [(100, 100)])
+    out = doc_builder_2(
+        [np.zeros((100, 100, 3))], [predictions], [{CLASS_NAME: [("hello", 0.99), ("word", 0.99)]}], [(100, 100)]
+    )
     assert out.pages[0].predictions[CLASS_NAME][0].geometry == ((0.05, 0.1), (0.2, 0.25))
     assert out.pages[0].predictions[CLASS_NAME][1].geometry == ((0.45, 0.5), (0.6, 0.65))
 

From a21c4afb307d585aae9675f0185efde3960c2b9d Mon Sep 17 00:00:00 2001
From: felix <felixdittrich92@gmail.com>
Date: Wed, 4 Oct 2023 16:26:59 +0200
Subject: [PATCH 03/13] update tests

---
 tests/pytorch/test_models_zoo_pt.py    | 53 ++++++++++++++------------
 tests/tensorflow/test_models_zoo_tf.py | 52 ++++++++++++++-----------
 2 files changed, 58 insertions(+), 47 deletions(-)

diff --git a/tests/pytorch/test_models_zoo_pt.py b/tests/pytorch/test_models_zoo_pt.py
index cefb77176f..fa3f23b9d1 100644
--- a/tests/pytorch/test_models_zoo_pt.py
+++ b/tests/pytorch/test_models_zoo_pt.py
@@ -73,10 +73,17 @@ def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
     assert out.pages[0].orientation["value"] == orientation
 
 
-def test_trained_ocr_predictor(mock_tilted_payslip):
-    doc = DocumentFile.from_images(mock_tilted_payslip)
+def test_trained_ocr_predictor(mock_payslip):
+    doc = DocumentFile.from_images(mock_payslip)
 
-    det_predictor = detection_predictor("db_resnet50", pretrained=True, batch_size=2, assume_straight_pages=True)
+    det_predictor = detection_predictor(
+        "db_resnet50",
+        pretrained=True,
+        batch_size=2,
+        assume_straight_pages=True,
+        symmetric_pad=True,
+        preserve_aspect_ratio=False,
+    )
     reco_predictor = recognition_predictor("crnn_vgg16_bn", pretrained=True, batch_size=128)
 
     predictor = OCRPredictor(
@@ -90,16 +97,12 @@ def test_trained_ocr_predictor(mock_tilted_payslip):
     out = predictor(doc)
 
     assert out.pages[0].blocks[0].lines[0].words[0].value == "Mr."
-    geometry_mr = np.array(
-        [[0.08563021, 0.35584526], [0.11464554, 0.34078913], [0.1274898, 0.36012764], [0.09847447, 0.37518377]]
-    )
-    assert np.allclose(np.array(out.pages[0].blocks[0].lines[0].words[0].geometry), geometry_mr)
+    geometry_mr = np.array([[0.1083984375, 0.0634765625], [0.1494140625, 0.0859375]])
+    assert np.allclose(np.array(out.pages[0].blocks[0].lines[0].words[0].geometry), geometry_mr, rtol=0.05)
 
     assert out.pages[0].blocks[1].lines[0].words[-1].value == "revised"
-    geometry_revised = np.array(
-        [[0.50422498, 0.19551784], [0.55741975, 0.16791493], [0.56705294, 0.18241881], [0.51385817, 0.21002172]]
-    )
-    assert np.allclose(np.array(out.pages[0].blocks[1].lines[0].words[-1].geometry), geometry_revised)
+    geometry_revised = np.array([[0.7548828125, 0.126953125], [0.8388671875, 0.1484375]])
+    assert np.allclose(np.array(out.pages[0].blocks[1].lines[0].words[-1].geometry), geometry_revised, rtol=0.05)
 
     det_predictor = detection_predictor(
         "db_resnet50",
@@ -181,10 +184,17 @@ def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
     assert out.pages[0].orientation["value"] == orientation
 
 
-def test_trained_kie_predictor(mock_tilted_payslip):
-    doc = DocumentFile.from_images(mock_tilted_payslip)
+def test_trained_kie_predictor(mock_payslip):
+    doc = DocumentFile.from_images(mock_payslip)
 
-    det_predictor = detection_predictor("db_resnet50", pretrained=True, batch_size=2, assume_straight_pages=True)
+    det_predictor = detection_predictor(
+        "db_resnet50",
+        pretrained=True,
+        batch_size=2,
+        assume_straight_pages=True,
+        symmetric_pad=True,
+        preserve_aspect_ratio=False,
+    )
     reco_predictor = recognition_predictor("crnn_vgg16_bn", pretrained=True, batch_size=128)
 
     predictor = KIEPredictor(
@@ -199,17 +209,12 @@ def test_trained_kie_predictor(mock_tilted_payslip):
 
     assert isinstance(out, KIEDocument)
     assert out.pages[0].predictions[CLASS_NAME][0].value == "Mr."
-    geometry_mr = np.array(
-        [[0.08563021, 0.35584526], [0.11464554, 0.34078913], [0.1274898, 0.36012764], [0.09847447, 0.37518377]]
-    )
-    assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][0].geometry), geometry_mr)
+    geometry_mr = np.array([[0.1083984375, 0.0634765625], [0.1494140625, 0.0859375]])
+    assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][0].geometry), geometry_mr, rtol=0.05)
 
-    print(out.pages[0].predictions[CLASS_NAME])
-    assert out.pages[0].predictions[CLASS_NAME][7].value == "revised"
-    geometry_revised = np.array(
-        [[0.50422498, 0.19551784], [0.55741975, 0.16791493], [0.56705294, 0.18241881], [0.51385817, 0.21002172]]
-    )
-    assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][7].geometry), geometry_revised)
+    assert out.pages[0].predictions[CLASS_NAME][6].value == "revised"
+    geometry_revised = np.array([[0.7548828125, 0.126953125], [0.8388671875, 0.1484375]])
+    assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][6].geometry), geometry_revised, rtol=0.05)
 
     det_predictor = detection_predictor(
         "db_resnet50",
diff --git a/tests/tensorflow/test_models_zoo_tf.py b/tests/tensorflow/test_models_zoo_tf.py
index 6d4b85e2c8..32e7988560 100644
--- a/tests/tensorflow/test_models_zoo_tf.py
+++ b/tests/tensorflow/test_models_zoo_tf.py
@@ -72,10 +72,17 @@ def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
     assert out.pages[0].language["value"] == language
 
 
-def test_trained_ocr_predictor(mock_tilted_payslip):
-    doc = DocumentFile.from_images(mock_tilted_payslip)
+def test_trained_ocr_predictor(mock_payslip):
+    doc = DocumentFile.from_images(mock_payslip)
 
-    det_predictor = detection_predictor("db_resnet50", pretrained=True, batch_size=2, assume_straight_pages=True)
+    det_predictor = detection_predictor(
+        "db_resnet50",
+        pretrained=True,
+        batch_size=2,
+        assume_straight_pages=True,
+        symmetric_pad=True,
+        preserve_aspect_ratio=False,
+    )
     reco_predictor = recognition_predictor("crnn_vgg16_bn", pretrained=True, batch_size=128)
 
     predictor = OCRPredictor(
@@ -89,16 +96,12 @@ def test_trained_ocr_predictor(mock_tilted_payslip):
     out = predictor(doc)
 
     assert out.pages[0].blocks[0].lines[0].words[0].value == "Mr."
-    geometry_mr = np.array(
-        [[0.08844472, 0.35763523], [0.11625107, 0.34320644], [0.12588427, 0.35771032], [0.09807791, 0.37213911]]
-    )
-    assert np.allclose(np.array(out.pages[0].blocks[0].lines[0].words[0].geometry), geometry_mr)
+    geometry_mr = np.array([[0.1083984375, 0.0634765625], [0.1494140625, 0.0859375]])
+    assert np.allclose(np.array(out.pages[0].blocks[0].lines[0].words[0].geometry), geometry_mr, rtol=0.05)
 
     assert out.pages[0].blocks[1].lines[0].words[-1].value == "revised"
-    geometry_revised = np.array(
-        [[0.50422498, 0.19551784], [0.55741975, 0.16791493], [0.56705294, 0.18241881], [0.51385817, 0.21002172]]
-    )
-    assert np.allclose(np.array(out.pages[0].blocks[1].lines[0].words[-1].geometry), geometry_revised)
+    geometry_revised = np.array([[0.7548828125, 0.126953125], [0.8388671875, 0.1484375]])
+    assert np.allclose(np.array(out.pages[0].blocks[1].lines[0].words[-1].geometry), geometry_revised, rtol=0.05)
 
     det_predictor = detection_predictor(
         "db_resnet50",
@@ -179,10 +182,17 @@ def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
     assert out.pages[0].language["value"] == language
 
 
-def test_trained_kie_predictor(mock_tilted_payslip):
-    doc = DocumentFile.from_images(mock_tilted_payslip)
+def test_trained_kie_predictor(mock_payslip):
+    doc = DocumentFile.from_images(mock_payslip)
 
-    det_predictor = detection_predictor("db_resnet50", pretrained=True, batch_size=2, assume_straight_pages=True)
+    det_predictor = detection_predictor(
+        "db_resnet50",
+        pretrained=True,
+        batch_size=2,
+        assume_straight_pages=True,
+        symmetric_pad=True,
+        preserve_aspect_ratio=False,
+    )
     reco_predictor = recognition_predictor("crnn_vgg16_bn", pretrained=True, batch_size=128)
 
     predictor = KIEPredictor(
@@ -197,16 +207,12 @@ def test_trained_kie_predictor(mock_tilted_payslip):
 
     assert isinstance(out, KIEDocument)
     assert out.pages[0].predictions[CLASS_NAME][0].value == "Mr."
-    geometry_mr = np.array(
-        [[0.08844472, 0.35763523], [0.11625107, 0.34320644], [0.12588427, 0.35771032], [0.09807791, 0.37213911]]
-    )
-    assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][0].geometry), geometry_mr)
+    geometry_mr = np.array([[0.1083984375, 0.0634765625], [0.1494140625, 0.0859375]])
+    assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][0].geometry), geometry_mr, rtol=0.05)
 
-    assert out.pages[0].predictions[CLASS_NAME][-1].value == "Kabir)"
-    geometry_revised = np.array(
-        [[0.43725992, 0.67232439], [0.49045468, 0.64472149], [0.50570724, 0.66768597], [0.452512473, 0.69528887]]
-    )
-    assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][-1].geometry), geometry_revised)
+    assert out.pages[0].predictions[CLASS_NAME][3].value == "revised"
+    geometry_revised = np.array([[0.7548828125, 0.126953125], [0.8388671875, 0.1484375]])
+    assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][3].geometry), geometry_revised, rtol=0.05)
 
     det_predictor = detection_predictor(
         "db_resnet50",

From 6183a44c074a81b5220f1c211dd2509c14e51e34 Mon Sep 17 00:00:00 2001
From: felix <felixdittrich92@gmail.com>
Date: Thu, 5 Oct 2023 09:26:01 +0200
Subject: [PATCH 04/13] update some parts

---
 doctr/models/_utils.py                        | 57 ++++---------------
 doctr/models/detection/predictor/pytorch.py   |  4 +-
 .../models/detection/predictor/tensorflow.py  |  4 +-
 doctr/models/kie_predictor/pytorch.py         |  6 +-
 doctr/models/kie_predictor/tensorflow.py      |  6 +-
 doctr/models/predictor/pytorch.py             |  6 +-
 doctr/models/predictor/tensorflow.py          |  6 +-
 scripts/analyze.py                            |  4 +-
 tests/common/test_models.py                   | 28 ++++++---
 tests/pytorch/test_models_detection_pt.py     |  6 +-
 tests/tensorflow/test_models_detection_tf.py  |  6 +-
 11 files changed, 59 insertions(+), 74 deletions(-)

diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py
index 304fa9a7e8..2b189f216a 100644
--- a/doctr/models/_utils.py
+++ b/doctr/models/_utils.py
@@ -11,7 +11,7 @@
 import numpy as np
 from langdetect import LangDetectException, detect_langs
 
-__all__ = ["estimate_orientation", "get_bitmap_angle", "get_language", "invert_data_structure"]
+__all__ = ["estimate_orientation", "get_language", "invert_data_structure"]
 
 
 def get_max_width_length_ratio(contour: np.ndarray) -> float:
@@ -27,12 +27,12 @@ def get_max_width_length_ratio(contour: np.ndarray) -> float:
     return max(w / h, h / w)
 
 
-def estimate_orientation(seq_map: np.ndarray, n_ct: int = 50, ratio_threshold_for_lines: float = 5) -> float:
+def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_lines: float = 5) -> float:
     """Estimate the angle of the general document orientation based on the
      lines of the document and the assumption that they should be horizontal.
 
     Args:
-        seq_map: the binarized image of the document
+        img: the img or bitmap to analyze (H, W, C)
         n_ct: the number of contours used for the orientation estimation
         ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines
 
@@ -41,12 +41,19 @@ def estimate_orientation(seq_map: np.ndarray, n_ct: int = 50, ratio_threshold_fo
         the angle of the general document orientation
     """
 
+    if np.max(img) <= 1 and np.min(img) >= 0 or (np.max(img) <= 255 and np.min(img) >= 0 and img.shape[-1] == 1):
+        thresh = img.astype(np.uint8)
+    if np.max(img) <= 255 and np.min(img) >= 0 and img.shape[-1] == 3:
+        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        gray_img = cv2.medianBlur(gray_img, 5)
+        thresh = cv2.threshold(gray_img, thresh=0, maxval=255, type=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
+
     # try to merge words in lines
-    (h, w) = seq_map.shape[:2]
+    (h, w) = img.shape[:2]
     k_x = max(1, (floor(w / 100)))
     k_y = max(1, (floor(h / 100)))
     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (k_x, k_y))
-    thresh = cv2.dilate(seq_map, kernel, iterations=1)
+    thresh = cv2.dilate(thresh, kernel, iterations=1)
 
     # extract contours
     contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
@@ -68,46 +75,6 @@ def estimate_orientation(seq_map: np.ndarray, n_ct: int = 50, ratio_threshold_fo
         return -median_low(angles)
 
 
-def get_bitmap_angle(bitmap: np.ndarray, n_ct: int = 20, std_max: float = 3.0) -> float:
-    """From a binarized segmentation map, find contours and fit min area rectangles to determine page angle
-
-    Args:
-    ----
-        bitmap: binarized segmentation map
-        n_ct: number of contours to use to fit page angle
-        std_max: maximum deviation of the angle distribution to consider the mean angle reliable
-
-    Returns:
-    -------
-        The angle of the page
-    """
-    # Find all contours on binarized seg map
-    contours, _ = cv2.findContours(bitmap.astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
-    # Sort contours
-    contours = sorted(contours, key=cv2.contourArea, reverse=True)
-
-    # Find largest contours and fit angles
-    # Track heights and widths to find aspect ratio (determine is rotation is clockwise)
-    angles, heights, widths = [], [], []
-    for ct in contours[:n_ct]:
-        _, (w, h), alpha = cv2.minAreaRect(ct)
-        widths.append(w)
-        heights.append(h)
-        angles.append(alpha)
-
-    if np.std(angles) > std_max:
-        # Edge case with angles of both 0 and 90°, or multi_oriented docs
-        angle = 0.0
-    else:
-        angle = -np.mean(angles)
-        # Determine rotation direction (clockwise/counterclockwise)
-        # Angle coverage: [-90°, +90°], half of the quadrant
-        if np.sum(widths) < np.sum(heights):  # CounterClockwise
-            angle = 90 + angle
-
-    return angle
-
-
 def rectify_crops(
     crops: List[np.ndarray],
     orientations: List[int],
diff --git a/doctr/models/detection/predictor/pytorch.py b/doctr/models/detection/predictor/pytorch.py
index 34f26f03e4..05a6426503 100644
--- a/doctr/models/detection/predictor/pytorch.py
+++ b/doctr/models/detection/predictor/pytorch.py
@@ -53,9 +53,9 @@ def forward(
             self.model(batch, return_preds=True, return_model_output=True, **kwargs) for batch in processed_batches
         ]
         preds = [pred for batch in predicted_batches for pred in batch["preds"]]
-        seq_maps = [
+        seg_maps = [
             pred.permute(1, 2, 0).detach().cpu().numpy() for batch in predicted_batches for pred in batch["out_map"]
         ]
         if return_maps:
-            return preds, seq_maps
+            return preds, seg_maps
         return preds
diff --git a/doctr/models/detection/predictor/tensorflow.py b/doctr/models/detection/predictor/tensorflow.py
index 6317a61874..30a1d3aba1 100644
--- a/doctr/models/detection/predictor/tensorflow.py
+++ b/doctr/models/detection/predictor/tensorflow.py
@@ -51,7 +51,7 @@ def __call__(
         ]
 
         preds = [pred for batch in predicted_batches for pred in batch["preds"]]
-        seq_maps = [pred.numpy() for batch in predicted_batches for pred in batch["out_map"]]
+        seg_maps = [pred.numpy() for batch in predicted_batches for pred in batch["out_map"]]
         if return_maps:
-            return preds, seq_maps
+            return preds, seg_maps
         return preds
diff --git a/doctr/models/kie_predictor/pytorch.py b/doctr/models/kie_predictor/pytorch.py
index 957a7270b3..115d8668db 100644
--- a/doctr/models/kie_predictor/pytorch.py
+++ b/doctr/models/kie_predictor/pytorch.py
@@ -76,14 +76,14 @@ def forward(
         loc_preds, out_maps = self.det_predictor(pages, return_maps=True, **kwargs)
 
         # Detect document rotation and rotate pages
-        seq_maps = [
+        seg_maps = [
             np.where(np.expand_dims(np.amax(out_map, axis=-1), axis=-1) > kwargs.get("bin_thresh", 0.3), 255, 0).astype(
                 np.uint8
             )
             for out_map in out_maps
         ]
         if self.detect_orientation:
-            origin_page_orientations = [estimate_orientation(seq_map) for seq_map in seq_maps]
+            origin_page_orientations = [estimate_orientation(seq_map) for seq_map in seg_maps]
             orientations = [
                 {"value": orientation_page, "confidence": None} for orientation_page in origin_page_orientations
             ]
@@ -93,7 +93,7 @@ def forward(
             origin_page_orientations = (
                 origin_page_orientations
                 if self.detect_orientation
-                else [estimate_orientation(seq_map) for seq_map in seq_maps]
+                else [estimate_orientation(seq_map) for seq_map in seg_maps]
             )
             pages = [
                 rotate_image(page, -angle, expand=False)  # type: ignore[arg-type]
diff --git a/doctr/models/kie_predictor/tensorflow.py b/doctr/models/kie_predictor/tensorflow.py
index 26c22c66c2..94d6ccab58 100644
--- a/doctr/models/kie_predictor/tensorflow.py
+++ b/doctr/models/kie_predictor/tensorflow.py
@@ -76,14 +76,14 @@ def __call__(
         loc_preds, out_maps = self.det_predictor(pages, return_maps=True, **kwargs)
 
         # Detect document rotation and rotate pages
-        seq_maps = [
+        seg_maps = [
             np.where(np.expand_dims(np.amax(out_map, axis=-1), axis=-1) > kwargs.get("bin_thresh", 0.3), 255, 0).astype(
                 np.uint8
             )
             for out_map in out_maps
         ]
         if self.detect_orientation:
-            origin_page_orientations = [estimate_orientation(seq_map) for seq_map in seq_maps]
+            origin_page_orientations = [estimate_orientation(seq_map) for seq_map in seg_maps]
             orientations = [
                 {"value": orientation_page, "confidence": None} for orientation_page in origin_page_orientations
             ]
@@ -93,7 +93,7 @@ def __call__(
             origin_page_orientations = (
                 origin_page_orientations
                 if self.detect_orientation
-                else [estimate_orientation(seq_map) for seq_map in seq_maps]
+                else [estimate_orientation(seq_map) for seq_map in seg_maps]
             )
             pages = [rotate_image(page, -angle, expand=False) for page, angle in zip(pages, origin_page_orientations)]
             # Forward again to get predictions on straight pages
diff --git a/doctr/models/predictor/pytorch.py b/doctr/models/predictor/pytorch.py
index a55e4c3cc4..350444fba3 100644
--- a/doctr/models/predictor/pytorch.py
+++ b/doctr/models/predictor/pytorch.py
@@ -76,9 +76,9 @@ def forward(
         loc_preds, out_maps = self.det_predictor(pages, return_maps=True, **kwargs)
 
         # Detect document rotation and rotate pages
-        seq_maps = [np.where(out_map > kwargs.get("bin_thresh", 0.3), 255, 0).astype(np.uint8) for out_map in out_maps]
+        seg_maps = [np.where(out_map > kwargs.get("bin_thresh", 0.3), 255, 0).astype(np.uint8) for out_map in out_maps]
         if self.detect_orientation:
-            origin_page_orientations = [estimate_orientation(seq_map) for seq_map in seq_maps]
+            origin_page_orientations = [estimate_orientation(seq_map) for seq_map in seg_maps]
             orientations = [
                 {"value": orientation_page, "confidence": None} for orientation_page in origin_page_orientations
             ]
@@ -88,7 +88,7 @@ def forward(
             origin_page_orientations = (
                 origin_page_orientations
                 if self.detect_orientation
-                else [estimate_orientation(seq_map) for seq_map in seq_maps]
+                else [estimate_orientation(seq_map) for seq_map in seg_maps]
             )
             pages = [
                 rotate_image(page, -angle, expand=False)  # type: ignore[arg-type]
diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py
index 14a80ecfd8..5f747a01d4 100644
--- a/doctr/models/predictor/tensorflow.py
+++ b/doctr/models/predictor/tensorflow.py
@@ -76,9 +76,9 @@ def __call__(
         loc_preds_dict, out_maps = self.det_predictor(pages, return_maps=True, **kwargs)
 
         # Detect document rotation and rotate pages
-        seq_maps = [np.where(out_map > kwargs.get("bin_thresh", 0.3), 255, 0).astype(np.uint8) for out_map in out_maps]
+        seg_maps = [np.where(out_map > kwargs.get("bin_thresh", 0.3), 255, 0).astype(np.uint8) for out_map in out_maps]
         if self.detect_orientation:
-            origin_page_orientations = [estimate_orientation(seq_map) for seq_map in seq_maps]
+            origin_page_orientations = [estimate_orientation(seq_map) for seq_map in seg_maps]
             orientations = [
                 {"value": orientation_page, "confidence": None} for orientation_page in origin_page_orientations
             ]
@@ -88,7 +88,7 @@ def __call__(
             origin_page_orientations = (
                 origin_page_orientations
                 if self.detect_orientation
-                else [estimate_orientation(seq_map) for seq_map in seq_maps]
+                else [estimate_orientation(seq_map) for seq_map in seg_maps]
             )
             pages = [rotate_image(page, -angle, expand=False) for page, angle in zip(pages, origin_page_orientations)]
             # forward again to get predictions on straight pages
diff --git a/scripts/analyze.py b/scripts/analyze.py
index 067ed62685..2e0f19c034 100644
--- a/scripts/analyze.py
+++ b/scripts/analyze.py
@@ -31,8 +31,8 @@ def main(args):
 
     out = model(doc)
 
-    for page, img in zip(out.pages, doc):
-        page.show(img, block=not args.noblock, interactive=not args.static)
+    for page in out.pages:
+        page.show(block=not args.noblock, interactive=not args.static)
 
 
 def parse_args():
diff --git a/tests/common/test_models.py b/tests/common/test_models.py
index ab1dc35323..556b734990 100644
--- a/tests/common/test_models.py
+++ b/tests/common/test_models.py
@@ -6,7 +6,7 @@
 import requests
 
 from doctr.io import reader
-from doctr.models._utils import estimate_orientation, get_bitmap_angle, get_language, invert_data_structure
+from doctr.models._utils import estimate_orientation, get_language, invert_data_structure
 from doctr.utils import geometry
 
 
@@ -23,22 +23,32 @@ def mock_image(tmpdir_factory):
 
 @pytest.fixture(scope="function")
 def mock_bitmap(mock_image):
-    bitmap = np.squeeze(cv2.cvtColor(mock_image, cv2.COLOR_BGR2GRAY))
+    bitmap = np.squeeze(cv2.cvtColor(mock_image, cv2.COLOR_BGR2GRAY) / 255.0)
+    bitmap = np.expand_dims(bitmap, axis=-1)
     return bitmap
 
 
-def test_get_bitmap_angle(mock_bitmap):
-    angle = get_bitmap_angle(mock_bitmap)
-    assert abs(angle - 30.0) < 1.0
+def test_estimate_orientation(mock_image, mock_bitmap, mock_tilted_payslip):
+    assert estimate_orientation(mock_image * 0) == 0
 
+    # test binarized image
+    angle = estimate_orientation(mock_bitmap)
+    assert abs(angle - 30.0) < 1.0
 
-def test_estimate_orientation(mock_bitmap):
-    assert estimate_orientation(mock_bitmap * 0) == 0
+    angle = estimate_orientation(mock_bitmap * 255)
+    assert abs(angle - 30.0) < 1.0
 
-    angle = estimate_orientation(mock_bitmap)
+    angle = estimate_orientation(mock_image)
     assert abs(angle - 30.0) < 1.0
 
-    rotated = geometry.rotate_image(mock_bitmap, -angle)
+    rotated = geometry.rotate_image(mock_image, -angle)
+    angle_rotated = estimate_orientation(rotated)
+    assert abs(angle_rotated) < 1.0
+
+    mock_tilted_payslip = reader.read_img_as_numpy(mock_tilted_payslip)
+    assert (estimate_orientation(mock_tilted_payslip) - 30.0) < 1.0
+
+    rotated = geometry.rotate_image(mock_tilted_payslip, -30, expand=True)
     angle_rotated = estimate_orientation(rotated)
     assert abs(angle_rotated) < 1.0
 
diff --git a/tests/pytorch/test_models_detection_pt.py b/tests/pytorch/test_models_detection_pt.py
index 39eae65168..8dac82d436 100644
--- a/tests/pytorch/test_models_detection_pt.py
+++ b/tests/pytorch/test_models_detection_pt.py
@@ -95,9 +95,13 @@ def test_detection_zoo(arch_name):
         input_tensor = input_tensor.cuda()
 
     with torch.no_grad():
-        out = predictor(input_tensor)
+        out, seq_maps = predictor(input_tensor, return_maps=True)
     assert all(isinstance(boxes, dict) for boxes in out)
     assert all(isinstance(boxes[CLASS_NAME], np.ndarray) and boxes[CLASS_NAME].shape[1] == 5 for boxes in out)
+    assert all(isinstance(seq_map, np.ndarray) for seq_map in seq_maps)
+    assert all(seq_map.shape[:2] == (1024, 1024) for seq_map in seq_maps)
+    # check that all values in the seq_maps are between 0 and 1
+    assert all((seq_map >= 0).all() and (seq_map <= 1).all() for seq_map in seq_maps)
 
 
 def test_erode():
diff --git a/tests/tensorflow/test_models_detection_tf.py b/tests/tensorflow/test_models_detection_tf.py
index ef8d6920ef..d5411f3027 100644
--- a/tests/tensorflow/test_models_detection_tf.py
+++ b/tests/tensorflow/test_models_detection_tf.py
@@ -146,9 +146,13 @@ def test_detection_zoo(arch_name):
     # object check
     assert isinstance(predictor, DetectionPredictor)
     input_tensor = tf.random.uniform(shape=[2, 1024, 1024, 3], minval=0, maxval=1)
-    out = predictor(input_tensor)
+    out, seq_maps = predictor(input_tensor, return_maps=True)
     assert all(isinstance(boxes, dict) for boxes in out)
     assert all(isinstance(boxes[CLASS_NAME], np.ndarray) and boxes[CLASS_NAME].shape[1] == 5 for boxes in out)
+    assert all(isinstance(seq_map, np.ndarray) for seq_map in seq_maps)
+    assert all(seq_map.shape[:2] == (1024, 1024) for seq_map in seq_maps)
+    # check that all values in the seq_maps are between 0 and 1
+    assert all((seq_map >= 0).all() and (seq_map <= 1).all() for seq_map in seq_maps)
 
 
 def test_detection_zoo_error():

From 768e00e703fa65135acb210fb9639621176e1948 Mon Sep 17 00:00:00 2001
From: felix <felixdittrich92@gmail.com>
Date: Thu, 5 Oct 2023 11:50:00 +0200
Subject: [PATCH 05/13] more checks and tests

---
 doctr/models/_utils.py              | 4 +++-
 tests/common/test_models.py         | 3 +++
 tests/common/test_models_builder.py | 6 ++++++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py
index 2b189f216a..0f9cfb1c01 100644
--- a/doctr/models/_utils.py
+++ b/doctr/models/_utils.py
@@ -41,6 +41,7 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li
         the angle of the general document orientation
     """
 
+    assert len(img.shape) == 3 and img.shape[-1] in [1, 3], f"Image shape {img.shape} not supported"
     if np.max(img) <= 1 and np.min(img) >= 0 or (np.max(img) <= 255 and np.min(img) >= 0 and img.shape[-1] == 1):
         thresh = img.astype(np.uint8)
     if np.max(img) <= 255 and np.min(img) >= 0 and img.shape[-1] == 3:
@@ -72,7 +73,8 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li
     if len(angles) == 0:
         return 0  # in case no angles is found
     else:
-        return -median_low(angles)
+        median = -median_low(angles)
+        return median if median != 0 else 0
 
 
 def rectify_crops(
diff --git a/tests/common/test_models.py b/tests/common/test_models.py
index 556b734990..25fb2c6c5f 100644
--- a/tests/common/test_models.py
+++ b/tests/common/test_models.py
@@ -52,6 +52,9 @@ def test_estimate_orientation(mock_image, mock_bitmap, mock_tilted_payslip):
     angle_rotated = estimate_orientation(rotated)
     assert abs(angle_rotated) < 1.0
 
+    with pytest.raises(AssertionError):
+        estimate_orientation(np.ones((10, 10, 10)))
+
 
 def test_get_lang():
     sentence = "This is a test sentence."
diff --git a/tests/common/test_models_builder.py b/tests/common/test_models_builder.py
index 90c681b5f6..0a8edadb39 100644
--- a/tests/common/test_models_builder.py
+++ b/tests/common/test_models_builder.py
@@ -29,6 +29,9 @@ def test_documentbuilder():
     out = doc_builder(pages, [boxes, boxes], [[("hello", 1.0)] * words_per_page] * num_pages, [(100, 200), (100, 200)])
     assert isinstance(out, Document)
     assert len(out.pages) == num_pages
+    assert all([isinstance(page.page, np.ndarray) for page in out.pages]) and all(
+        [page.page.shape == (100, 200, 3) for page in out.pages]
+    )
     # 1 Block & 1 line per page
     assert len(out.pages[0].blocks) == 1 and len(out.pages[0].blocks[0].lines) == 1
     assert len(out.pages[0].blocks[0].lines[0].words) == words_per_page
@@ -79,6 +82,9 @@ def test_kiedocumentbuilder():
     )
     assert isinstance(out, KIEDocument)
     assert len(out.pages) == num_pages
+    assert all([isinstance(page.page, np.ndarray) for page in out.pages]) and all(
+        [page.page.shape == (100, 200, 3) for page in out.pages]
+    )
     # 1 Block & 1 line per page
     assert len(out.pages[0].predictions) == 1
     assert len(out.pages[0].predictions[CLASS_NAME]) == words_per_page

From 2e2f397963c356d0192c134866ac79b8002bc52c Mon Sep 17 00:00:00 2001
From: felix <felixdittrich92@gmail.com>
Date: Thu, 5 Oct 2023 11:54:28 +0200
Subject: [PATCH 06/13] correct median

---
 doctr/models/_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py
index 0f9cfb1c01..ab2ec63715 100644
--- a/doctr/models/_utils.py
+++ b/doctr/models/_utils.py
@@ -74,7 +74,7 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li
         return 0  # in case no angles is found
     else:
         median = -median_low(angles)
-        return median if median != 0 else 0
+        return median if abs(median) != 0 else 0
 
 
 def rectify_crops(

From 13b90cd2cbbd8fd17333c082488a482d15f8d73e Mon Sep 17 00:00:00 2001
From: felix <felixdittrich92@gmail.com>
Date: Thu, 12 Oct 2023 11:01:05 +0200
Subject: [PATCH 07/13] rebase


From 0cbadb5232595ce85cf6396bea3a1ae72ee5d013 Mon Sep 17 00:00:00 2001
From: felix <felixdittrich92@gmail.com>
Date: Fri, 13 Oct 2023 15:34:23 +0200
Subject: [PATCH 08/13] round angle to int

---
 doctr/models/_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py
index ab2ec63715..ec93c56f7e 100644
--- a/doctr/models/_utils.py
+++ b/doctr/models/_utils.py
@@ -27,7 +27,7 @@ def get_max_width_length_ratio(contour: np.ndarray) -> float:
     return max(w / h, h / w)
 
 
-def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_lines: float = 5) -> float:
+def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_lines: float = 5) -> int:
     """Estimate the angle of the general document orientation based on the
      lines of the document and the assumption that they should be horizontal.
 
@@ -74,7 +74,7 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li
         return 0  # in case no angles is found
     else:
         median = -median_low(angles)
-        return median if abs(median) != 0 else 0
+        return round(median) if abs(median) != 0 else 0
 
 
 def rectify_crops(

From 54e960467c086437a2882d7d99c50199a7406daf Mon Sep 17 00:00:00 2001
From: felix <felixdittrich92@gmail.com>
Date: Mon, 16 Oct 2023 08:19:12 +0200
Subject: [PATCH 09/13] update doc string

---
 doctr/models/zoo.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/doctr/models/zoo.py b/doctr/models/zoo.py
index da806227c0..1dc131acd7 100644
--- a/doctr/models/zoo.py
+++ b/doctr/models/zoo.py
@@ -99,9 +99,10 @@ def ocr_predictor(
             (potentially rotated) as straight bounding boxes.
         detect_orientation: if True, the estimated general page orientation will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
-        straighten_pages: if True, estimates the page general orientation based on the median line orientation.
-            Then, rotates page before passing it to the deep learning modules. The final predictions will be remapped
-            accordingly. Doing so will improve performances for documents with page-uniform rotations.
+        straighten_pages: if True, estimates the general page orientation
+            based on the median line orientation of the segmentation map.
+            Then, rotates the page before passing it again to the deep learning detection module.
+            Doing so will improve performance for documents with page-uniform rotations.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
         kwargs: keyword args of `OCRPredictor`
@@ -212,9 +213,10 @@ def kie_predictor(
             (potentially rotated) as straight bounding boxes.
         detect_orientation: if True, the estimated general page orientation will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
-        straighten_pages: if True, estimates the page general orientation based on the median line orientation.
-            Then, rotates page before passing it to the deep learning modules. The final predictions will be remapped
-            accordingly. Doing so will improve performances for documents with page-uniform rotations.
+        straighten_pages: if True, estimates the general page orientation
+            based on the median line orientation of the segmentation map.
+            Then, rotates the page before passing it again to the deep learning detection module.
+            Doing so will improve performance for documents with page-uniform rotations.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
         kwargs: keyword args of `OCRPredictor`

From 3fc93cff2a5eabfc9539c4fbe50dec2546478a61 Mon Sep 17 00:00:00 2001
From: felix <felixdittrich92@gmail.com>
Date: Fri, 10 Nov 2023 08:42:33 +0100
Subject: [PATCH 10/13] rebase


From d5c23b3a468e6a8377f66dde0ca4f2f2fab3f9a0 Mon Sep 17 00:00:00 2001
From: felix <felixdittrich92@gmail.com>
Date: Wed, 15 Nov 2023 10:19:32 +0100
Subject: [PATCH 11/13] rebase

---
 doctr/models/_utils.py                         | 10 ++++++++--
 doctr/models/detection/predictor/pytorch.py    |  6 +++---
 doctr/models/detection/predictor/tensorflow.py |  2 +-
 doctr/models/kie_predictor/pytorch.py          | 11 ++++-------
 doctr/models/kie_predictor/tensorflow.py       |  2 +-
 doctr/models/predictor/base.py                 |  2 +-
 doctr/models/predictor/pytorch.py              | 11 ++++-------
 doctr/models/predictor/tensorflow.py           |  6 ++----
 tests/common/test_models_builder.py            |  8 ++++----
 9 files changed, 28 insertions(+), 30 deletions(-)

diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py
index ec93c56f7e..71828d3151 100644
--- a/doctr/models/_utils.py
+++ b/doctr/models/_utils.py
@@ -21,7 +21,9 @@ def get_max_width_length_ratio(contour: np.ndarray) -> float:
     ----
         contour: the contour from cv2.findContour
 
-    Returns: the maximum shape ratio
+    Returns:
+    -------
+        the maximum shape ratio
     """
     _, (w, h), _ = cv2.minAreaRect(contour)
     return max(w / h, h / w)
@@ -32,6 +34,7 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li
      lines of the document and the assumption that they should be horizontal.
 
     Args:
+    ----
         img: the img or bitmap to analyze (H, W, C)
         n_ct: the number of contours used for the orientation estimation
         ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines
@@ -40,7 +43,6 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li
     -------
         the angle of the general document orientation
     """
-
     assert len(img.shape) == 3 and img.shape[-1] in [1, 3], f"Image shape {img.shape} not supported"
     if np.max(img) <= 1 and np.min(img) >= 0 or (np.max(img) <= 255 and np.min(img) >= 0 and img.shape[-1] == 1):
         thresh = img.astype(np.uint8)
@@ -119,9 +121,13 @@ def rectify_loc_preds(
 def get_language(text: str) -> Tuple[str, float]:
     """Get languages of a text using langdetect model.
     Get the language with the highest probability or no language if only a few words or a low probability
+
     Args:
+    ----
         text (str): text
+
     Returns:
+    -------
         The detected language in ISO 639 code and confidence score
     """
     try:
diff --git a/doctr/models/detection/predictor/pytorch.py b/doctr/models/detection/predictor/pytorch.py
index 05a6426503..b78dc4b759 100644
--- a/doctr/models/detection/predictor/pytorch.py
+++ b/doctr/models/detection/predictor/pytorch.py
@@ -53,9 +53,9 @@ def forward(
             self.model(batch, return_preds=True, return_model_output=True, **kwargs) for batch in processed_batches
         ]
         preds = [pred for batch in predicted_batches for pred in batch["preds"]]
-        seg_maps = [
-            pred.permute(1, 2, 0).detach().cpu().numpy() for batch in predicted_batches for pred in batch["out_map"]
-        ]
         if return_maps:
+            seg_maps = [
+                pred.permute(1, 2, 0).detach().cpu().numpy() for batch in predicted_batches for pred in batch["out_map"]
+            ]
             return preds, seg_maps
         return preds
diff --git a/doctr/models/detection/predictor/tensorflow.py b/doctr/models/detection/predictor/tensorflow.py
index 30a1d3aba1..d82b9f25f5 100644
--- a/doctr/models/detection/predictor/tensorflow.py
+++ b/doctr/models/detection/predictor/tensorflow.py
@@ -51,7 +51,7 @@ def __call__(
         ]
 
         preds = [pred for batch in predicted_batches for pred in batch["preds"]]
-        seg_maps = [pred.numpy() for batch in predicted_batches for pred in batch["out_map"]]
         if return_maps:
+            seg_maps = [pred.numpy() for batch in predicted_batches for pred in batch["out_map"]]
             return preds, seg_maps
         return preds
diff --git a/doctr/models/kie_predictor/pytorch.py b/doctr/models/kie_predictor/pytorch.py
index 115d8668db..e5dee4fffd 100644
--- a/doctr/models/kie_predictor/pytorch.py
+++ b/doctr/models/kie_predictor/pytorch.py
@@ -36,7 +36,7 @@ class KIEPredictor(nn.Module, _KIEPredictor):
             page. Doing so will slightly deteriorate the overall latency.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
-        kwargs: keyword args of `DocumentBuilder`
+        **kwargs: keyword args of `DocumentBuilder`
     """
 
     def __init__(
@@ -95,10 +95,7 @@ def forward(
                 if self.detect_orientation
                 else [estimate_orientation(seq_map) for seq_map in seg_maps]
             )
-            pages = [
-                rotate_image(page, -angle, expand=False)  # type: ignore[arg-type]
-                for page, angle in zip(pages, origin_page_orientations)
-            ]
+            pages = [rotate_image(page, -angle, expand=False) for page, angle in zip(pages, origin_page_orientations)]
             # Forward again to get predictions on straight pages
             loc_preds = self.det_predictor(pages, **kwargs)
 
@@ -146,10 +143,10 @@ def forward(
             languages_dict = None
 
         out = self.doc_builder(
-            pages,  # type: ignore[arg-type]
+            pages,
             boxes_per_page,
             text_preds_per_page,
-            origin_page_shapes,  # type: ignore[arg-type]
+            origin_page_shapes,
             orientations,
             languages_dict,
         )
diff --git a/doctr/models/kie_predictor/tensorflow.py b/doctr/models/kie_predictor/tensorflow.py
index 94d6ccab58..6ac0a6221f 100644
--- a/doctr/models/kie_predictor/tensorflow.py
+++ b/doctr/models/kie_predictor/tensorflow.py
@@ -36,7 +36,7 @@ class KIEPredictor(NestedObject, _KIEPredictor):
             page. Doing so will slightly deteriorate the overall latency.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
-        kwargs: keyword args of `DocumentBuilder`
+        **kwargs: keyword args of `DocumentBuilder`
     """
 
     _children_names = ["det_predictor", "reco_predictor", "doc_builder"]
diff --git a/doctr/models/predictor/base.py b/doctr/models/predictor/base.py
index 1190606299..4de41e01e0 100644
--- a/doctr/models/predictor/base.py
+++ b/doctr/models/predictor/base.py
@@ -29,7 +29,7 @@ class _OCRPredictor:
             accordingly. Doing so will improve performances for documents with page-uniform rotations.
         preserve_aspect_ratio: if True, resize preserving the aspect ratio (with padding)
         symmetric_pad: if True and preserve_aspect_ratio is True, pas the image symmetrically.
-        kwargs: keyword args of `DocumentBuilder`
+        **kwargs: keyword args of `DocumentBuilder`
     """
 
     crop_orientation_predictor: Optional[CropOrientationPredictor]
diff --git a/doctr/models/predictor/pytorch.py b/doctr/models/predictor/pytorch.py
index 350444fba3..874128c99f 100644
--- a/doctr/models/predictor/pytorch.py
+++ b/doctr/models/predictor/pytorch.py
@@ -36,7 +36,7 @@ class OCRPredictor(nn.Module, _OCRPredictor):
             page. Doing so will slightly deteriorate the overall latency.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
-        kwargs: keyword args of `DocumentBuilder`
+        **kwargs: keyword args of `DocumentBuilder`
     """
 
     def __init__(
@@ -90,10 +90,7 @@ def forward(
                 if self.detect_orientation
                 else [estimate_orientation(seq_map) for seq_map in seg_maps]
             )
-            pages = [
-                rotate_image(page, -angle, expand=False)  # type: ignore[arg-type]
-                for page, angle in zip(pages, origin_page_orientations)
-            ]
+            pages = [rotate_image(page, -angle, expand=False) for page, angle in zip(pages, origin_page_orientations)]
             # Forward again to get predictions on straight pages
             loc_preds = self.det_predictor(pages, **kwargs)
 
@@ -130,10 +127,10 @@ def forward(
             languages_dict = None
 
         out = self.doc_builder(
-            pages,  # type: ignore[arg-type]
+            pages,
             boxes,
             text_preds,
-            origin_page_shapes,  # type: ignore[arg-type]
+            origin_page_shapes,
             orientations,
             languages_dict,
         )
diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py
index 5f747a01d4..5128711502 100644
--- a/doctr/models/predictor/tensorflow.py
+++ b/doctr/models/predictor/tensorflow.py
@@ -36,7 +36,7 @@ class OCRPredictor(NestedObject, _OCRPredictor):
             page. Doing so will slightly deteriorate the overall latency.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
-        kwargs: keyword args of `DocumentBuilder`
+        **kwargs: keyword args of `DocumentBuilder`
     """
 
     _children_names = ["det_predictor", "reco_predictor", "doc_builder"]
@@ -97,9 +97,7 @@ def __call__(
         assert all(
             len(loc_pred) == 1 for loc_pred in loc_preds_dict
         ), "Detection Model in ocr_predictor should output only one class"
-        loc_preds: List[np.ndarray] = [
-            list(loc_pred.values())[0] for loc_pred in loc_preds_dict  # type: ignore[union-attr]
-        ]
+        loc_preds: List[np.ndarray] = [list(loc_pred.values())[0] for loc_pred in loc_preds_dict]  # type: ignore[union-attr]
 
         # Rectify crops if aspect ratio
         loc_preds = self._remove_padding(pages, loc_preds)
diff --git a/tests/common/test_models_builder.py b/tests/common/test_models_builder.py
index 0a8edadb39..7d233dafb0 100644
--- a/tests/common/test_models_builder.py
+++ b/tests/common/test_models_builder.py
@@ -29,8 +29,8 @@ def test_documentbuilder():
     out = doc_builder(pages, [boxes, boxes], [[("hello", 1.0)] * words_per_page] * num_pages, [(100, 200), (100, 200)])
     assert isinstance(out, Document)
     assert len(out.pages) == num_pages
-    assert all([isinstance(page.page, np.ndarray) for page in out.pages]) and all(
-        [page.page.shape == (100, 200, 3) for page in out.pages]
+    assert all(isinstance(page.page, np.ndarray) for page in out.pages) and all(
+        page.page.shape == (100, 200, 3) for page in out.pages
     )
     # 1 Block & 1 line per page
     assert len(out.pages[0].blocks) == 1 and len(out.pages[0].blocks[0].lines) == 1
@@ -82,8 +82,8 @@ def test_kiedocumentbuilder():
     )
     assert isinstance(out, KIEDocument)
     assert len(out.pages) == num_pages
-    assert all([isinstance(page.page, np.ndarray) for page in out.pages]) and all(
-        [page.page.shape == (100, 200, 3) for page in out.pages]
+    assert all(isinstance(page.page, np.ndarray) for page in out.pages) and all(
+        page.page.shape == (100, 200, 3) for page in out.pages
     )
     # 1 Block & 1 line per page
     assert len(out.pages[0].predictions) == 1

From 375486db4309d84a0f9f7b367fb5a43a9f862c12 Mon Sep 17 00:00:00 2001
From: felix <felixdittrich92@gmail.com>
Date: Thu, 16 Nov 2023 15:13:03 +0100
Subject: [PATCH 12/13] fix docstrings

---
 doctr/io/elements.py    | 1 +
 doctr/models/builder.py | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/doctr/io/elements.py b/doctr/io/elements.py
index d94ade587e..c0d522a0a5 100644
--- a/doctr/io/elements.py
+++ b/doctr/io/elements.py
@@ -234,6 +234,7 @@ class Page(Element):
     """Implements a page element as a collection of blocks
 
     Args:
+    ----
         page: image encoded as a numpy array in uint8
         blocks: list of block elements
         page_idx: the index of the page in the input raw document
diff --git a/doctr/models/builder.py b/doctr/models/builder.py
index b974f7c0db..764b48ec37 100644
--- a/doctr/models/builder.py
+++ b/doctr/models/builder.py
@@ -297,6 +297,7 @@ def __call__(
         """Re-arrange detected words into structured blocks
 
         Args:
+        ----
             pages: list of N elements, where each element represents the page image
             boxes: list of N elements, where each element represents the localization predictions, of shape (*, 5)
                 or (*, 6) for all words for a given page
@@ -368,6 +369,7 @@ def __call__(  # type: ignore[override]
         """Re-arrange detected words into structured predictions
 
         Args:
+        ----
             pages: list of N elements, where each element represents the page image
             boxes: list of N dictionaries, where each element represents the localization predictions for a class,
                 of shape (*, 5) or (*, 6) for all predictions

From 727099d7e20f622aa1f7a1dd62452ebe6d001827 Mon Sep 17 00:00:00 2001
From: felix <felixdittrich92@gmail.com>
Date: Thu, 16 Nov 2023 18:12:30 +0100
Subject: [PATCH 13/13] apply suggestion

---
 doctr/models/_utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py
index 71828d3151..484538b1a0 100644
--- a/doctr/models/_utils.py
+++ b/doctr/models/_utils.py
@@ -44,9 +44,11 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li
         the angle of the general document orientation
     """
     assert len(img.shape) == 3 and img.shape[-1] in [1, 3], f"Image shape {img.shape} not supported"
-    if np.max(img) <= 1 and np.min(img) >= 0 or (np.max(img) <= 255 and np.min(img) >= 0 and img.shape[-1] == 1):
+    max_value = np.max(img)
+    min_value = np.min(img)
+    if max_value <= 1 and min_value >= 0 or (max_value <= 255 and min_value >= 0 and img.shape[-1] == 1):
         thresh = img.astype(np.uint8)
-    if np.max(img) <= 255 and np.min(img) >= 0 and img.shape[-1] == 3:
+    if max_value <= 255 and min_value >= 0 and img.shape[-1] == 3:
         gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
         gray_img = cv2.medianBlur(gray_img, 5)
         thresh = cv2.threshold(gray_img, thresh=0, maxval=255, type=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]