diff --git a/README.md b/README.md
index 242cff4d59..93cc877b70 100644
--- a/README.md
+++ b/README.md
@@ -75,7 +75,7 @@ If both options are set to False, the predictor will always fit and return rotat
 To interpret your model's predictions, you can visualize them interactively as follows:
 
 ```python
-result.show(doc)
+result.show()
 ```
 
 ![Visualization sample](docs/images/doctr_example_script.gif)
diff --git a/doctr/io/elements.py b/doctr/io/elements.py
index 4e92a4043f..c0d522a0a5 100644
--- a/doctr/io/elements.py
+++ b/doctr/io/elements.py
@@ -235,6 +235,7 @@ class Page(Element):
 
     Args:
     ----
+        page: image encoded as a numpy array in uint8
         blocks: list of block elements
         page_idx: the index of the page in the input raw document
         dimensions: the page size in pixels in format (height, width)
@@ -248,6 +249,7 @@ class Page(Element):
 
     def __init__(
         self,
+        page: np.ndarray,
         blocks: List[Block],
         page_idx: int,
         dimensions: Tuple[int, int],
@@ -255,6 +257,7 @@ def __init__(
         language: Optional[Dict[str, Any]] = None,
     ) -> None:
         super().__init__(blocks=blocks)
+        self.page = page
         self.page_idx = page_idx
         self.dimensions = dimensions
         self.orientation = orientation if isinstance(orientation, dict) else dict(value=None, confidence=None)
@@ -267,17 +270,15 @@ def render(self, block_break: str = "\n\n") -> str:
     def extra_repr(self) -> str:
         return f"dimensions={self.dimensions}"
 
-    def show(self, page: np.ndarray, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
+    def show(self, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
         """Overlay the result on a given image
 
         Args:
-        ----
-            page: image encoded as a numpy array in uint8
             interactive: whether the display should be interactive
             preserve_aspect_ratio: pass True if you passed True to the predictor
             **kwargs: additional keyword arguments passed to the matplotlib.pyplot.show method
         """
-        visualize_page(self.export(), page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio)
+        visualize_page(self.export(), self.page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio)
         plt.show(**kwargs)
 
     def synthesize(self, **kwargs) -> np.ndarray:
@@ -408,6 +409,7 @@ class KIEPage(Element):
     Args:
     ----
         predictions: Dictionary with list of block elements for each detection class
+        page: image encoded as a numpy array in uint8
         page_idx: the index of the page in the input raw document
         dimensions: the page size in pixels in format (height, width)
         orientation: a dictionary with the value of the rotation angle in degress and confidence of the prediction
@@ -420,6 +422,7 @@ class KIEPage(Element):
 
     def __init__(
         self,
+        page: np.ndarray,
         predictions: Dict[str, List[Prediction]],
         page_idx: int,
         dimensions: Tuple[int, int],
@@ -427,6 +430,7 @@ def __init__(
         language: Optional[Dict[str, Any]] = None,
     ) -> None:
         super().__init__(predictions=predictions)
+        self.page = page
         self.page_idx = page_idx
         self.dimensions = dimensions
         self.orientation = orientation if isinstance(orientation, dict) else dict(value=None, confidence=None)
@@ -441,17 +445,17 @@ def render(self, prediction_break: str = "\n\n") -> str:
     def extra_repr(self) -> str:
         return f"dimensions={self.dimensions}"
 
-    def show(self, page: np.ndarray, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
+    def show(self, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
         """Overlay the result on a given image
 
         Args:
-        ----
-            page: image encoded as a numpy array in uint8
             interactive: whether the display should be interactive
             preserve_aspect_ratio: pass True if you passed True to the predictor
             **kwargs: keyword arguments passed to the matplotlib.pyplot.show method
         """
-        visualize_kie_page(self.export(), page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio)
+        visualize_kie_page(
+            self.export(), self.page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio
+        )
         plt.show(**kwargs)
 
     def synthesize(self, **kwargs) -> np.ndarray:
@@ -561,16 +565,10 @@ def render(self, page_break: str = "\n\n\n\n") -> str:
         """Renders the full text of the element"""
         return page_break.join(p.render() for p in self.pages)
 
-    def show(self, pages: List[np.ndarray], **kwargs) -> None:
-        """Overlay the result on a given image
-
-        Args:
-        ----
-            pages: list of images encoded as numpy arrays in uint8
-            **kwargs: keyword arguments passed to the Page.show method
-        """
-        for img, result in zip(pages, self.pages):
-            result.show(img, **kwargs)
+    def show(self, **kwargs) -> None:
+        """Overlay the result of each page on its stored image (`**kwargs` are passed to the Page.show method)"""
+        for result in self.pages:
+            result.show(**kwargs)
 
     def synthesize(self, **kwargs) -> List[np.ndarray]:
         """Synthesize all pages from their predictions
diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py
index 1021afdc0c..484538b1a0 100644
--- a/doctr/models/_utils.py
+++ b/doctr/models/_utils.py
@@ -11,7 +11,7 @@
 import numpy as np
 from langdetect import LangDetectException, detect_langs
 
-__all__ = ["estimate_orientation", "get_bitmap_angle", "get_language", "invert_data_structure"]
+__all__ = ["estimate_orientation", "get_language", "invert_data_structure"]
 
 
 def get_max_width_length_ratio(contour: np.ndarray) -> float:
@@ -21,19 +21,21 @@ def get_max_width_length_ratio(contour: np.ndarray) -> float:
     ----
         contour: the contour from cv2.findContour
 
-    Returns: the maximum shape ratio
+    Returns:
+    -------
+        the maximum shape ratio
     """
     _, (w, h), _ = cv2.minAreaRect(contour)
     return max(w / h, h / w)
 
 
-def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_lines: float = 5) -> float:
+def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_lines: float = 5) -> int:
     """Estimate the angle of the general document orientation based on the
      lines of the document and the assumption that they should be horizontal.
 
     Args:
     ----
-        img: the img to analyze
+        img: the img or bitmap to analyze (H, W, C)
         n_ct: the number of contours used for the orientation estimation
         ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines
 
@@ -41,9 +43,15 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li
     -------
         the angle of the general document orientation
     """
-    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    gray_img = cv2.medianBlur(gray_img, 5)
-    thresh = cv2.threshold(gray_img, thresh=0, maxval=255, type=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
+    assert len(img.shape) == 3 and img.shape[-1] in [1, 3], f"Image shape {img.shape} not supported"
+    # Branch on the channel count alone so that exactly one path runs and `thresh` is always bound
+    # (separate value-range checks left it undefined for inputs outside [0, 255]).
+    if img.shape[-1] == 1:
+        thresh = img.astype(np.uint8)
+    else:
+        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        gray_img = cv2.medianBlur(gray_img, 5)
+        thresh = cv2.threshold(gray_img, thresh=0, maxval=255, type=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
 
     # try to merge words in lines
     (h, w) = img.shape[:2]
@@ -69,47 +77,8 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li
     if len(angles) == 0:
         return 0  # in case no angles is found
     else:
-        return -median_low(angles)
-
-
-def get_bitmap_angle(bitmap: np.ndarray, n_ct: int = 20, std_max: float = 3.0) -> float:
-    """From a binarized segmentation map, find contours and fit min area rectangles to determine page angle
-
-    Args:
-    ----
-        bitmap: binarized segmentation map
-        n_ct: number of contours to use to fit page angle
-        std_max: maximum deviation of the angle distribution to consider the mean angle reliable
-
-    Returns:
-    -------
-        The angle of the page
-    """
-    # Find all contours on binarized seg map
-    contours, _ = cv2.findContours(bitmap.astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
-    # Sort contours
-    contours = sorted(contours, key=cv2.contourArea, reverse=True)
-
-    # Find largest contours and fit angles
-    # Track heights and widths to find aspect ratio (determine is rotation is clockwise)
-    angles, heights, widths = [], [], []
-    for ct in contours[:n_ct]:
-        _, (w, h), alpha = cv2.minAreaRect(ct)
-        widths.append(w)
-        heights.append(h)
-        angles.append(alpha)
-
-    if np.std(angles) > std_max:
-        # Edge case with angles of both 0 and 90°, or multi_oriented docs
-        angle = 0.0
-    else:
-        angle = -np.mean(angles)
-        # Determine rotation direction (clockwise/counterclockwise)
-        # Angle coverage: [-90°, +90°], half of the quadrant
-        if np.sum(widths) < np.sum(heights):  # CounterClockwise
-            angle = 90 + angle
-
-    return angle
+        median = -median_low(angles)
+        return round(median) if abs(median) != 0 else 0
 
 
 def rectify_crops(
@@ -154,9 +123,13 @@ def rectify_loc_preds(
 def get_language(text: str) -> Tuple[str, float]:
     """Get languages of a text using langdetect model.
     Get the language with the highest probability or no language if only a few words or a low probability
+
     Args:
+    ----
         text (str): text
+
     Returns:
+    -------
         The detected language in ISO 639 code and confidence score
     """
     try:
diff --git a/doctr/models/builder.py b/doctr/models/builder.py
index 820689bbac..764b48ec37 100644
--- a/doctr/models/builder.py
+++ b/doctr/models/builder.py
@@ -287,6 +287,7 @@ def extra_repr(self) -> str:
 
     def __call__(
         self,
+        pages: List[np.ndarray],
         boxes: List[np.ndarray],
         text_preds: List[List[Tuple[str, float]]],
         page_shapes: List[Tuple[int, int]],
@@ -297,6 +298,7 @@ def __call__(
 
         Args:
         ----
+            pages: list of N elements, where each element represents the page image
             boxes: list of N elements, where each element represents the localization predictions, of shape (*, 5)
                 or (*, 6) for all words for a given page
             text_preds: list of N elements, where each element is the list of all word prediction (text + confidence)
@@ -325,6 +327,7 @@ def __call__(
 
         _pages = [
             Page(
+                page,
                 self._build_blocks(
                     page_boxes,
                     word_preds,
@@ -334,8 +337,8 @@ def __call__(
                 orientation,
                 language,
             )
-            for _idx, shape, page_boxes, word_preds, orientation, language in zip(
-                range(len(boxes)), page_shapes, boxes, text_preds, _orientations, _languages
+            for page, _idx, shape, page_boxes, word_preds, orientation, language in zip(
+                pages, range(len(boxes)), page_shapes, boxes, text_preds, _orientations, _languages
             )
         ]
 
@@ -356,6 +359,7 @@ class KIEDocumentBuilder(DocumentBuilder):
 
     def __call__(  # type: ignore[override]
         self,
+        pages: List[np.ndarray],
         boxes: List[Dict[str, np.ndarray]],
         text_preds: List[Dict[str, List[Tuple[str, float]]]],
         page_shapes: List[Tuple[int, int]],
@@ -366,6 +370,7 @@ def __call__(  # type: ignore[override]
 
         Args:
         ----
+            pages: list of N elements, where each element represents the page image
             boxes: list of N dictionaries, where each element represents the localization predictions for a class,
                 of shape (*, 5) or (*, 6) for all predictions
             text_preds: list of N dictionaries, where each element is the list of all word prediction
@@ -400,6 +405,7 @@ def __call__(  # type: ignore[override]
 
         _pages = [
             KIEPage(
+                page,
                 {
                     k: self._build_blocks(
                         page_boxes[k],
@@ -412,8 +418,8 @@ def __call__(  # type: ignore[override]
                 orientation,
                 language,
             )
-            for _idx, shape, page_boxes, word_preds, orientation, language in zip(
-                range(len(boxes)), page_shapes, boxes, text_preds, _orientations, _languages
+            for page, _idx, shape, page_boxes, word_preds, orientation, language in zip(
+                pages, range(len(boxes)), page_shapes, boxes, text_preds, _orientations, _languages
             )
         ]
 
diff --git a/doctr/models/detection/predictor/pytorch.py b/doctr/models/detection/predictor/pytorch.py
index 8202abca8d..b78dc4b759 100644
--- a/doctr/models/detection/predictor/pytorch.py
+++ b/doctr/models/detection/predictor/pytorch.py
@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any, List, Union
+from typing import Any, Dict, List, Tuple, Union
 
 import numpy as np
 import torch
@@ -37,8 +37,9 @@ def __init__(
     def forward(
         self,
         pages: List[Union[np.ndarray, torch.Tensor]],
+        return_maps: bool = False,
         **kwargs: Any,
-    ) -> List[np.ndarray]:
+    ) -> Union[List[Dict[str, np.ndarray]], Tuple[List[Dict[str, np.ndarray]], List[np.ndarray]]]:
         # Dimension check
         if any(page.ndim != 3 for page in pages):
             raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.")
@@ -48,5 +49,13 @@ def forward(
         self.model, processed_batches = set_device_and_dtype(
             self.model, processed_batches, _params.device, _params.dtype
         )
-        predicted_batches = [self.model(batch, return_preds=True, **kwargs)["preds"] for batch in processed_batches]
-        return [pred for batch in predicted_batches for pred in batch]
+        predicted_batches = [
+            self.model(batch, return_preds=True, return_model_output=True, **kwargs) for batch in processed_batches
+        ]
+        preds = [pred for batch in predicted_batches for pred in batch["preds"]]
+        if return_maps:
+            seg_maps = [
+                pred.permute(1, 2, 0).detach().cpu().numpy() for batch in predicted_batches for pred in batch["out_map"]
+            ]
+            return preds, seg_maps
+        return preds
diff --git a/doctr/models/detection/predictor/tensorflow.py b/doctr/models/detection/predictor/tensorflow.py
index 80251ff96e..d82b9f25f5 100644
--- a/doctr/models/detection/predictor/tensorflow.py
+++ b/doctr/models/detection/predictor/tensorflow.py
@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any, Dict, List, Union
+from typing import Any, Dict, List, Tuple, Union
 
 import numpy as np
 import tensorflow as tf
@@ -37,14 +37,21 @@ def __init__(
     def __call__(
         self,
         pages: List[Union[np.ndarray, tf.Tensor]],
+        return_maps: bool = False,
         **kwargs: Any,
-    ) -> List[Dict[str, np.ndarray]]:
+    ) -> Union[List[Dict[str, np.ndarray]], Tuple[List[Dict[str, np.ndarray]], List[np.ndarray]]]:
         # Dimension check
         if any(page.ndim != 3 for page in pages):
             raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.")
 
         processed_batches = self.pre_processor(pages)
         predicted_batches = [
-            self.model(batch, return_preds=True, training=False, **kwargs)["preds"] for batch in processed_batches
+            self.model(batch, return_preds=True, return_model_output=True, training=False, **kwargs)
+            for batch in processed_batches
         ]
-        return [pred for batch in predicted_batches for pred in batch]
+
+        preds = [pred for batch in predicted_batches for pred in batch["preds"]]
+        if return_maps:
+            seg_maps = [pred.numpy() for batch in predicted_batches for pred in batch["out_map"]]
+            return preds, seg_maps
+        return preds
diff --git a/doctr/models/kie_predictor/pytorch.py b/doctr/models/kie_predictor/pytorch.py
index 520dcdaf0e..e5dee4fffd 100644
--- a/doctr/models/kie_predictor/pytorch.py
+++ b/doctr/models/kie_predictor/pytorch.py
@@ -13,7 +13,7 @@
 from doctr.models._utils import estimate_orientation, get_language, invert_data_structure
 from doctr.models.detection.predictor import DetectionPredictor
 from doctr.models.recognition.predictor import RecognitionPredictor
-from doctr.utils.geometry import rotate_boxes, rotate_image
+from doctr.utils.geometry import rotate_image
 
 from .base import _KIEPredictor
 
@@ -36,7 +36,7 @@ class KIEPredictor(nn.Module, _KIEPredictor):
             page. Doing so will slightly deteriorate the overall latency.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
-        kwargs: keyword args of `DocumentBuilder`
+        **kwargs: keyword args of `DocumentBuilder`
     """
 
     def __init__(
@@ -72,22 +72,33 @@ def forward(
 
         origin_page_shapes = [page.shape[:2] if isinstance(page, np.ndarray) else page.shape[-2:] for page in pages]
 
+        # Localize text elements
+        loc_preds, out_maps = self.det_predictor(pages, return_maps=True, **kwargs)
+
         # Detect document rotation and rotate pages
+        # Max over the last axis (kept as a size-1 axis), binarized into a 0/255 uint8 map that
+        # is handed to estimate_orientation below
+        seg_maps = [
+            np.where(np.amax(out_map, axis=-1, keepdims=True) > kwargs.get("bin_thresh", 0.3), 255, 0).astype(np.uint8)
+            for out_map in out_maps
+        ]
         if self.detect_orientation:
-            origin_page_orientations = [estimate_orientation(page) for page in pages]
+            origin_page_orientations = [estimate_orientation(seg_map) for seg_map in seg_maps]
             orientations = [
-                {"value": orientation_page, "confidence": 1.0} for orientation_page in origin_page_orientations
+                {"value": orientation_page, "confidence": None} for orientation_page in origin_page_orientations
             ]
         else:
             orientations = None
         if self.straighten_pages:
             origin_page_orientations = (
-                origin_page_orientations if self.detect_orientation else [estimate_orientation(page) for page in pages]
+                origin_page_orientations
+                if self.detect_orientation
+                else [estimate_orientation(seg_map) for seg_map in seg_maps]
             )
-            pages = [rotate_image(page, -angle, expand=True) for page, angle in zip(pages, origin_page_orientations)]
+            pages = [rotate_image(page, -angle, expand=False) for page, angle in zip(pages, origin_page_orientations)]
+            # Forward again to get predictions on straight pages
+            loc_preds = self.det_predictor(pages, **kwargs)
 
-        # Localize text elements
-        loc_preds = self.det_predictor(pages, **kwargs)
         dict_loc_preds: Dict[str, List[np.ndarray]] = invert_data_structure(loc_preds)  # type: ignore[assignment]
         # Check whether crop mode should be switched to channels first
         channels_last = len(pages) == 0 or isinstance(pages[0], np.ndarray)
@@ -130,27 +141,12 @@ def forward(
             languages_dict = [{"value": lang[0], "confidence": lang[1]} for lang in languages]
         else:
             languages_dict = None
-        # Rotate back pages and boxes while keeping original image size
-        if self.straighten_pages:
-            boxes_per_page = [
-                {
-                    k: rotate_boxes(
-                        page_boxes,
-                        angle,
-                        orig_shape=page.shape[:2] if isinstance(page, np.ndarray) else page.shape[1:],
-                        target_shape=mask,
-                    )
-                    for k, page_boxes in page_boxes_dict.items()
-                }
-                for page_boxes_dict, page, angle, mask in zip(
-                    boxes_per_page, pages, origin_page_orientations, origin_page_shapes
-                )
-            ]
 
         out = self.doc_builder(
+            pages,
             boxes_per_page,
             text_preds_per_page,
-            [page.shape[:2] if channels_last else page.shape[-2:] for page in pages],  # type: ignore[misc]
+            origin_page_shapes,
             orientations,
             languages_dict,
         )
diff --git a/doctr/models/kie_predictor/tensorflow.py b/doctr/models/kie_predictor/tensorflow.py
index d6dca51520..6ac0a6221f 100644
--- a/doctr/models/kie_predictor/tensorflow.py
+++ b/doctr/models/kie_predictor/tensorflow.py
@@ -12,7 +12,7 @@
 from doctr.models._utils import estimate_orientation, get_language, invert_data_structure
 from doctr.models.detection.predictor import DetectionPredictor
 from doctr.models.recognition.predictor import RecognitionPredictor
-from doctr.utils.geometry import rotate_boxes, rotate_image
+from doctr.utils.geometry import rotate_image
 from doctr.utils.repr import NestedObject
 
 from .base import _KIEPredictor
@@ -36,7 +36,7 @@ class KIEPredictor(NestedObject, _KIEPredictor):
             page. Doing so will slightly deteriorate the overall latency.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
-        kwargs: keyword args of `DocumentBuilder`
+        **kwargs: keyword args of `DocumentBuilder`
     """
 
     _children_names = ["det_predictor", "reco_predictor", "doc_builder"]
@@ -72,24 +72,34 @@ def __call__(
 
         origin_page_shapes = [page.shape[:2] for page in pages]
 
+        # Localize text elements
+        loc_preds, out_maps = self.det_predictor(pages, return_maps=True, **kwargs)
+
         # Detect document rotation and rotate pages
+        # Max over the last axis (kept as a size-1 axis), binarized into a 0/255 uint8 map that
+        # is handed to estimate_orientation below
+        seg_maps = [
+            np.where(np.amax(out_map, axis=-1, keepdims=True) > kwargs.get("bin_thresh", 0.3), 255, 0).astype(np.uint8)
+            for out_map in out_maps
+        ]
         if self.detect_orientation:
-            origin_page_orientations = [estimate_orientation(page) for page in pages]
+            origin_page_orientations = [estimate_orientation(seg_map) for seg_map in seg_maps]
             orientations = [
-                {"value": orientation_page, "confidence": 1.0} for orientation_page in origin_page_orientations
+                {"value": orientation_page, "confidence": None} for orientation_page in origin_page_orientations
             ]
         else:
             orientations = None
         if self.straighten_pages:
             origin_page_orientations = (
-                origin_page_orientations if self.detect_orientation else [estimate_orientation(page) for page in pages]
+                origin_page_orientations
+                if self.detect_orientation
+                else [estimate_orientation(seg_map) for seg_map in seg_maps]
             )
-            pages = [rotate_image(page, -angle, expand=True) for page, angle in zip(pages, origin_page_orientations)]
-
-        # Localize text elements
-        loc_preds = self.det_predictor(pages, **kwargs)
+            pages = [rotate_image(page, -angle, expand=False) for page, angle in zip(pages, origin_page_orientations)]
+            # Forward again to get predictions on straight pages
+            loc_preds = self.det_predictor(pages, **kwargs)  # type: ignore[assignment]
 
-        dict_loc_preds: Dict[str, List[np.ndarray]] = invert_data_structure(loc_preds)  # type: ignore[assignment]
+        dict_loc_preds: Dict[str, List[np.ndarray]] = invert_data_structure(loc_preds)  # type: ignore
         # Rectify crops if aspect ratio
         dict_loc_preds = {k: self._remove_padding(pages, loc_pred) for k, loc_pred in dict_loc_preds.items()}
 
@@ -127,24 +137,9 @@ def __call__(
             languages_dict = [{"value": lang[0], "confidence": lang[1]} for lang in languages]
         else:
             languages_dict = None
-        # Rotate back pages and boxes while keeping original image size
-        if self.straighten_pages:
-            boxes_per_page = [
-                {
-                    k: rotate_boxes(
-                        page_boxes,
-                        angle,
-                        orig_shape=page.shape[:2] if isinstance(page, np.ndarray) else page.shape[-2:],
-                        target_shape=mask,  # type: ignore[arg-type]
-                    )
-                    for k, page_boxes in page_boxes_dict.items()
-                }
-                for page_boxes_dict, page, angle, mask in zip(
-                    boxes_per_page, pages, origin_page_orientations, origin_page_shapes
-                )
-            ]
 
         out = self.doc_builder(
+            pages,
             boxes_per_page,
             text_preds_per_page,
             origin_page_shapes,  # type: ignore[arg-type]
diff --git a/doctr/models/predictor/base.py b/doctr/models/predictor/base.py
index 1190606299..4de41e01e0 100644
--- a/doctr/models/predictor/base.py
+++ b/doctr/models/predictor/base.py
@@ -29,7 +29,7 @@ class _OCRPredictor:
             accordingly. Doing so will improve performances for documents with page-uniform rotations.
         preserve_aspect_ratio: if True, resize preserving the aspect ratio (with padding)
         symmetric_pad: if True and preserve_aspect_ratio is True, pas the image symmetrically.
-        kwargs: keyword args of `DocumentBuilder`
+        **kwargs: keyword args of `DocumentBuilder`
     """
 
     crop_orientation_predictor: Optional[CropOrientationPredictor]
diff --git a/doctr/models/predictor/pytorch.py b/doctr/models/predictor/pytorch.py
index 59b34c8dca..874128c99f 100644
--- a/doctr/models/predictor/pytorch.py
+++ b/doctr/models/predictor/pytorch.py
@@ -13,7 +13,7 @@
 from doctr.models._utils import estimate_orientation, get_language
 from doctr.models.detection.predictor import DetectionPredictor
 from doctr.models.recognition.predictor import RecognitionPredictor
-from doctr.utils.geometry import rotate_boxes, rotate_image
+from doctr.utils.geometry import rotate_image
 
 from .base import _OCRPredictor
 
@@ -36,7 +36,7 @@ class OCRPredictor(nn.Module, _OCRPredictor):
             page. Doing so will slightly deteriorate the overall latency.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
-        kwargs: keyword args of `DocumentBuilder`
+        **kwargs: keyword args of `DocumentBuilder`
     """
 
     def __init__(
@@ -72,22 +72,28 @@ def forward(
 
         origin_page_shapes = [page.shape[:2] if isinstance(page, np.ndarray) else page.shape[-2:] for page in pages]
 
+        # Localize text elements
+        loc_preds, out_maps = self.det_predictor(pages, return_maps=True, **kwargs)
+
         # Detect document rotation and rotate pages
+        seg_maps = [np.where(out_map > kwargs.get("bin_thresh", 0.3), 255, 0).astype(np.uint8) for out_map in out_maps]
         if self.detect_orientation:
-            origin_page_orientations = [estimate_orientation(page) for page in pages]
+            origin_page_orientations = [estimate_orientation(seg_map) for seg_map in seg_maps]
             orientations = [
-                {"value": orientation_page, "confidence": 1.0} for orientation_page in origin_page_orientations
+                {"value": orientation_page, "confidence": None} for orientation_page in origin_page_orientations
             ]
         else:
             orientations = None
         if self.straighten_pages:
             origin_page_orientations = (
-                origin_page_orientations if self.detect_orientation else [estimate_orientation(page) for page in pages]
+                origin_page_orientations
+                if self.detect_orientation
+                else [estimate_orientation(seg_map) for seg_map in seg_maps]
             )
-            pages = [rotate_image(page, -angle, expand=True) for page, angle in zip(pages, origin_page_orientations)]
+            pages = [rotate_image(page, -angle, expand=False) for page, angle in zip(pages, origin_page_orientations)]
+            # Forward again to get predictions on straight pages
+            loc_preds = self.det_predictor(pages, **kwargs)
 
-        # Localize text elements
-        loc_preds = self.det_predictor(pages, **kwargs)
         assert all(
             len(loc_pred) == 1 for loc_pred in loc_preds
         ), "Detection Model in ocr_predictor should output only one class"
@@ -119,22 +125,12 @@ def forward(
             languages_dict = [{"value": lang[0], "confidence": lang[1]} for lang in languages]
         else:
             languages_dict = None
-        # Rotate back pages and boxes while keeping original image size
-        if self.straighten_pages:
-            boxes = [
-                rotate_boxes(
-                    page_boxes,
-                    angle,
-                    orig_shape=page.shape[:2] if isinstance(page, np.ndarray) else page.shape[1:],
-                    target_shape=mask,
-                )
-                for page_boxes, page, angle, mask in zip(boxes, pages, origin_page_orientations, origin_page_shapes)
-            ]
 
         out = self.doc_builder(
+            pages,
             boxes,
             text_preds,
-            [page.shape[:2] if channels_last else page.shape[-2:] for page in pages],  # type: ignore[misc]
+            origin_page_shapes,
             orientations,
             languages_dict,
         )
diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py
index 9ac31b3957..5128711502 100644
--- a/doctr/models/predictor/tensorflow.py
+++ b/doctr/models/predictor/tensorflow.py
@@ -12,7 +12,7 @@
 from doctr.models._utils import estimate_orientation, get_language
 from doctr.models.detection.predictor import DetectionPredictor
 from doctr.models.recognition.predictor import RecognitionPredictor
-from doctr.utils.geometry import rotate_boxes, rotate_image
+from doctr.utils.geometry import rotate_image
 from doctr.utils.repr import NestedObject
 
 from .base import _OCRPredictor
@@ -36,7 +36,7 @@ class OCRPredictor(NestedObject, _OCRPredictor):
             page. Doing so will slightly deteriorate the overall latency.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
-        kwargs: keyword args of `DocumentBuilder`
+        **kwargs: keyword args of `DocumentBuilder`
     """
 
     _children_names = ["det_predictor", "reco_predictor", "doc_builder"]
@@ -72,27 +72,32 @@ def __call__(
 
         origin_page_shapes = [page.shape[:2] for page in pages]
 
+        # Localize text elements
+        loc_preds_dict, out_maps = self.det_predictor(pages, return_maps=True, **kwargs)
+
         # Detect document rotation and rotate pages
+        seg_maps = [np.where(out_map > kwargs.get("bin_thresh", 0.3), 255, 0).astype(np.uint8) for out_map in out_maps]
         if self.detect_orientation:
-            origin_page_orientations = [estimate_orientation(page) for page in pages]
+            origin_page_orientations = [estimate_orientation(seq_map) for seq_map in seg_maps]
             orientations = [
-                {"value": orientation_page, "confidence": 1.0} for orientation_page in origin_page_orientations
+                {"value": orientation_page, "confidence": None} for orientation_page in origin_page_orientations
             ]
         else:
             orientations = None
         if self.straighten_pages:
             origin_page_orientations = (
-                origin_page_orientations if self.detect_orientation else [estimate_orientation(page) for page in pages]
+                origin_page_orientations
+                if self.detect_orientation
+                else [estimate_orientation(seq_map) for seq_map in seg_maps]
             )
-            pages = [rotate_image(page, -angle, expand=True) for page, angle in zip(pages, origin_page_orientations)]
+            pages = [rotate_image(page, -angle, expand=False) for page, angle in zip(pages, origin_page_orientations)]
+            # forward again to get predictions on straight pages
+            loc_preds_dict = self.det_predictor(pages, **kwargs)  # type: ignore[assignment]
 
-        # Localize text elements
-        loc_preds_dict = self.det_predictor(pages, **kwargs)
         assert all(
             len(loc_pred) == 1 for loc_pred in loc_preds_dict
         ), "Detection Model in ocr_predictor should output only one class"
-
-        loc_preds: List[np.ndarray] = [list(loc_pred.values())[0] for loc_pred in loc_preds_dict]
+        loc_preds: List[np.ndarray] = [list(loc_pred.values())[0] for loc_pred in loc_preds_dict]  # type: ignore[union-attr]
 
         # Rectify crops if aspect ratio
         loc_preds = self._remove_padding(pages, loc_preds)
@@ -115,19 +120,9 @@ def __call__(
             languages_dict = [{"value": lang[0], "confidence": lang[1]} for lang in languages]
         else:
             languages_dict = None
-        # Rotate back pages and boxes while keeping original image size
-        if self.straighten_pages:
-            boxes = [
-                rotate_boxes(
-                    page_boxes,
-                    angle,
-                    orig_shape=page.shape[:2] if isinstance(page, np.ndarray) else page.shape[-2:],
-                    target_shape=mask,  # type: ignore[arg-type]
-                )
-                for page_boxes, page, angle, mask in zip(boxes, pages, origin_page_orientations, origin_page_shapes)
-            ]
 
         out = self.doc_builder(
+            pages,
             boxes,
             text_preds,
             origin_page_shapes,  # type: ignore[arg-type]
diff --git a/doctr/models/zoo.py b/doctr/models/zoo.py
index c7842124e6..1dc131acd7 100644
--- a/doctr/models/zoo.py
+++ b/doctr/models/zoo.py
@@ -24,6 +24,7 @@ def _predictor(
     det_bs: int = 2,
     reco_bs: int = 128,
     detect_orientation: bool = False,
+    straighten_pages: bool = False,
     detect_language: bool = False,
     **kwargs,
 ) -> OCRPredictor:
@@ -53,6 +54,7 @@ def _predictor(
         preserve_aspect_ratio=preserve_aspect_ratio,
         symmetric_pad=symmetric_pad,
         detect_orientation=detect_orientation,
+        straighten_pages=straighten_pages,
         detect_language=detect_language,
         **kwargs,
     )
@@ -68,6 +70,7 @@ def ocr_predictor(
     symmetric_pad: bool = True,
     export_as_straight_boxes: bool = False,
     detect_orientation: bool = False,
+    straighten_pages: bool = False,
     detect_language: bool = False,
     **kwargs: Any,
 ) -> OCRPredictor:
@@ -96,6 +99,10 @@ def ocr_predictor(
             (potentially rotated) as straight bounding boxes.
         detect_orientation: if True, the estimated general page orientation will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
+        straighten_pages: if True, estimates the general orientation of the page
+            based on the median line orientation of the segmentation map.
+            Then, rotates the page before passing it again to the deep learning detection module.
+            Doing so will improve performance for documents with page-uniform rotations.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
         kwargs: keyword args of `OCRPredictor`
@@ -114,6 +121,7 @@ def ocr_predictor(
         symmetric_pad=symmetric_pad,
         export_as_straight_boxes=export_as_straight_boxes,
         detect_orientation=detect_orientation,
+        straighten_pages=straighten_pages,
         detect_language=detect_language,
         **kwargs,
     )
@@ -130,6 +138,7 @@ def _kie_predictor(
     det_bs: int = 2,
     reco_bs: int = 128,
     detect_orientation: bool = False,
+    straighten_pages: bool = False,
     detect_language: bool = False,
     **kwargs,
 ) -> KIEPredictor:
@@ -159,6 +168,7 @@ def _kie_predictor(
         preserve_aspect_ratio=preserve_aspect_ratio,
         symmetric_pad=symmetric_pad,
         detect_orientation=detect_orientation,
+        straighten_pages=straighten_pages,
         detect_language=detect_language,
         **kwargs,
     )
@@ -174,6 +184,7 @@ def kie_predictor(
     symmetric_pad: bool = True,
     export_as_straight_boxes: bool = False,
     detect_orientation: bool = False,
+    straighten_pages: bool = False,
     detect_language: bool = False,
     **kwargs: Any,
 ) -> KIEPredictor:
@@ -202,6 +213,10 @@ def kie_predictor(
             (potentially rotated) as straight bounding boxes.
         detect_orientation: if True, the estimated general page orientation will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
+        straighten_pages: if True, estimates the general orientation of the page
+            based on the median line orientation of the segmentation map.
+            Then, rotates the page before passing it again to the deep learning detection module.
+            Doing so will improve performance for documents with page-uniform rotations.
         detect_language: if True, the language prediction will be added to the predictions for each
             page. Doing so will slightly deteriorate the overall latency.
         kwargs: keyword args of `OCRPredictor`
@@ -220,6 +235,7 @@ def kie_predictor(
         symmetric_pad=symmetric_pad,
         export_as_straight_boxes=export_as_straight_boxes,
         detect_orientation=detect_orientation,
+        straighten_pages=straighten_pages,
         detect_language=detect_language,
         **kwargs,
     )
diff --git a/scripts/analyze.py b/scripts/analyze.py
index 067ed62685..2e0f19c034 100644
--- a/scripts/analyze.py
+++ b/scripts/analyze.py
@@ -31,8 +31,8 @@ def main(args):
 
     out = model(doc)
 
-    for page, img in zip(out.pages, doc):
-        page.show(img, block=not args.noblock, interactive=not args.static)
+    for page in out.pages:
+        page.show(block=not args.noblock, interactive=not args.static)
 
 
 def parse_args():
diff --git a/tests/common/test_io_elements.py b/tests/common/test_io_elements.py
index 965033290a..af982c6e04 100644
--- a/tests/common/test_io_elements.py
+++ b/tests/common/test_io_elements.py
@@ -72,6 +72,7 @@ def _mock_blocks(size=(1, 1), offset=(0, 0)):
 def _mock_pages(block_size=(1, 1), block_offset=(0, 0)):
     return [
         elements.Page(
+            np.random.randint(0, 255, (300, 200, 3), dtype=np.uint8),
             _mock_blocks(block_size, block_offset),
             0,
             (300, 200),
@@ -79,6 +80,7 @@ def _mock_pages(block_size=(1, 1), block_offset=(0, 0)):
             {"value": "EN", "confidence": 0.8},
         ),
         elements.Page(
+            np.random.randint(0, 255, (500, 1000, 3), dtype=np.uint8),
             _mock_blocks(block_size, block_offset),
             1,
             (500, 1000),
@@ -91,6 +93,7 @@ def _mock_pages(block_size=(1, 1), block_offset=(0, 0)):
 def _mock_kie_pages(prediction_size=(1, 1), prediction_offset=(0, 0)):
     return [
         elements.KIEPage(
+            np.random.randint(0, 255, (300, 200, 3), dtype=np.uint8),
             {CLASS_NAME: _mock_prediction(prediction_size, prediction_offset)},
             0,
             (300, 200),
@@ -98,6 +101,7 @@ def _mock_kie_pages(prediction_size=(1, 1), prediction_offset=(0, 0)):
             {"value": "EN", "confidence": 0.8},
         ),
         elements.KIEPage(
+            np.random.randint(0, 255, (500, 1000, 3), dtype=np.uint8),
             {CLASS_NAME: _mock_prediction(prediction_size, prediction_offset)},
             1,
             (500, 1000),
@@ -243,16 +247,18 @@ def test_block():
 
 
 def test_page():
+    page = np.zeros((300, 200, 3), dtype=np.uint8)
     page_idx = 0
     page_size = (300, 200)
     orientation = {"value": 0.0, "confidence": 0.0}
     language = {"value": "EN", "confidence": 0.8}
     blocks = _mock_blocks()
-    page = elements.Page(blocks, page_idx, page_size, orientation, language)
+    page = elements.Page(page, blocks, page_idx, page_size, orientation, language)
 
     # Attribute checks
     assert len(page.blocks) == len(blocks)
     assert all(isinstance(b, elements.Block) for b in page.blocks)
+    assert isinstance(page.page, np.ndarray)
     assert page.page_idx == page_idx
     assert page.dimensions == page_size
     assert page.orientation == orientation
@@ -281,7 +287,7 @@ def test_page():
     assert "\n".join(repr(page).split("\n")[:2]) == f"Page(\n  dimensions={page_size!r}"
 
     # Show
-    page.show(np.zeros((256, 256, 3), dtype=np.uint8), block=False)
+    page.show(block=False)
 
     # Synthesize
     img = page.synthesize()
@@ -290,16 +296,18 @@ def test_page():
 
 
 def test_kiepage():
+    page = np.zeros((300, 200, 3), dtype=np.uint8)
     page_idx = 0
     page_size = (300, 200)
     orientation = {"value": 0.0, "confidence": 0.0}
     language = {"value": "EN", "confidence": 0.8}
     predictions = {CLASS_NAME: _mock_prediction()}
-    kie_page = elements.KIEPage(predictions, page_idx, page_size, orientation, language)
+    kie_page = elements.KIEPage(page, predictions, page_idx, page_size, orientation, language)
 
     # Attribute checks
     assert len(kie_page.predictions) == len(predictions)
     assert all(isinstance(b, elements.Prediction) for b in kie_page.predictions[CLASS_NAME])
+    assert isinstance(kie_page.page, np.ndarray)
     assert kie_page.page_idx == page_idx
     assert kie_page.dimensions == page_size
     assert kie_page.orientation == orientation
@@ -328,7 +336,7 @@ def test_kiepage():
     assert "\n".join(repr(kie_page).split("\n")[:2]) == f"KIEPage(\n  dimensions={page_size!r}"
 
     # Show
-    kie_page.show(np.zeros((256, 256, 3), dtype=np.uint8), block=False)
+    kie_page.show(block=False)
 
     # Synthesize
     img = kie_page.synthesize()
@@ -355,7 +363,7 @@ def test_document():
     assert isinstance(doc.export_as_xml(), list) and len(doc.export_as_xml()) == len(pages)
 
     # Show
-    doc.show([np.zeros((256, 256, 3), dtype=np.uint8) for _ in range(len(pages))], block=False)
+    doc.show(block=False)
 
     # Synthesize
     img_list = doc.synthesize()
@@ -381,7 +389,7 @@ def test_kie_document():
     assert isinstance(doc.export_as_xml(), list) and len(doc.export_as_xml()) == len(pages)
 
     # Show
-    doc.show([np.zeros((256, 256, 3), dtype=np.uint8) for _ in range(len(pages))], block=False)
+    doc.show(block=False)
 
     # Synthesize
     img_list = doc.synthesize()
diff --git a/tests/common/test_models.py b/tests/common/test_models.py
index fea26024b1..25fb2c6c5f 100644
--- a/tests/common/test_models.py
+++ b/tests/common/test_models.py
@@ -6,7 +6,7 @@
 import requests
 
 from doctr.io import reader
-from doctr.models._utils import estimate_orientation, get_bitmap_angle, get_language, invert_data_structure
+from doctr.models._utils import estimate_orientation, get_language, invert_data_structure
 from doctr.utils import geometry
 
 
@@ -24,16 +24,19 @@ def mock_image(tmpdir_factory):
 @pytest.fixture(scope="function")
 def mock_bitmap(mock_image):
     bitmap = np.squeeze(cv2.cvtColor(mock_image, cv2.COLOR_BGR2GRAY) / 255.0)
+    bitmap = np.expand_dims(bitmap, axis=-1)
     return bitmap
 
 
-def test_get_bitmap_angle(mock_bitmap):
-    angle = get_bitmap_angle(mock_bitmap)
-    assert abs(angle - 30.0) < 1.0
+def test_estimate_orientation(mock_image, mock_bitmap, mock_tilted_payslip):
+    assert estimate_orientation(mock_image * 0) == 0
 
+    # test binarized image
+    angle = estimate_orientation(mock_bitmap)
+    assert abs(angle - 30.0) < 1.0
 
-def test_estimate_orientation(mock_image, mock_tilted_payslip):
-    assert estimate_orientation(mock_image * 0) == 0
+    angle = estimate_orientation(mock_bitmap * 255)
+    assert abs(angle - 30.0) < 1.0
 
     angle = estimate_orientation(mock_image)
     assert abs(angle - 30.0) < 1.0
@@ -49,6 +52,9 @@ def test_estimate_orientation(mock_image, mock_tilted_payslip):
     angle_rotated = estimate_orientation(rotated)
     assert abs(angle_rotated) < 1.0
 
+    with pytest.raises(AssertionError):
+        estimate_orientation(np.ones((10, 10, 10)))
+
 
 def test_get_lang():
     sentence = "This is a test sentence."
diff --git a/tests/common/test_models_builder.py b/tests/common/test_models_builder.py
index 7940bf8a5d..7d233dafb0 100644
--- a/tests/common/test_models_builder.py
+++ b/tests/common/test_models_builder.py
@@ -20,25 +20,29 @@ def test_documentbuilder():
 
     # Don't resolve lines
     doc_builder = builder.DocumentBuilder(resolve_lines=False, resolve_blocks=False)
+    pages = [np.zeros((100, 200, 3))] * num_pages
     boxes = np.random.rand(words_per_page, 6)  # array format
     boxes[:2] *= boxes[2:4]
     # Arg consistency check
     with pytest.raises(ValueError):
-        doc_builder([boxes, boxes], [("hello", 1.0)] * 3, [(100, 200), (100, 200)])
-    out = doc_builder([boxes, boxes], [[("hello", 1.0)] * words_per_page] * num_pages, [(100, 200), (100, 200)])
+        doc_builder(pages, [boxes, boxes], [("hello", 1.0)] * 3, [(100, 200), (100, 200)])
+    out = doc_builder(pages, [boxes, boxes], [[("hello", 1.0)] * words_per_page] * num_pages, [(100, 200), (100, 200)])
     assert isinstance(out, Document)
     assert len(out.pages) == num_pages
+    assert all(isinstance(page.page, np.ndarray) for page in out.pages) and all(
+        page.page.shape == (100, 200, 3) for page in out.pages
+    )
     # 1 Block & 1 line per page
     assert len(out.pages[0].blocks) == 1 and len(out.pages[0].blocks[0].lines) == 1
     assert len(out.pages[0].blocks[0].lines[0].words) == words_per_page
 
     # Resolve lines
     doc_builder = builder.DocumentBuilder(resolve_lines=True, resolve_blocks=True)
-    out = doc_builder([boxes, boxes], [[("hello", 1.0)] * words_per_page] * num_pages, [(100, 200), (100, 200)])
+    out = doc_builder(pages, [boxes, boxes], [[("hello", 1.0)] * words_per_page] * num_pages, [(100, 200), (100, 200)])
 
     # No detection
     boxes = np.zeros((0, 5))
-    out = doc_builder([boxes, boxes], [[], []], [(100, 200), (100, 200)])
+    out = doc_builder(pages, [boxes, boxes], [[], []], [(100, 200), (100, 200)])
     assert len(out.pages[0].blocks) == 0
 
     # Rotated boxes to export as straight boxes
@@ -49,7 +53,7 @@ def test_documentbuilder():
         ]
     )
     doc_builder_2 = builder.DocumentBuilder(resolve_blocks=False, resolve_lines=False, export_as_straight_boxes=True)
-    out = doc_builder_2([boxes], [[("hello", 0.99), ("word", 0.99)]], [(100, 100)])
+    out = doc_builder_2([np.zeros((100, 100, 3))], [boxes], [[("hello", 0.99), ("word", 0.99)]], [(100, 100)])
     assert out.pages[0].blocks[0].lines[0].words[-1].geometry == ((0.45, 0.5), (0.6, 0.65))
 
     # Repr
@@ -64,18 +68,23 @@ def test_kiedocumentbuilder():
 
     # Don't resolve lines
     doc_builder = builder.KIEDocumentBuilder(resolve_lines=False, resolve_blocks=False)
+    pages = [np.zeros((100, 200, 3))] * num_pages
     predictions = {CLASS_NAME: np.random.rand(words_per_page, 6)}  # dict format
     predictions[CLASS_NAME][:2] *= predictions[CLASS_NAME][2:4]
     # Arg consistency check
     with pytest.raises(ValueError):
-        doc_builder([predictions, predictions], [{CLASS_NAME: ("hello", 1.0)}] * 3, [(100, 200), (100, 200)])
+        doc_builder(pages, [predictions, predictions], [{CLASS_NAME: ("hello", 1.0)}] * 3, [(100, 200), (100, 200)])
     out = doc_builder(
+        pages,
         [predictions, predictions],
         [{CLASS_NAME: [("hello", 1.0)] * words_per_page}] * num_pages,
         [(100, 200), (100, 200)],
     )
     assert isinstance(out, KIEDocument)
     assert len(out.pages) == num_pages
+    assert all(isinstance(page.page, np.ndarray) for page in out.pages) and all(
+        page.page.shape == (100, 200, 3) for page in out.pages
+    )
     # 1 Block & 1 line per page
     assert len(out.pages[0].predictions) == 1
     assert len(out.pages[0].predictions[CLASS_NAME]) == words_per_page
@@ -83,6 +92,7 @@ def test_kiedocumentbuilder():
     # Resolve lines
     doc_builder = builder.KIEDocumentBuilder(resolve_lines=True, resolve_blocks=True)
     out = doc_builder(
+        pages,
         [predictions, predictions],
         [{CLASS_NAME: [("hello", 1.0)] * words_per_page}] * num_pages,
         [(100, 200), (100, 200)],
@@ -90,7 +100,7 @@ def test_kiedocumentbuilder():
 
     # No detection
     predictions = {CLASS_NAME: np.zeros((0, 5))}
-    out = doc_builder([predictions, predictions], [{CLASS_NAME: []}, {CLASS_NAME: []}], [(100, 200), (100, 200)])
+    out = doc_builder(pages, [predictions, predictions], [{CLASS_NAME: []}, {CLASS_NAME: []}], [(100, 200), (100, 200)])
     assert len(out.pages[0].predictions[CLASS_NAME]) == 0
 
     # Rotated boxes to export as straight boxes
@@ -103,7 +113,9 @@ def test_kiedocumentbuilder():
         )
     }
     doc_builder_2 = builder.KIEDocumentBuilder(resolve_blocks=False, resolve_lines=False, export_as_straight_boxes=True)
-    out = doc_builder_2([predictions], [{CLASS_NAME: [("hello", 0.99), ("word", 0.99)]}], [(100, 100)])
+    out = doc_builder_2(
+        [np.zeros((100, 100, 3))], [predictions], [{CLASS_NAME: [("hello", 0.99), ("word", 0.99)]}], [(100, 100)]
+    )
     assert out.pages[0].predictions[CLASS_NAME][0].geometry == ((0.05, 0.1), (0.2, 0.25))
     assert out.pages[0].predictions[CLASS_NAME][1].geometry == ((0.45, 0.5), (0.6, 0.65))
 
diff --git a/tests/pytorch/test_models_detection_pt.py b/tests/pytorch/test_models_detection_pt.py
index 39eae65168..8dac82d436 100644
--- a/tests/pytorch/test_models_detection_pt.py
+++ b/tests/pytorch/test_models_detection_pt.py
@@ -95,9 +95,13 @@ def test_detection_zoo(arch_name):
         input_tensor = input_tensor.cuda()
 
     with torch.no_grad():
-        out = predictor(input_tensor)
+        out, seq_maps = predictor(input_tensor, return_maps=True)
     assert all(isinstance(boxes, dict) for boxes in out)
     assert all(isinstance(boxes[CLASS_NAME], np.ndarray) and boxes[CLASS_NAME].shape[1] == 5 for boxes in out)
+    assert all(isinstance(seq_map, np.ndarray) for seq_map in seq_maps)
+    assert all(seq_map.shape[:2] == (1024, 1024) for seq_map in seq_maps)
+    # check that all values in the seq_maps are between 0 and 1
+    assert all((seq_map >= 0).all() and (seq_map <= 1).all() for seq_map in seq_maps)
 
 
 def test_erode():
diff --git a/tests/pytorch/test_models_zoo_pt.py b/tests/pytorch/test_models_zoo_pt.py
index cefb77176f..fa3f23b9d1 100644
--- a/tests/pytorch/test_models_zoo_pt.py
+++ b/tests/pytorch/test_models_zoo_pt.py
@@ -73,10 +73,17 @@ def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
     assert out.pages[0].orientation["value"] == orientation
 
 
-def test_trained_ocr_predictor(mock_tilted_payslip):
-    doc = DocumentFile.from_images(mock_tilted_payslip)
+def test_trained_ocr_predictor(mock_payslip):
+    doc = DocumentFile.from_images(mock_payslip)
 
-    det_predictor = detection_predictor("db_resnet50", pretrained=True, batch_size=2, assume_straight_pages=True)
+    det_predictor = detection_predictor(
+        "db_resnet50",
+        pretrained=True,
+        batch_size=2,
+        assume_straight_pages=True,
+        symmetric_pad=True,
+        preserve_aspect_ratio=False,
+    )
     reco_predictor = recognition_predictor("crnn_vgg16_bn", pretrained=True, batch_size=128)
 
     predictor = OCRPredictor(
@@ -90,16 +97,12 @@ def test_trained_ocr_predictor(mock_tilted_payslip):
     out = predictor(doc)
 
     assert out.pages[0].blocks[0].lines[0].words[0].value == "Mr."
-    geometry_mr = np.array(
-        [[0.08563021, 0.35584526], [0.11464554, 0.34078913], [0.1274898, 0.36012764], [0.09847447, 0.37518377]]
-    )
-    assert np.allclose(np.array(out.pages[0].blocks[0].lines[0].words[0].geometry), geometry_mr)
+    geometry_mr = np.array([[0.1083984375, 0.0634765625], [0.1494140625, 0.0859375]])
+    assert np.allclose(np.array(out.pages[0].blocks[0].lines[0].words[0].geometry), geometry_mr, rtol=0.05)
 
     assert out.pages[0].blocks[1].lines[0].words[-1].value == "revised"
-    geometry_revised = np.array(
-        [[0.50422498, 0.19551784], [0.55741975, 0.16791493], [0.56705294, 0.18241881], [0.51385817, 0.21002172]]
-    )
-    assert np.allclose(np.array(out.pages[0].blocks[1].lines[0].words[-1].geometry), geometry_revised)
+    geometry_revised = np.array([[0.7548828125, 0.126953125], [0.8388671875, 0.1484375]])
+    assert np.allclose(np.array(out.pages[0].blocks[1].lines[0].words[-1].geometry), geometry_revised, rtol=0.05)
 
     det_predictor = detection_predictor(
         "db_resnet50",
@@ -181,10 +184,17 @@ def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
     assert out.pages[0].orientation["value"] == orientation
 
 
-def test_trained_kie_predictor(mock_tilted_payslip):
-    doc = DocumentFile.from_images(mock_tilted_payslip)
+def test_trained_kie_predictor(mock_payslip):
+    doc = DocumentFile.from_images(mock_payslip)
 
-    det_predictor = detection_predictor("db_resnet50", pretrained=True, batch_size=2, assume_straight_pages=True)
+    det_predictor = detection_predictor(
+        "db_resnet50",
+        pretrained=True,
+        batch_size=2,
+        assume_straight_pages=True,
+        symmetric_pad=True,
+        preserve_aspect_ratio=False,
+    )
     reco_predictor = recognition_predictor("crnn_vgg16_bn", pretrained=True, batch_size=128)
 
     predictor = KIEPredictor(
@@ -199,17 +209,12 @@ def test_trained_kie_predictor(mock_tilted_payslip):
 
     assert isinstance(out, KIEDocument)
     assert out.pages[0].predictions[CLASS_NAME][0].value == "Mr."
-    geometry_mr = np.array(
-        [[0.08563021, 0.35584526], [0.11464554, 0.34078913], [0.1274898, 0.36012764], [0.09847447, 0.37518377]]
-    )
-    assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][0].geometry), geometry_mr)
+    geometry_mr = np.array([[0.1083984375, 0.0634765625], [0.1494140625, 0.0859375]])
+    assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][0].geometry), geometry_mr, rtol=0.05)
 
-    print(out.pages[0].predictions[CLASS_NAME])
-    assert out.pages[0].predictions[CLASS_NAME][7].value == "revised"
-    geometry_revised = np.array(
-        [[0.50422498, 0.19551784], [0.55741975, 0.16791493], [0.56705294, 0.18241881], [0.51385817, 0.21002172]]
-    )
-    assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][7].geometry), geometry_revised)
+    assert out.pages[0].predictions[CLASS_NAME][6].value == "revised"
+    geometry_revised = np.array([[0.7548828125, 0.126953125], [0.8388671875, 0.1484375]])
+    assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][6].geometry), geometry_revised, rtol=0.05)
 
     det_predictor = detection_predictor(
         "db_resnet50",
diff --git a/tests/tensorflow/test_models_detection_tf.py b/tests/tensorflow/test_models_detection_tf.py
index ef8d6920ef..d5411f3027 100644
--- a/tests/tensorflow/test_models_detection_tf.py
+++ b/tests/tensorflow/test_models_detection_tf.py
@@ -146,9 +146,13 @@ def test_detection_zoo(arch_name):
     # object check
     assert isinstance(predictor, DetectionPredictor)
     input_tensor = tf.random.uniform(shape=[2, 1024, 1024, 3], minval=0, maxval=1)
-    out = predictor(input_tensor)
+    out, seq_maps = predictor(input_tensor, return_maps=True)
     assert all(isinstance(boxes, dict) for boxes in out)
     assert all(isinstance(boxes[CLASS_NAME], np.ndarray) and boxes[CLASS_NAME].shape[1] == 5 for boxes in out)
+    assert all(isinstance(seq_map, np.ndarray) for seq_map in seq_maps)
+    assert all(seq_map.shape[:2] == (1024, 1024) for seq_map in seq_maps)
+    # check that all values in the seq_maps are between 0 and 1
+    assert all((seq_map >= 0).all() and (seq_map <= 1).all() for seq_map in seq_maps)
 
 
 def test_detection_zoo_error():
diff --git a/tests/tensorflow/test_models_zoo_tf.py b/tests/tensorflow/test_models_zoo_tf.py
index 6d4b85e2c8..32e7988560 100644
--- a/tests/tensorflow/test_models_zoo_tf.py
+++ b/tests/tensorflow/test_models_zoo_tf.py
@@ -72,10 +72,17 @@ def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
     assert out.pages[0].language["value"] == language
 
 
-def test_trained_ocr_predictor(mock_tilted_payslip):
-    doc = DocumentFile.from_images(mock_tilted_payslip)
+def test_trained_ocr_predictor(mock_payslip):
+    doc = DocumentFile.from_images(mock_payslip)
 
-    det_predictor = detection_predictor("db_resnet50", pretrained=True, batch_size=2, assume_straight_pages=True)
+    det_predictor = detection_predictor(
+        "db_resnet50",
+        pretrained=True,
+        batch_size=2,
+        assume_straight_pages=True,
+        symmetric_pad=True,
+        preserve_aspect_ratio=False,
+    )
     reco_predictor = recognition_predictor("crnn_vgg16_bn", pretrained=True, batch_size=128)
 
     predictor = OCRPredictor(
@@ -89,16 +96,12 @@ def test_trained_ocr_predictor(mock_tilted_payslip):
     out = predictor(doc)
 
     assert out.pages[0].blocks[0].lines[0].words[0].value == "Mr."
-    geometry_mr = np.array(
-        [[0.08844472, 0.35763523], [0.11625107, 0.34320644], [0.12588427, 0.35771032], [0.09807791, 0.37213911]]
-    )
-    assert np.allclose(np.array(out.pages[0].blocks[0].lines[0].words[0].geometry), geometry_mr)
+    geometry_mr = np.array([[0.1083984375, 0.0634765625], [0.1494140625, 0.0859375]])
+    assert np.allclose(np.array(out.pages[0].blocks[0].lines[0].words[0].geometry), geometry_mr, rtol=0.05)
 
     assert out.pages[0].blocks[1].lines[0].words[-1].value == "revised"
-    geometry_revised = np.array(
-        [[0.50422498, 0.19551784], [0.55741975, 0.16791493], [0.56705294, 0.18241881], [0.51385817, 0.21002172]]
-    )
-    assert np.allclose(np.array(out.pages[0].blocks[1].lines[0].words[-1].geometry), geometry_revised)
+    geometry_revised = np.array([[0.7548828125, 0.126953125], [0.8388671875, 0.1484375]])
+    assert np.allclose(np.array(out.pages[0].blocks[1].lines[0].words[-1].geometry), geometry_revised, rtol=0.05)
 
     det_predictor = detection_predictor(
         "db_resnet50",
@@ -179,10 +182,17 @@ def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pa
     assert out.pages[0].language["value"] == language
 
 
-def test_trained_kie_predictor(mock_tilted_payslip):
-    doc = DocumentFile.from_images(mock_tilted_payslip)
+def test_trained_kie_predictor(mock_payslip):
+    doc = DocumentFile.from_images(mock_payslip)
 
-    det_predictor = detection_predictor("db_resnet50", pretrained=True, batch_size=2, assume_straight_pages=True)
+    det_predictor = detection_predictor(
+        "db_resnet50",
+        pretrained=True,
+        batch_size=2,
+        assume_straight_pages=True,
+        symmetric_pad=True,
+        preserve_aspect_ratio=False,
+    )
     reco_predictor = recognition_predictor("crnn_vgg16_bn", pretrained=True, batch_size=128)
 
     predictor = KIEPredictor(
@@ -197,16 +207,12 @@ def test_trained_kie_predictor(mock_tilted_payslip):
 
     assert isinstance(out, KIEDocument)
     assert out.pages[0].predictions[CLASS_NAME][0].value == "Mr."
-    geometry_mr = np.array(
-        [[0.08844472, 0.35763523], [0.11625107, 0.34320644], [0.12588427, 0.35771032], [0.09807791, 0.37213911]]
-    )
-    assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][0].geometry), geometry_mr)
+    geometry_mr = np.array([[0.1083984375, 0.0634765625], [0.1494140625, 0.0859375]])
+    assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][0].geometry), geometry_mr, rtol=0.05)
 
-    assert out.pages[0].predictions[CLASS_NAME][-1].value == "Kabir)"
-    geometry_revised = np.array(
-        [[0.43725992, 0.67232439], [0.49045468, 0.64472149], [0.50570724, 0.66768597], [0.452512473, 0.69528887]]
-    )
-    assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][-1].geometry), geometry_revised)
+    assert out.pages[0].predictions[CLASS_NAME][3].value == "revised"
+    geometry_revised = np.array([[0.7548828125, 0.126953125], [0.8388671875, 0.1484375]])
+    assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][3].geometry), geometry_revised, rtol=0.05)
 
     det_predictor = detection_predictor(
         "db_resnet50",