[docs] documentation for changing predictors batch sizes (mindee#1514)

odulcy-mindee · Mar 15, 2024 · dddc2df · dddc2df
1 parent c2b197d
commit dddc2df
Show file tree

Hide file tree

Showing 4 changed files with 13 additions and 3 deletions.
diff --git a/docs/source/using_doctr/using_models.rst b/docs/source/using_doctr/using_models.rst
@@ -298,6 +298,16 @@ For instance, this snippet instantiates an end-to-end ocr_predictor working with
     from doctr.model import ocr_predictor
     model = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True)
 
+Additionally, you can change the batch sizes of the underlying detection and recognition predictors to tune performance for your hardware:
+
+* `det_bs`: batch size for the detection model (default: 2)
+* `reco_bs`: batch size for the recognition model (default: 128)
+
+.. code:: python3
+
+    from doctr.models import ocr_predictor
+    model = ocr_predictor(pretrained=True, det_bs=4, reco_bs=1024)
+
 To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying `DocumentBuilder`:
 
 * `resolve_lines`: whether words should be automatically grouped into lines (default: True)

diff --git a/doctr/models/classification/zoo.py b/doctr/models/classification/zoo.py
@@ -42,7 +42,7 @@ def _crop_orientation_predictor(arch: str, pretrained: bool, **kwargs: Any) -> C
     _model = classification.__dict__[arch](pretrained=pretrained)
     kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
     kwargs["std"] = kwargs.get("std", _model.cfg["std"])
-    kwargs["batch_size"] = kwargs.get("batch_size", 64)
+    kwargs["batch_size"] = kwargs.get("batch_size", 128)
     input_shape = _model.cfg["input_shape"][:-1] if is_tf_available() else _model.cfg["input_shape"][1:]
     predictor = CropOrientationPredictor(
         PreProcessor(input_shape, preserve_aspect_ratio=True, symmetric_pad=True, **kwargs), _model

diff --git a/doctr/models/detection/zoo.py b/doctr/models/detection/zoo.py
@@ -62,7 +62,7 @@ def _predictor(arch: Any, pretrained: bool, assume_straight_pages: bool = True,
 
     kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
     kwargs["std"] = kwargs.get("std", _model.cfg["std"])
-    kwargs["batch_size"] = kwargs.get("batch_size", 1)
+    kwargs["batch_size"] = kwargs.get("batch_size", 2)
     predictor = DetectionPredictor(
         PreProcessor(_model.cfg["input_shape"][:-1] if is_tf_available() else _model.cfg["input_shape"][1:], **kwargs),
         _model,

diff --git a/doctr/models/recognition/zoo.py b/doctr/models/recognition/zoo.py
@@ -45,7 +45,7 @@ def _predictor(arch: Any, pretrained: bool, **kwargs: Any) -> RecognitionPredict
 
     kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
     kwargs["std"] = kwargs.get("std", _model.cfg["std"])
-    kwargs["batch_size"] = kwargs.get("batch_size", 32)
+    kwargs["batch_size"] = kwargs.get("batch_size", 128)
     input_shape = _model.cfg["input_shape"][:2] if is_tf_available() else _model.cfg["input_shape"][-2:]
     predictor = RecognitionPredictor(PreProcessor(input_shape, preserve_aspect_ratio=True, **kwargs), _model)