From d5c23b3a468e6a8377f66dde0ca4f2f2fab3f9a0 Mon Sep 17 00:00:00 2001 From: felix Date: Wed, 15 Nov 2023 10:19:32 +0100 Subject: [PATCH] rebase --- doctr/models/_utils.py | 10 ++++++++-- doctr/models/detection/predictor/pytorch.py | 6 +++--- doctr/models/detection/predictor/tensorflow.py | 2 +- doctr/models/kie_predictor/pytorch.py | 11 ++++------- doctr/models/kie_predictor/tensorflow.py | 2 +- doctr/models/predictor/base.py | 2 +- doctr/models/predictor/pytorch.py | 11 ++++------- doctr/models/predictor/tensorflow.py | 6 ++---- tests/common/test_models_builder.py | 8 ++++---- 9 files changed, 28 insertions(+), 30 deletions(-) diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py index ec93c56f7e..71828d3151 100644 --- a/doctr/models/_utils.py +++ b/doctr/models/_utils.py @@ -21,7 +21,9 @@ def get_max_width_length_ratio(contour: np.ndarray) -> float: ---- contour: the contour from cv2.findContour - Returns: the maximum shape ratio + Returns: + ------- + the maximum shape ratio """ _, (w, h), _ = cv2.minAreaRect(contour) return max(w / h, h / w) @@ -32,6 +34,7 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li lines of the document and the assumption that they should be horizontal. Args: + ---- img: the img or bitmap to analyze (H, W, C) n_ct: the number of contours used for the orientation estimation ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines @@ -40,7 +43,6 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li ------- the angle of the general document orientation """ - assert len(img.shape) == 3 and img.shape[-1] in [1, 3], f"Image shape {img.shape} not supported" if np.max(img) <= 1 and np.min(img) >= 0 or (np.max(img) <= 255 and np.min(img) >= 0 and img.shape[-1] == 1): thresh = img.astype(np.uint8) @@ -119,9 +121,13 @@ def rectify_loc_preds( def get_language(text: str) -> Tuple[str, float]: """Get languages of a text using langdetect model. Get the language with the highest probability or no language if only a few words or a low probability + Args: + ---- text (str): text + Returns: + ------- The detected language in ISO 639 code and confidence score """ try: diff --git a/doctr/models/detection/predictor/pytorch.py b/doctr/models/detection/predictor/pytorch.py index 05a6426503..b78dc4b759 100644 --- a/doctr/models/detection/predictor/pytorch.py +++ b/doctr/models/detection/predictor/pytorch.py @@ -53,9 +53,9 @@ def forward( self.model(batch, return_preds=True, return_model_output=True, **kwargs) for batch in processed_batches ] preds = [pred for batch in predicted_batches for pred in batch["preds"]] - seg_maps = [ - pred.permute(1, 2, 0).detach().cpu().numpy() for batch in predicted_batches for pred in batch["out_map"] - ] if return_maps: + seg_maps = [ + pred.permute(1, 2, 0).detach().cpu().numpy() for batch in predicted_batches for pred in batch["out_map"] + ] return preds, seg_maps return preds diff --git a/doctr/models/detection/predictor/tensorflow.py b/doctr/models/detection/predictor/tensorflow.py index 30a1d3aba1..d82b9f25f5 100644 --- a/doctr/models/detection/predictor/tensorflow.py +++ b/doctr/models/detection/predictor/tensorflow.py @@ -51,7 +51,7 @@ def __call__( ] preds = [pred for batch in predicted_batches for pred in batch["preds"]] - seg_maps = [pred.numpy() for batch in predicted_batches for pred in batch["out_map"]] if return_maps: + seg_maps = [pred.numpy() for batch in predicted_batches for pred in batch["out_map"]] return preds, seg_maps return preds diff --git a/doctr/models/kie_predictor/pytorch.py b/doctr/models/kie_predictor/pytorch.py index 115d8668db..e5dee4fffd 100644 --- a/doctr/models/kie_predictor/pytorch.py +++ b/doctr/models/kie_predictor/pytorch.py @@ -36,7 +36,7 @@ class KIEPredictor(nn.Module, _KIEPredictor): page. Doing so will slightly deteriorate the overall latency. detect_language: if True, the language prediction will be added to the predictions for each page. Doing so will slightly deteriorate the overall latency. - kwargs: keyword args of `DocumentBuilder` + **kwargs: keyword args of `DocumentBuilder` """ def __init__( @@ -95,10 +95,7 @@ def forward( if self.detect_orientation else [estimate_orientation(seq_map) for seq_map in seg_maps] ) - pages = [ - rotate_image(page, -angle, expand=False) # type: ignore[arg-type] - for page, angle in zip(pages, origin_page_orientations) - ] + pages = [rotate_image(page, -angle, expand=False) for page, angle in zip(pages, origin_page_orientations)] # Forward again to get predictions on straight pages loc_preds = self.det_predictor(pages, **kwargs) @@ -146,10 +143,10 @@ def forward( languages_dict = None out = self.doc_builder( - pages, # type: ignore[arg-type] + pages, boxes_per_page, text_preds_per_page, - origin_page_shapes, # type: ignore[arg-type] + origin_page_shapes, orientations, languages_dict, ) diff --git a/doctr/models/kie_predictor/tensorflow.py b/doctr/models/kie_predictor/tensorflow.py index 94d6ccab58..6ac0a6221f 100644 --- a/doctr/models/kie_predictor/tensorflow.py +++ b/doctr/models/kie_predictor/tensorflow.py @@ -36,7 +36,7 @@ class KIEPredictor(NestedObject, _KIEPredictor): page. Doing so will slightly deteriorate the overall latency. detect_language: if True, the language prediction will be added to the predictions for each page. Doing so will slightly deteriorate the overall latency. - kwargs: keyword args of `DocumentBuilder` + **kwargs: keyword args of `DocumentBuilder` """ _children_names = ["det_predictor", "reco_predictor", "doc_builder"] diff --git a/doctr/models/predictor/base.py b/doctr/models/predictor/base.py index 1190606299..4de41e01e0 100644 --- a/doctr/models/predictor/base.py +++ b/doctr/models/predictor/base.py @@ -29,7 +29,7 @@ class _OCRPredictor: accordingly. Doing so will improve performances for documents with page-uniform rotations. preserve_aspect_ratio: if True, resize preserving the aspect ratio (with padding) symmetric_pad: if True and preserve_aspect_ratio is True, pas the image symmetrically. - kwargs: keyword args of `DocumentBuilder` + **kwargs: keyword args of `DocumentBuilder` """ crop_orientation_predictor: Optional[CropOrientationPredictor] diff --git a/doctr/models/predictor/pytorch.py b/doctr/models/predictor/pytorch.py index 350444fba3..874128c99f 100644 --- a/doctr/models/predictor/pytorch.py +++ b/doctr/models/predictor/pytorch.py @@ -36,7 +36,7 @@ class OCRPredictor(nn.Module, _OCRPredictor): page. Doing so will slightly deteriorate the overall latency. detect_language: if True, the language prediction will be added to the predictions for each page. Doing so will slightly deteriorate the overall latency. - kwargs: keyword args of `DocumentBuilder` + **kwargs: keyword args of `DocumentBuilder` """ def __init__( @@ -90,10 +90,7 @@ def forward( if self.detect_orientation else [estimate_orientation(seq_map) for seq_map in seg_maps] ) - pages = [ - rotate_image(page, -angle, expand=False) # type: ignore[arg-type] - for page, angle in zip(pages, origin_page_orientations) - ] + pages = [rotate_image(page, -angle, expand=False) for page, angle in zip(pages, origin_page_orientations)] # Forward again to get predictions on straight pages loc_preds = self.det_predictor(pages, **kwargs) @@ -130,10 +127,10 @@ def forward( languages_dict = None out = self.doc_builder( - pages, # type: ignore[arg-type] + pages, boxes, text_preds, - origin_page_shapes, # type: ignore[arg-type] + origin_page_shapes, orientations, languages_dict, ) diff --git a/doctr/models/predictor/tensorflow.py b/doctr/models/predictor/tensorflow.py index 5f747a01d4..5128711502 100644 --- a/doctr/models/predictor/tensorflow.py +++ b/doctr/models/predictor/tensorflow.py @@ -36,7 +36,7 @@ class OCRPredictor(NestedObject, _OCRPredictor): page. Doing so will slightly deteriorate the overall latency. detect_language: if True, the language prediction will be added to the predictions for each page. Doing so will slightly deteriorate the overall latency. - kwargs: keyword args of `DocumentBuilder` + **kwargs: keyword args of `DocumentBuilder` """ _children_names = ["det_predictor", "reco_predictor", "doc_builder"] @@ -97,9 +97,7 @@ def __call__( assert all( len(loc_pred) == 1 for loc_pred in loc_preds_dict ), "Detection Model in ocr_predictor should output only one class" - loc_preds: List[np.ndarray] = [ - list(loc_pred.values())[0] for loc_pred in loc_preds_dict # type: ignore[union-attr] - ] + loc_preds: List[np.ndarray] = [list(loc_pred.values())[0] for loc_pred in loc_preds_dict] # type: ignore[union-attr] # Rectify crops if aspect ratio loc_preds = self._remove_padding(pages, loc_preds) diff --git a/tests/common/test_models_builder.py b/tests/common/test_models_builder.py index 0a8edadb39..7d233dafb0 100644 --- a/tests/common/test_models_builder.py +++ b/tests/common/test_models_builder.py @@ -29,8 +29,8 @@ def test_documentbuilder(): out = doc_builder(pages, [boxes, boxes], [[("hello", 1.0)] * words_per_page] * num_pages, [(100, 200), (100, 200)]) assert isinstance(out, Document) assert len(out.pages) == num_pages - assert all([isinstance(page.page, np.ndarray) for page in out.pages]) and all( - [page.page.shape == (100, 200, 3) for page in out.pages] + assert all(isinstance(page.page, np.ndarray) for page in out.pages) and all( + page.page.shape == (100, 200, 3) for page in out.pages ) # 1 Block & 1 line per page assert len(out.pages[0].blocks) == 1 and len(out.pages[0].blocks[0].lines) == 1 @@ -82,8 +82,8 @@ def test_kiedocumentbuilder(): ) assert isinstance(out, KIEDocument) assert len(out.pages) == num_pages - assert all([isinstance(page.page, np.ndarray) for page in out.pages]) and all( - [page.page.shape == (100, 200, 3) for page in out.pages] + assert all(isinstance(page.page, np.ndarray) for page in out.pages) and all( + page.page.shape == (100, 200, 3) for page in out.pages ) # 1 Block & 1 line per page assert len(out.pages[0].predictions) == 1