[UPDATE] docs

mindee · Oct 27, 2023 · 2b3a578 · 2b3a578
1 parent e257a29
commit 2b3a578
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 3 deletions.
diff --git a/docs/source/using_doctr/using_datasets.rst b/docs/source/using_doctr/using_datasets.rst
@@ -86,7 +86,7 @@ This dataset contains the information to train or validate a text recognition m
 +-----------------------------+---------------------------------+---------------------------------+---------------------------------------------+
 | IIITHWS                     | 7141797                         | 793533                          | english / handwritten / external resources  |
 +-----------------------------+---------------------------------+---------------------------------+---------------------------------------------+
-| WILDRECEIPT                 | 1268                            | 472                             | english / external resources                |
+| WILDRECEIPT                 | 49377                           | 19598                           | english / external resources                |
 +-----------------------------+---------------------------------+---------------------------------+---------------------------------------------+
 
 .. code:: python3

diff --git a/doctr/datasets/wildreceipt.py b/doctr/datasets/wildreceipt.py
@@ -25,7 +25,7 @@ class WILDRECEIPT(AbstractDataset):
         <https://arxiv.org/abs/2103.14470v1>`_ |
     `repository <https://download.openmmlab.com/mmocr/data/wildreceipt.tar>`_.
 
-    >>> # NOTE: You need to download the dataset from the repository.
+    >>> # NOTE: You need to download the dataset first.
     >>> from doctr.datasets import WILDRECEIPT
     >>> train_set = WILDRECEIPT(train=True, img_folder="/path/to/wildreceipt/",
     >>>                     label_path="/path/to/wildreceipt/train.txt")
@@ -99,7 +99,8 @@ def __init__(
                     img_path=os.path.join(tmp_root, img_path), geoms=np.asarray(box_targets, dtype=int).clip(min=0)
                 )
                 for crop, label in zip(crops, list(text_targets)):
-                    self.data.append((crop, label))
+                    if not any(char in label for char in ["", "-", "*", "/", "=", "#", "@"]):
+                        self.data.append((crop, label))
             else:
                 self.data.append(
                     (img_path, dict(boxes=np.asarray(box_targets, dtype=int).clip(min=0), labels=list(text_targets)))