update models, and bump ver to v2.0.1

breezedeus · Sep 21, 2021 · 752340d · 752340d
1 parent dda89ff
commit 752340d
Show file tree

Hide file tree

Showing 9 changed files with 53 additions and 43 deletions.
diff --git a/Makefile b/Makefile
@@ -24,7 +24,7 @@ predict:
 package:
 	python setup.py sdist bdist_wheel
 
-VERSION = 2.0.0
+VERSION = 2.0.1
 upload:
 	python -m twine upload  dist/cnocr-$(VERSION)* --verbose
 

diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@ English [README](./README_en.md) (`out-dated`).
 
 # cnocr
 
-**cnocr** 是 **Python 3** 下的**文字识别**（**Optical Character Recognition**，简称**OCR**）工具包，支持**中文**、**英文**的常见字符识别，自带了多个训练好的检测模型，安装后即可直接使用。欢迎扫码加入QQ交流群：
+**cnocr** 是 **Python 3** 下的**文字识别**（**Optical Character Recognition**，简称**OCR**）工具包，支持**中文**、**英文**的常见字符识别，自带了多个训练好的识别模型，安装后即可直接使用。欢迎扫码加入QQ交流群：
 
 ![QQ群二维码](./docs/cnocr-qq.jpg)
 
@@ -87,7 +87,7 @@ cnocr的ocr模型可以分为两阶段：第一阶段是获得ocr图片的局部
 
 
 
-cnocr **V2.0** 目前包含以下可直接使用的模型，训练好的模型都放在 **[cnocr-models](https://github.com/breezedeus/cnocr-models)** 项目中，可免费下载使用：
+cnocr **V2.0** 目前包含以下可直接使用的模型，训练好的模型都放在 **[cnstd-cnocr-models](https://github.com/breezedeus/cnstd-cnocr-models)** 项目中，可免费下载使用：
 
 | 模型名称 | 局部编码模型 | 序列编码模型 | 模型大小 | 迭代次数 | 测试集准确率  |
 | :------- | ------------ | ------------ | -------- | ------ | -------- |
@@ -105,7 +105,7 @@ cnocr **V2.0** 目前包含以下可直接使用的模型，训练好的模型
 首次使用cnocr时，系统会**自动下载** zip格式的模型压缩文件，并存于 `~/.cnocr`目录（Windows下默认路径为 `C:\Users\<username>\AppData\Roaming\cnocr`）。
 下载后的zip文件代码会自动对其解压，然后把解压后的模型相关目录放于`~/.cnocr/2.0`目录中。
 
-如果系统无法自动成功下载zip文件，则需要手动从 **[cnocr-models](https://github.com/breezedeus/cnocr-models)** 下载此zip文件并把它放于 `~/.cnocr/2.0`目录。如果Github下载太慢，也可以从 [百度云盘](https://pan.baidu.com/s/1c68zjHfTVeqiSMXBEPYMrg) 下载， 提取码为 ` 9768`。
+如果系统无法自动成功下载zip文件，则需要手动从 **[cnstd-cnocr-models](https://github.com/breezedeus/cnstd-cnocr-models)** 下载此zip文件并把它放于 `~/.cnocr/2.0`目录。如果GitHub下载太慢，也可以从 [百度云盘](https://pan.baidu.com/s/1c68zjHfTVeqiSMXBEPYMrg) 下载，提取码为 `9768`。
 
 放置好zip文件后，后面的事代码就会自动执行了。
 
@@ -120,7 +120,6 @@ class CnOcr(object):
     def __init__(
         self,
         model_name: str = 'densenet-s-fc',
-        model_epoch: Optional[int] = None,
         *,
         cand_alphabet: Optional[Union[Collection, str]] = None,
         context: str = 'cpu',  # ['cpu', 'gpu', 'cuda']
@@ -134,8 +133,6 @@ class CnOcr(object):
 
 * `model_name`: 模型名称，即上面表格第一列中的值。默认为 `densenet-s-fc`。
 
-* `model_epoch`: 模型迭代次数。默认为 `None`，表示使用默认的迭代次数值。对于模型名称 `densenet-s-fc`就是 `39`。
-
 * `cand_alphabet`: 待识别字符所在的候选集合。默认为 `None`，表示不限定识别字符范围。取值可以是字符串，如 `"0123456789"`，或者字符列表，如 `["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]`。
 
    * `cand_alphabet`也可以初始化后通过类函数 `CnOcr.set_cand_alphabet(cand_alphabet)` 进行设置。这样同一个实例也可以指定不同的`cand_alphabet`进行识别。
@@ -266,7 +263,7 @@ Predicted Chars: (['笠', '淡', '嘿', '骅', '谧', '鼎', '皋', '姚', '歼'
 
 
 
-#### 3. 函数`CnOcr.ocr_for_single_lines(img_list)`
+#### 3. 函数`CnOcr.ocr_for_single_lines(img_list, batch_size=1)`
 
 函数`CnOcr.ocr_for_single_lines(img_list)`可以**对多个单行文字图片进行批量预测**。函数`CnOcr.ocr(img_fp)`和`CnOcr.ocr_for_single_line(img_fp)`内部其实都是调用的函数`CnOcr.ocr_for_single_lines(img_list)`。
 
@@ -275,6 +272,7 @@ Predicted Chars: (['笠', '淡', '嘿', '骅', '谧', '鼎', '皋', '姚', '歼'
 **函数说明**：
 
 - 输入参数 `img_list`: 为一个`list`；其中每个元素可以是需要识别的图片文件路径（如下例）；或者是已经从图片文件中读入的数组，类型可以为 `torch.Tensor` 或 `np.ndarray`，取值应该是`[0，255]`的整数，维数应该是 `[height, width]` （灰度图片）或者 `[height, width, channel]`，`channel` 可以等于`1`（灰度图片）或者`3`（`RGB`格式的彩色图片）。
+- 输入参数 `batch_size`: 待处理图片很多时，需要分批处理，每批图片的数量由此参数指定。默认为 `1`。
 - 返回值：为一个嵌套的`list`，其中的每个元素存储了对一行文字的识别结果，其中也包含了识别概率值。类似这样`[(['第', '一', '行'], 0.80), (['第', '二', '行'], 0.75), (['第', '三', '行'], 0.9)]`，其中的数字为对应的识别概率值。
 
 

diff --git a/RELEASE.md b/RELEASE.md
@@ -1,5 +1,13 @@
 # Release Notes
 
+### Update 2021.09.21: 发布 cnocr V2.0.1
+
+主要变更：
+
+* 重新训练了模型，模型识别精度略有提升；
+* 函数 `CnOcr.ocr_for_single_lines(img_list, batch_size=1)` 中加入了 `batch_size` 参数。
+
+
 ### Update 2021.08.26: 发布 cnocr V2.0.0
 
 主要变更：

diff --git a/cnocr/__version__.py b/cnocr/__version__.py
@@ -17,4 +17,4 @@
 # specific language governing permissions and limitations
 # under the License.
 
-__version__ = '2.0.0'
+__version__ = '2.0.1'
diff --git a/cnocr/cli.py b/cnocr/cli.py
@@ -25,7 +25,7 @@
 import json
 import glob
 
-from torchvision import transforms
+from torchvision import transforms as T
 
 from cnocr.consts import MODEL_VERSION, ENCODER_CONFIGS, DECODER_CONFIGS
 from cnocr.utils import set_logger, load_model_params, check_model_name
@@ -90,14 +90,18 @@ def train(
     model_name, index_dir, train_config_fp, resume_from_checkpoint, pretrained_model_fp
 ):
     check_model_name(model_name)
-    train_transform = transforms.Compose(
+    train_transform = T.Compose(
         [
-            transforms.RandomInvert(p=0.5),
-            # transforms.RandomErasing(p=0.05, scale=(0.01, 0.05)),
-            transforms.RandomRotation(degrees=2),
-            transforms.RandomAutocontrast(p=0.05),
+            T.RandomInvert(p=0.5),
+            T.RandomRotation(degrees=2),
+            # T.RandomAutocontrast(p=0.05),
+            # T.RandomPosterize(bits=4, p=0.3),
+            # T.RandomAdjustSharpness(sharpness_factor=0.5, p=0.3),
+            # T.RandomEqualize(p=0.3),
+            # T.RandomApply([T.GaussianBlur(kernel_size=3)], p=0.5),
             NormalizeAug(),
-            RandomPaddingAug(p=0.5, max_pad_len=72),
+            # RandomPaddingAug(p=0.5, max_pad_len=72),
+
         ]
     )
     val_transform = NormalizeAug()

diff --git a/cnocr/cn_ocr.py b/cnocr/cn_ocr.py
@@ -63,7 +63,6 @@ class CnOcr(object):
     def __init__(
         self,
         model_name: str = 'densenet-s-fc',
-        model_epoch: Optional[int] = None,
         *,
         cand_alphabet: Optional[Union[Collection, str]] = None,
         context: str = 'cpu',  # ['cpu', 'gpu', 'cuda']
@@ -73,13 +72,12 @@ def __init__(
     ):
         """
 
-        :param model_name: 模型名称
-        :param model_epoch: 模型迭代次数。默认为 None，表示使用系统自带的模型对应的迭代次数
+        :param model_name: 模型名称。默认为 `densenet-s-fc`
         :param cand_alphabet: 待识别字符所在的候选集合。默认为 `None`，表示不限定识别字符范围
-        :param context: 'cpu', or 'gpu'。表明预测时是使用CPU还是GPU。默认为CPU。
+        :param context: 'cpu', or 'gpu'。表明预测时是使用CPU还是GPU。默认为 `cpu`
         :param model_fp: 如果不使用系统自带的模型，可以通过此参数直接指定所使用的模型文件（'.ckpt' 文件）
         :param root: 模型文件所在的根目录。
-            Linux/Mac下默认值为 `~/.cnocr`，表示模型文件所处文件夹类似 `~/.cnocr/1.2.0/densenet-lite-fc`。
+            Linux/Mac下默认值为 `~/.cnocr`，表示模型文件所处文件夹类似 `~/.cnocr/2.0/densenet-s-fc`。
             Windows下默认值为 `C:/Users/<username>/AppData/Roaming/cnocr`。
         """
         if 'name' in kwargs:
@@ -95,15 +93,12 @@ def __init__(
         self.context = context
 
         self._model_file_prefix = '{}-{}'.format(self.MODEL_FILE_PREFIX, model_name)
-        self._model_epoch = (
-            model_epoch
-            if model_epoch is not None
-            else AVAILABLE_MODELS.get(model_name, [None])[0]
-        )
-        if self._model_epoch is not None:
+        model_epoch = AVAILABLE_MODELS.get(model_name, [None])[0]
+
+        if model_epoch is not None:
             self._model_file_prefix = '%s-epoch=%03d' % (
                 self._model_file_prefix,
-                self._model_epoch,
+                model_epoch,
             )
 
         self._assert_and_prepare_model_files(model_fp, root)
@@ -192,7 +187,7 @@ def ocr(
         return line_chars_list
 
     def _prepare_img(
-            self, img_fp: Union[str, Path, torch.Tensor, np.ndarray]
+        self, img_fp: Union[str, Path, torch.Tensor, np.ndarray]
     ) -> np.ndarray:
         """
         :param img: image array with type torch.Tensor or np.ndarray,
@@ -217,8 +212,10 @@ def _prepare_img(
                 # color to gray
                 img = np.expand_dims(np.array(Image.fromarray(img).convert('L')), -1)
             elif img.shape[2] != 1:
-                raise ValueError('only images with shape [height, width, 1] (gray images), '
-                                 'or [height, width, 3] (RGB-formated color images) are supported')
+                raise ValueError(
+                    'only images with shape [height, width, 1] (gray images), '
+                    'or [height, width, 3] (RGB-formated color images) are supported'
+                )
 
         if img.dtype != np.dtype('uint8'):
             img = img.astype('uint8')
@@ -239,7 +236,9 @@ def ocr_for_single_line(
         return res[0]
 
     def ocr_for_single_lines(
-        self, img_list: List[Union[str, Path, torch.Tensor, np.ndarray]]
+        self,
+        img_list: List[Union[str, Path, torch.Tensor, np.ndarray]],
+        batch_size: int = 1,
     ) -> List[Tuple[List[str], float]]:
         """
         Batch recognize characters from a list of one-line-characters images.
@@ -248,6 +247,7 @@ def ocr_for_single_lines(
             Each element should be a tensor with values ranging from 0 to 255,
             and with shape [height, width] or [height, width, channel].
             The optional channel should be 1 (gray image) or 3 (color image).
+        :param batch_size: 待处理图片很多时，需要分批处理，每批图片的数量由此参数指定。默认为 `1`。
         :return: list of (list of chars, prob), such as
             [(['第', '一', '行'], 0.80), (['第', '二', '行'], 0.75), (['第', '三', '行'], 0.9)]
         """
@@ -256,19 +256,23 @@ def ocr_for_single_lines(
         img_list = [self._prepare_img(img) for img in img_list]
         img_list = [self._transform_img(img) for img in img_list]
 
-        out = self._predict(img_list)
+        idx = 0
+        out = []
+        while idx * batch_size < len(img_list):
+            imgs = img_list[idx * batch_size : (idx + 1) * batch_size]
+            batch_out = self._predict(imgs)
+            out.extend(batch_out['preds'])
+            idx += 1
 
         res = []
-        for line in out['preds']:
+        for line in out:
             chars, prob = line
             chars = [c if c != '<space>' else ' ' for c in chars]
             res.append((chars, prob))
 
         return res
 
-    def _transform_img(
-        self, img: np.ndarray
-    ) -> torch.Tensor:
+    def _transform_img(self, img: np.ndarray) -> torch.Tensor:
         """
         :param img: image array with type torch.Tensor or np.ndarray,
         with shape [height, width] or [height, width, channel].

diff --git a/cnocr/consts.py b/cnocr/consts.py
@@ -60,8 +60,8 @@
 )
 # name: (epochs, url)
 AVAILABLE_MODELS = {
-    'densenet-s-fc': (39, root_url + 'densenet-s-fc.zip'),
-    'densenet-s-gru': (11, root_url + 'densenet-s-gru.zip'),
+    'densenet-s-fc': (8, root_url + 'densenet-s-fc-v2.0.1.zip'),
+    'densenet-s-gru': (14, root_url + 'densenet-s-gru-v2.0.1.zip'),
 }
 
 # 候选字符集合

diff --git a/examples/train_config.json b/examples/train_config.json
@@ -1,6 +1,4 @@
 {
-    "version": "2.0.0",
-
     "vocab_fp": "label_cn.txt",
     "img_folder": "data/images",
 

diff --git a/examples/train_config_gpu.json b/examples/train_config_gpu.json
@@ -1,6 +1,4 @@
 {
-    "version": "1.3.0",
-
     "vocab_fp": "label_cn.txt",
     "img_folder": "data/images",