diff --git a/recognizer/crnn/lib/create_lmdb_dataset.py b/recognizer/crnn/lib/create_lmdb_dataset.py
index 79dde34..f532126 100644
--- a/recognizer/crnn/lib/create_lmdb_dataset.py
+++ b/recognizer/crnn/lib/create_lmdb_dataset.py
@@ -1,4 +1,3 @@
-
 import lmdb
 import cv2
 import numpy as np
@@ -26,7 +25,10 @@ def checkImageIsValid(imageBin):
 
 def writeCache(env, cache):
     with env.begin(write=True) as txn:
-        for k, v in cache.items():
+        # txn.put is given bytes: str keys/values are encoded (UTF-8) first.
+        for _k, _v in cache.items():
+            k = _k.encode() if isinstance(_k, str) else _k
+            v = _v.encode() if isinstance(_v, str) else _v
             txn.put(k, v)
 
 
@@ -54,7 +56,7 @@ def createDataset(outputPath, imagePathList, labelList, lexiconList=None, checkV
         #     print('%s does not exist' % imagePath)
         #     continue
 
-        with open(imagePath, 'r') as f:
+        with open(imagePath, 'rb') as f:
             imageBin = f.read()
 
         if checkValid:
diff --git a/recognizer/crnn/lib/dataset.py b/recognizer/crnn/lib/dataset.py
index 436a918..fc8d63a 100644
--- a/recognizer/crnn/lib/dataset.py
+++ b/recognizer/crnn/lib/dataset.py
@@ -60,7 +60,7 @@ def __getitem__(self, index):
                 img = self.transform(img)
 
             label_key = 'label-%09d' % index
-            label = txn.get(label_key.encode())
+            label = txn.get(label_key.encode()).decode()
 
             if self.target_transform is not None:
                 label = self.target_transform(label)
diff --git a/recognizer/crnn/lib/get_alphabets.py b/recognizer/crnn/lib/get_alphabets.py
new file mode 100644
index 0000000..f17fb70
--- /dev/null
+++ b/recognizer/crnn/lib/get_alphabets.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+__title__ = ''
+__author__ = 'changxin'
+__mtime__ = '2019/3/14'
+"""
+
+
+def get_alphabets(filelists):
+    """Collect the distinct label characters from annotation files.
+
+    Each line of every file in *filelists* is stripped and split on single
+    spaces; the second field is taken as the label.  The distinct characters
+    of all labels are written to ``alphabets.py`` in the current working
+    directory as one triple-quoted string, in sorted order.
+
+    Args:
+        filelists: iterable of paths to the annotation text files.
+    """
+    chars = set()
+    for path in filelists:
+        # Read explicitly as UTF-8 so the result does not depend on the locale.
+        with open(path, encoding='utf-8') as f:
+            for line in f:
+                fields = line.strip().split(' ')
+                # Blank or label-less lines have no second field; skip them
+                # instead of raising IndexError.
+                if len(fields) > 1:
+                    chars.update(fields[1])
+    # The generated file declares utf8, so it is written as UTF-8; sorting
+    # keeps the character order identical from one run to the next.
+    # NOTE(review): a '"' or '\' among the labels is written unescaped and may
+    # yield an invalid alphabets.py -- confirm whether labels can contain them.
+    with open('alphabets.py', 'w', encoding='utf-8') as e:
+        e.write("#coding=utf8\n")
+        e.write('\"\"\"' + ''.join(sorted(chars)) + '\"\"\"')