From ee87438c9395355b51946c225d01713dc32121ab Mon Sep 17 00:00:00 2001
From: fhasibi
Date: Tue, 9 May 2023 14:36:02 +0330
Subject: [PATCH] fix #10 add train class

---
Review notes (text between the "---" line and the diff is not part of the
commit message and is skipped when the patch is applied):

* This file had been flattened onto three physical lines. The line breaks
  below were restored from the +/- markers; the result has 73 removed and
  55 added lines, matching the diffstat, and 9 context lines, matching the
  hunk header (old 82 = 73 + 9, new 64 = 55 + 9).
* Indentation was re-derived from the Python structure at 4 spaces per
  level. Leading/trailing whitespace of context and removed lines may
  therefore differ from the pre-image; try `git apply --ignore-whitespace`
  if the hunk is rejected.
* Whether `try:` sits inside the `if all(...)` block (old and new code) is
  a reconstruction choice; the flattened text does not show it.
* The code payload is otherwise untouched. Items to raise in a follow-up:
  1. `languages = ['en' , 'ja']` is a mutable default argument.
  2. `readtext(..., detail=detail)` receives a dict; EasyOCR documents
     `detail` as a 1/0 verbosity flag -- TODO confirm the dict is intended.
  3. `train()` only calls `readtext` and discards `results` and `texts`;
     nothing visible in this patch updates a model.
  4. EasyOCR is documented to return each box as four [x, y] corner
     points; if so, `isinstance(coord, int)` never passes and no crop is
     ever saved -- verify.
  5. `model = easyOCR()`, `model.train()`, `model.test()` run at import
     time (no `__main__` guard).
  6. `os` is used but not imported inside this hunk; presumably it is
     imported on line 1 of the file, which the hunk does not cover.

 model/easyOCR.py | 128 ++++++++++++++++++++---------------------------
 1 file changed, 55 insertions(+), 73 deletions(-)

diff --git a/model/easyOCR.py b/model/easyOCR.py
index 96f5680..985a437 100644
--- a/model/easyOCR.py
+++ b/model/easyOCR.py
@@ -2,82 +2,64 @@
 from easyocr import Reader
 from sklearn.model_selection import train_test_split
 import json
-from PIL import Image, ImageDraw , ImageFont
-
-
-
-# Direction of images and nnotation
-image_dir = "./business_card_dataset/images"
-annotation_path ="./business_card_dataset/annotations/instances_default.json"
-
-# Open and read the annotaion
-with open(annotation_path , 'r' , encoding='utf-8') as f:
-    annotations = json.load(f)
-
-# Set languages that gonna use in the project
-languages = ['en' , 'ja']
-
-# Split dataset into training and testing sets
-image_files = os.listdir(image_dir)
-train_files, test_files = train_test_split(image_files, test_size=0.1, random_state=42)
-
-# Create easyOCR reader classs
-reader = Reader(languages)
-
-# Loop thtough each image file
-for img in train_files:
-
-    # Check the extension(.PNG)
-    if img.endswith('.png'):
-
-        # Get the full path to the image and annotation file
-        image_path = os.path.join(image_dir , img)
-
-        image = os.path.basename(image_path)
-
-        # Extract text and bounding box from annotation
+from PIL import Image
+
+class easyOCR:
+    def __init__(self , languages = ['en' , 'ja'] , image_dir = "./business_card_dataset/images" , annotation_path ="./business_card_dataset/annotations/instances_default.json"):
+        self.languages = languages
+        self.image_dir = image_dir
+        self.annotation_path = annotation_path
+        self.reader = Reader(languages)
+
+    # Open and read the annotaion
+    def get_annotations_and_image_file(self):
+        with open(self.annotation_path , 'r' , encoding='utf-8') as f:
+            annotations = json.load(f)
+        image_files = os.listdir(self.image_dir)
+        return annotations , image_files
+
+    def extract_text_box(self , image_path , annotations):
         texts = []
         boxes = []
         for annotation in annotations["annotations"]:
-            if annotation.get('filename') == image and annotation["language"] in languages:
+            if annotation.get('filename') == os.path.basename(image_path) and annotation["language"] in self.languages:
                 boxes.append(annotation["bbox"])
                 texts.append(annotation["text"])
-
-        # Create a dictionary with the bounding boxes
-        detail = {'box': boxes}
-
-        # Train the model using image and annotation
-        results = reader.readtext(image_path , detail=detail)
+        return texts , boxes
 
-
-
-
-# Test the model using the testing set
-for img in test_files:
-    if img.endswith('.png'):
-        image_path = os.path.join(image_dir , img)
-
-        image = os.path.basename(image_path)
-        texts = []
-        boxes = []
-        for annotation in annotations["annotations"]:
-            if annotation.get('filename') == image and annotation["language"] in languages:
-                boxes.append(annotation["bbox"])
-                texts.append(annotation["text"])
-        detail = {'box': boxes}
-        results = reader.readtext(image_path , detail=detail)
-        for res in results:
-            text = res[1]
-            bbox = res[0]
-            conf = res[2]
-            print(f"System output : {text} - Confidence: {conf}")
-
-            # Crop text region
-            if all(isinstance(coord, int) for coord in bbox):
-                bbox = tuple(int(coord) for coord in bbox)
-                try:
-                    region = Image.open(image_path).crop(bbox)
-                    region.save(f"{text}.png")
-                except ValueError:
-                    pass
-
+    def train(self):
+        annotations , image_files = self.get_annotations_and_image_file()
+        train_files , test_files = train_test_split(image_files, test_size=0.1 , random_state=42)
+        for img in train_files:
+            if img.endswith('.png'):
+                image_path = os.path.join(self.image_dir , img)
+                texts, boxes = self.extract_text_box(image_path, annotations)
+                detail = {'box': boxes}
+                results = self.reader.readtext(image_path , detail=detail)
+
+    def test(self):
+        annotations , image_files = self.get_annotations_and_image_file()
+        train_files , test_files = train_test_split(image_files, test_size=0.1 , random_state=42)
+        for img in test_files:
+            if img.endswith('.png'):
+                image_path = os.path.join(self.image_dir , img)
+                texts, boxes = self.extract_text_box(image_path, annotations)
+                detail = {'box': boxes}
+                results = self.reader.readtext(image_path , detail=detail)
+                for res in results:
+                    text = res[1]
+                    bbox = res[0]
+                    conf = res[2]
+                    print(f"System output : {text} - Confidence: {conf}")
+                    if all(isinstance(coord, int) for coord in bbox):
+                        bbox = tuple(int(coord) for coord in bbox)
+                        try:
+
+                            region = Image.open(image_path).crop(bbox)
+                            region.save(f"{text}.png")
+                        except ValueError:
+                            pass
+
+model = easyOCR()
+model.train()
+model.test()