forked from PaddlePaddle/PaddleOCR
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Performance exploration with onnx (#2)
* adds onnx model * adds onnx model and utilities * rollback
- Loading branch information
1 parent
67ab18a
commit 65d4e39
Showing
8 changed files
with
145 additions
and
18 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import os | ||
from pdf2image import convert_from_bytes | ||
from PIL.Image import Image | ||
|
||
|
||
def load_pdf_bytes() -> bytes:
    """Return the raw contents of the bundled sample PDF.

    The file is read from ``resources/sample_pdf.pdf``, resolved relative to
    the directory that contains this module.

    Returns:
        The complete file contents as ``bytes``.
    """
    sample_path = os.path.join(
        os.path.dirname(__file__),
        "resources",
        "sample_pdf.pdf",
    )
    with open(sample_path, "rb") as pdf_file:
        payload = pdf_file.read()
    return payload
|
||
|
||
def pdf_to_image(pdf_bytes: bytes, dpi: int) -> Image:
    """Render the first page of a PDF as a colour PIL image.

    Args:
        pdf_bytes: Raw bytes of the PDF document.
        dpi: Resolution passed to ``convert_from_bytes`` for rasterisation.

    Returns:
        The image of the first page.

    Raises:
        IndexError: If ``convert_from_bytes`` returns an empty list.
    """
    # Only the first page is returned, so restrict the conversion to that
    # page instead of rasterising the whole document and discarding the rest.
    pages = convert_from_bytes(
        pdf_bytes,
        dpi=dpi,
        grayscale=False,
        first_page=1,
        last_page=1,
    )
    return pages[0]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
from paddleocr import PaddleOCR | ||
|
||
import os | ||
import numpy as np | ||
import timeit | ||
import click | ||
|
||
from profile.commons import load_pdf_bytes, pdf_to_image | ||
import onnxruntime as ort | ||
|
||
|
||
def _init_model(base_dir_path: str) -> PaddleOCR:
    """Build a PaddleOCR instance configured to use ONNX models on CPU.

    Args:
        base_dir_path: Directory that holds ``det.onnx``, ``rec.onnx`` and
            ``cls.onnx``.

    Returns:
        A ``PaddleOCR`` object constructed with ``use_onnx=True``, the three
        model paths, and the ONNX Runtime session options created here.
    """
    # Model file locations, all relative to the supplied directory.
    det_path = os.path.join(base_dir_path, "det.onnx")
    rec_path = os.path.join(base_dir_path, "rec.onnx")
    cls_path = os.path.join(base_dir_path, "cls.onnx")

    # ONNX Runtime session settings: 8 intra-op threads, sequential
    # execution mode, and all graph optimizations enabled.
    options = ort.SessionOptions()
    options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
    options.intra_op_num_threads = 8

    ocr_kwargs = dict(
        use_onnx=True,
        det_model_dir=det_path,
        rec_model_dir=rec_path,
        cls_model_dir=cls_path,
        ocr_version="PP-OCRv4",
        rec_batch_num=6,
        onnx_providers=["CPUExecutionProvider"],
        onnx_sess_options=options,
    )
    return PaddleOCR(**ocr_kwargs)
|
||
|
||
@click.command()
@click.option("--paddle-dir-path", default="/home/play/models/paddle2onnx/")
def main(
    paddle_dir_path: str,
):
    """Time ONNX-backed PaddleOCR on the first page of the sample PDF.

    The sample PDF is rendered at 200 dpi, the OCR model is built from the
    ONNX files in PADDLE_DIR_PATH, and detection + recognition (no
    classification) is run four times. The mean and standard deviation of
    the last three run times are printed.
    """
    pdf_bytes = load_pdf_bytes()
    image = pdf_to_image(pdf_bytes, dpi=200)
    # Convert once, outside the timed callable, so the PIL -> ndarray
    # conversion is not counted as part of the OCR inference time.
    image_array = np.asarray(image)

    model = _init_model(paddle_dir_path)

    times = timeit.Timer(
        lambda: model.ocr(
            image_array,
            det=True,
            rec=True,
            cls=False,
        )
    ).repeat(repeat=4, number=1)

    # The first measurement is excluded from the statistics
    # (presumably to discard warm-up cost -- confirm with the author).
    measured = times[1:]
    print(f"Mean time: {np.mean(measured)}")
    print(f"Std time: {np.std(measured)}")


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#!/bin/bash
#
# Convert the PaddleOCR detection (det), recognition (rec) and
# classification (cls) inference models to ONNX with paddle2onnx.
# The resulting det.onnx, rec.onnx and cls.onnx are written to SAVE_DIR_PATH.

# Stop at the first failing conversion instead of silently continuing.
set -euo pipefail

DET_PATH=~/.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer/
REC_PATH=~/.paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer/
CLS_PATH=~/.paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer/
SAVE_DIR_PATH=~/models/paddle2onnx

# Make sure the output directory exists before the first conversion.
mkdir -p "$SAVE_DIR_PATH"

paddle2onnx --model_dir "$DET_PATH" \
    --model_filename inference.pdmodel \
    --params_filename inference.pdiparams \
    --save_file "$SAVE_DIR_PATH/det.onnx" \
    --opset_version 14 \
    --enable_onnx_checker True \
    --custom_ops '{"paddle_op":"onnx_op"}'

paddle2onnx --model_dir "$REC_PATH" \
    --model_filename inference.pdmodel \
    --params_filename inference.pdiparams \
    --save_file "$SAVE_DIR_PATH/rec.onnx" \
    --opset_version 14 \
    --enable_onnx_checker True \
    --custom_ops '{"paddle_op":"onnx_op"}'

# NOTE(review): unlike det/rec, the filenames below repeat the model
# directory name; confirm this matches the on-disk layout under CLS_PATH.
paddle2onnx --model_dir "$CLS_PATH" \
    --model_filename ch_ppocr_mobile_v2.0_cls_infer/inference.pdmodel \
    --params_filename ch_ppocr_mobile_v2.0_cls_infer/inference.pdiparams \
    --save_file "$SAVE_DIR_PATH/cls.onnx" \
    --opset_version 14 \
    --enable_onnx_checker True \
    --custom_ops '{"paddle_op":"onnx_op"}'
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
from onnxruntime.quantization import ( | ||
quantize_dynamic, | ||
QuantType, | ||
quant_pre_process, | ||
) | ||
import os | ||
import click | ||
|
||
|
||
# TODO: produces error: Incomplete symbolic shape inference from | ||
# onnxruntime.tools.symbolic_shape_infer line 2932. | ||
def run(dir_path: str, filename: str):
    """Pre-process and dynamically quantize one ONNX model file.

    Two files are written next to the input model, named after the input
    file's stem: ``<stem>_infer.onnx`` (output of ``quant_pre_process``) and
    ``<stem>_quant.onnx`` (output of ``quantize_dynamic`` with QInt8 weights).

    Args:
        dir_path: Directory containing the model; outputs are written here.
        filename: File name of the ONNX model inside ``dir_path``.
    """
    # Strip only the final extension so dotted names such as
    # "ch_ppocr_mobile_v2.0_cls.onnx" keep their full stem.
    base_name = os.path.splitext(filename)[0]
    model_path = os.path.join(dir_path, filename)
    infer_model_path = os.path.join(dir_path, f"{base_name}_infer.onnx")
    quant_model_path = os.path.join(dir_path, f"{base_name}_quant.onnx")

    quant_pre_process(
        input_model=model_path,
        output_model_path=infer_model_path,
    )
    # Quantization consumes the pre-processed model, not the original one.
    quantize_dynamic(
        model_input=infer_model_path,
        model_output=quant_model_path,
        weight_type=QuantType.QInt8,
    )
|
||
|
||
# Command-line entry point: quantizes the detection model first, then the
# recognition model, both located in --dir_path.
@click.command()
@click.option("--dir_path", type=str, required=True)
@click.option("--det_filename", type=str, required=True, default="det.onnx")
@click.option("--rec_filename", type=str, required=True, default="rec.onnx")
def main(dir_path: str, det_filename: str, rec_filename: str) -> None:
    # Same order as before: detection, then recognition.
    run(dir_path=dir_path, filename=det_filename)
    run(dir_path=dir_path, filename=rec_filename)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters