From 6299aa558cdf5e087524a3f5afa5e4d9a8eb7f89 Mon Sep 17 00:00:00 2001 From: Balearica Date: Thu, 26 Dec 2024 01:10:58 -0800 Subject: [PATCH] Fixed JSON export bugs and added new tests (#987) --- package-lock.json | 14 +++++++------- package.json | 2 +- tests/recognize.test.js | 18 ++++++++++++++++++ 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/package-lock.json b/package-lock.json index 0e1104d5..790b0551 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,7 +16,7 @@ "node-fetch": "^2.6.9", "opencollective-postinstall": "^2.0.3", "regenerator-runtime": "^0.13.3", - "tesseract.js-core": "^6.0.0-2", + "tesseract.js-core": "^6.0.0-3", "wasm-feature-detect": "^1.2.11", "zlibjs": "^0.3.1" }, @@ -8752,9 +8752,9 @@ } }, "node_modules/tesseract.js-core": { - "version": "6.0.0-2", - "resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-6.0.0-2.tgz", - "integrity": "sha512-gYYc+UtQdum9BzYKid4bCHIN3cxxxmtipHFKdA1Tvyrh698wexYy32MXeppvUQkij5N5AvVOLA54mbx0AvXsnQ==", + "version": "6.0.0-3", + "resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-6.0.0-3.tgz", + "integrity": "sha512-sHmDE1XnAI8sWpiKisodss5KKN57+9/ABZdp1eEsSBNMmHPGI2HahiY3EZx6LETxahAr3WF3erVF0WPebkk17g==", "license": "Apache-2.0" }, "node_modules/test-exclude": { @@ -16185,9 +16185,9 @@ } }, "tesseract.js-core": { - "version": "6.0.0-2", - "resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-6.0.0-2.tgz", - "integrity": "sha512-gYYc+UtQdum9BzYKid4bCHIN3cxxxmtipHFKdA1Tvyrh698wexYy32MXeppvUQkij5N5AvVOLA54mbx0AvXsnQ==" + "version": "6.0.0-3", + "resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-6.0.0-3.tgz", + "integrity": "sha512-sHmDE1XnAI8sWpiKisodss5KKN57+9/ABZdp1eEsSBNMmHPGI2HahiY3EZx6LETxahAr3WF3erVF0WPebkk17g==" }, "test-exclude": { "version": "6.0.0", diff --git a/package.json b/package.json index 1012fdae..5f694c54 100644 --- a/package.json +++ b/package.json @@ -68,7 +68,7 @@ "node-fetch": "^2.6.9", "opencollective-postinstall": "^2.0.3", "regenerator-runtime": "^0.13.3", - "tesseract.js-core": "^6.0.0-2", + "tesseract.js-core": "^6.0.0-3", "wasm-feature-detect": "^1.2.11", "zlibjs": "^0.3.1" }, diff --git a/tests/recognize.test.js b/tests/recognize.test.js index 2a333755..44a66adc 100644 --- a/tests/recognize.test.js +++ b/tests/recognize.test.js @@ -286,6 +286,24 @@ describe('recognize()', () => { expect(blocks[0].paragraphs[0].lines[1].text).to.be('Back \\ Slash\n'); }).timeout(TIMEOUT); + it('recongize image with multiple choices', async () => { + await workerLegacy.reinitialize('eng'); + const { data: { blocks } } = await workerLegacy.recognize(`${IMAGE_PATH}/bill.png`, {}, { blocks: true }); + expect(blocks[0].paragraphs[1].lines[0].words[3].choices.length).to.be(3); + expect(blocks[0].paragraphs[1].lines[0].words[3].choices[1].text).to.be('100,000.0ll'); + }).timeout(TIMEOUT); + + it('recongize image with multiple blocks', async () => { + // This also implicitly checks that non-text blocks are ignored, + // as otherwise the length would be 5. + await worker.reinitialize('eng'); + await worker.setParameters({ + tessedit_pageseg_mode: PSM.AUTO, + }); + const { data: { blocks } } = await worker.recognize(`${IMAGE_PATH}/bill.png`, {}, { blocks: true }); + expect(blocks.length).to.be(4); + }).timeout(TIMEOUT); + it('recongize chinese image', async () => { await worker.reinitialize('chi_tra'); const { data: { blocks } } = await worker.recognize(`${IMAGE_PATH}/chinese.png`, {}, { blocks: true });