diff --git a/src/createWorker.js b/src/createWorker.js index f94d5186..1ff0f9fb 100644 --- a/src/createWorker.js +++ b/src/createWorker.js @@ -1,5 +1,4 @@ const resolvePaths = require('./utils/resolvePaths'); -const circularize = require('./utils/circularize'); const createJob = require('./createJob'); const { log } = require('./utils/log'); const getId = require('./utils/getId'); @@ -204,11 +203,7 @@ module.exports = async (langs = 'eng', oem = OEM.LSTM_ONLY, _options = {}, confi const promiseId = `${action}-${jobId}`; if (status === 'resolve') { log(`[${workerId}]: Complete ${jobId}`); - let d = data; - if (action === 'recognize') { - d = circularize(data); - } - promises[promiseId].resolve({ jobId, data: d }); + promises[promiseId].resolve({ jobId, data }); delete promises[promiseId]; } else if (status === 'reject') { promises[promiseId].reject(data); diff --git a/src/index.d.ts b/src/index.d.ts index a1e87c88..1f5a9c80 100644 --- a/src/index.d.ts +++ b/src/index.d.ts @@ -152,14 +152,9 @@ declare namespace Tesseract { paragraphs: Paragraph[]; text: string; confidence: number; - baseline: Baseline; bbox: Bbox; blocktype: string; - polygon: any; page: Page; - lines: Line[]; - words: Word[]; - symbols: Symbol[]; } interface Baseline { x0: number; @@ -186,38 +181,21 @@ declare namespace Tesseract { baseline: Baseline; rowAttributes: RowAttributes bbox: Bbox; - paragraph: Paragraph; - block: Block; - page: Page; - symbols: Symbol[]; } interface Paragraph { lines: Line[]; text: string; confidence: number; - baseline: Baseline; bbox: Bbox; is_ltr: boolean; - block: Block; - page: Page; - words: Word[]; - symbols: Symbol[]; } interface Symbol { - choices: Choice[]; - image: any; text: string; confidence: number; - baseline: Baseline; bbox: Bbox; is_superscript: boolean; is_subscript: boolean; is_dropcap: boolean; - word: Word; - line: Line; - paragraph: Paragraph; - block: Block; - page: Page; } interface Choice { text: string; @@ -228,38 +206,17 @@ declare namespace Tesseract { choices: Choice[]; text: string; confidence: number; - baseline: Baseline; bbox: Bbox; - is_numeric: boolean; - in_dictionary: boolean; - direction: string; - language: string; - is_bold: boolean; - is_italic: boolean; - is_underlined: boolean; - is_monospace: boolean; - is_serif: boolean; - is_smallcaps: boolean; - font_size: number; - font_id: number; font_name: string; - line: Line; - paragraph: Paragraph; - block: Block; - page: Page; } interface Page { blocks: Block[] | null; confidence: number; - lines: Line[]; oem: string; osd: string; - paragraphs: Paragraph[]; psm: string; - symbols: Symbol[]; text: string; version: string; - words: Word[]; hocr: string | null; tsv: string | null; box: string | null; diff --git a/src/utils/circularize.js b/src/utils/circularize.js deleted file mode 100644 index 89a4f0e8..00000000 --- a/src/utils/circularize.js +++ /dev/null @@ -1,56 +0,0 @@ -/** - * In the recognition result of tesseract, there - * is a deep JSON object for details, it has around - * - * The result of dump.js is a big JSON tree - * which can be easily serialized (for instance - * to be sent from a webworker to the main app - * or through Node's IPC), but we want - * a (circular) DOM-like interface for walking - * through the data. - * - * @fileoverview DOM-like interface for walking through data - * @author Kevin Kwok - * @author Guillermo Webster - * @author Jerome Wu - */ - -module.exports = (page) => { - const blocks = []; - const paragraphs = []; - const lines = []; - const words = []; - const symbols = []; - - if (page.blocks) { - page.blocks.forEach((block) => { - block.paragraphs.forEach((paragraph) => { - paragraph.lines.forEach((line) => { - line.words.forEach((word) => { - word.symbols.forEach((sym) => { - symbols.push({ - ...sym, page, block, paragraph, line, word, - }); - }); - words.push({ - ...word, page, block, paragraph, line, - }); - }); - lines.push({ - ...line, page, block, paragraph, - }); - }); - paragraphs.push({ - ...paragraph, page, block, - }); - }); - blocks.push({ - ...block, page, - }); - }); - } - - return { - ...page, blocks, paragraphs, lines, words, symbols, - }; -};