From 121de4c58ff10d97f729cb9cf50b0e6b3134447f Mon Sep 17 00:00:00 2001 From: Martynas Bagdonas Date: Tue, 3 Dec 2024 19:37:36 +0000 Subject: [PATCH] Reduce memory usage when loading or searching PDF pages --- pdfjs/pdf.js | 2 +- src/pdf/page.js | 8 ++- src/pdf/pdf-find-controller.js | 111 ++++++++++++++++++--------------- src/pdf/pdf-view.js | 15 +++-- 4 files changed, 77 insertions(+), 59 deletions(-) diff --git a/pdfjs/pdf.js b/pdfjs/pdf.js index 8ac2ccc5..d57e986d 160000 --- a/pdfjs/pdf.js +++ b/pdfjs/pdf.js @@ -1 +1 @@ -Subproject commit 8ac2ccc54dab2049cd92096c3448d6fdfab436ac +Subproject commit d57e986da34c3151b834c30535b4d89c6657e626 diff --git a/src/pdf/page.js b/src/pdf/page.js index 077112e3..04278c72 100644 --- a/src/pdf/page.js +++ b/src/pdf/page.js @@ -461,11 +461,15 @@ export default class Page { if (!this.layer._findController || !this.layer._findController.highlightMatches || !this.layer._findController._matchesCountTotal + || !this.layer._pdfPages[this.pageIndex] ) { return; } - let { _pageMatchesPosition, selected } = this.layer._findController; - let positions = _pageMatchesPosition[this.pageIndex]; + let { selected } = this.layer._findController; + let positions = this.layer._findController.getMatchPositions( + this.pageIndex, + this.layer._pdfPages[this.pageIndex] + ); if (!positions || !positions.length) { return; diff --git a/src/pdf/pdf-find-controller.js b/src/pdf/pdf-find-controller.js index 57228952..371b0a19 100644 --- a/src/pdf/pdf-find-controller.js +++ b/src/pdf/pdf-find-controller.js @@ -698,15 +698,62 @@ class PDFFindController { }); } + getMatchPositions(pageIndex, pageData) { + let positions = []; + let pageMatches = this._pageMatches[pageIndex]; + let pageMatchesLength = this._pageMatchesLength[pageIndex]; + if (!pageMatches || !pageMatches.length) { + return []; + } + let chars = pageData.chars; + for (let j = 0; j < pageMatches.length; j++) { + let matchPos = pageMatches[j]; + let matchLen = pageMatchesLength[j]; + let start = null; + let end = null; + let total = 0; + for (let i = 0; i < chars.length; i++) { + let char = chars[i]; + total++; + // For an unknown reason char.u can sometimes have decomposed ligatures instead of + // single ligature character + total += char.u.length - 1; + if (char.spaceAfter || char.lineBreakAfter || char.paragraphBreakAfter) { + total++; + } + if (total >= matchPos && start === null) { + start = i; + if (i !== 0) { + start++; + } + } + if (total >= matchPos + matchLen) { + end = i; + break; + } + } + let rects = getRangeRects(chars, start, end); + let position = { pageIndex, rects }; + positions.push(position); + } + return positions; + } + + async getMatchPositionsAsync(pageIndex) { + let pageMatches = this._pageMatches[pageIndex]; + if (!pageMatches || !pageMatches.length) { + return []; + } + let pageData = await this._pdfDocument.getPageData({ pageIndex }); + return this.getMatchPositions(pageIndex, pageData); + } + _reset() { this._highlightMatches = false; this._scrollMatches = false; this._pdfDocument = null; this._pageMatches = []; this._pageMatchesLength = []; - this._pageMatchesPosition = []; - this._pageChars = []; - this._pageText = []; this._visitedPagesCount = 0; this._state = null; // Currently selected match. @@ -827,7 +874,6 @@ class PDFFindController { _calculateRegExpMatch(query, entireWord, pageIndex, pageContent) { const matches = (this._pageMatches[pageIndex] = []); const matchesLength = (this._pageMatchesLength[pageIndex] = []); - const matchesPosition = (this._pageMatchesPosition[pageIndex] = []); if (!query) { // The query can be empty because some chars like diacritics could have // been stripped out. @@ -850,35 +896,8 @@ class PDFFindController { ); if (matchLen) { - let chars = this._pageChars[pageIndex]; - let start = null; - let end = null; - let total = 0; - for (let i = 0; i < chars.length; i++) { - let char = chars[i]; - total++; - // For unknown reason char.u can sometimes have decomposed ligatures instead of - // single ligature character - total += char.u.length - 1; - if (char.spaceAfter || char.lineBreakAfter || char.paragraphBreakAfter) { - total++; - } - if (total >= matchPos && start === null) { - start = i; - if (i !== 0) { - start++; - } - } - if (total >= matchPos + matchLen) { - end = i; - break; - } - } - let rects = getRangeRects(chars, start, end); - let position = { pageIndex, rects }; - matches.push(start); - matchesLength.push(end - start); - matchesPosition.push(position); + matches.push(matchPos); + matchesLength.push(matchLen); } } } @@ -953,7 +972,7 @@ class PDFFindController { return [isUnicode, query]; } - _calculateMatch(pageIndex) { + async _calculateMatch(pageIndex) { let query = this._query; if (query.length === 0) { return; // Do nothing: the matches should be wiped out already. @@ -1016,33 +1035,23 @@ class PDFFindController { for (let i = 0; i < resolvers.length; i++) { let resolve = resolvers[i]; - let text = ''; - let chars = []; + let text = []; try { await new Promise(resolve => setTimeout(resolve)); let pageData = await this._pdfDocument.getPageData({ pageIndex: i }); - - function getTextFromChars(chars) { - let text = []; - for (let char of chars) { - text.push(char.u) - if (char.spaceAfter || char.lineBreakAfter || char.paragraphBreakAfter) { - text.push(' '); - } + for (let char of pageData.chars) { + text.push(char.u); + if (char.spaceAfter || char.lineBreakAfter || char.paragraphBreakAfter) { + text.push(' '); } - return text.join('').trim(); } - - chars = pageData.chars; - text = getTextFromChars(pageData.chars); } catch (e) { console.log(e); } - this._pageChars[i] = chars; - this._pageText[i] = text; + text = text.join('').trim(); [ this._pageContents[i], @@ -1197,7 +1206,7 @@ class PDFFindController { this._updateUIState(state, this._state.findPrevious); if (this._selected.pageIdx !== -1) { - this._onNavigate(this._pageMatchesPosition[this._selected.pageIdx][this._selected.matchIdx]); + this._onNavigate(this._selected.pageIdx, this._selected.matchIdx); } } diff --git a/src/pdf/pdf-view.js b/src/pdf/pdf-view.js index 2d08b822..4d9e75e4 100644 --- a/src/pdf/pdf-view.js +++ b/src/pdf/pdf-view.js @@ -278,8 +278,9 @@ class PDFView { this._findController = new PDFFindController({ linkService: this._iframeWindow.PDFViewerApplication.pdfViewer.linkService, - onNavigate: (position) => { - this.navigateToPosition(position); + onNavigate: async (pageIndex, matchIndex) => { + let matchPositions = await this._findController.getMatchPositionsAsync(pageIndex); + this.navigateToPosition(matchPositions[matchIndex]); }, onUpdateMatches: ({ matchesCount }) => { let result = { total: matchesCount.total, index: matchesCount.current - 1 }; @@ -455,7 +456,7 @@ class PDFView { this._init2 && this._init2(); if (this._preview) { - this._detachPage(originalPage); + this._detachPage(originalPage, true); let page = new Page(this, originalPage); this._pages.push(page); this._render(); @@ -466,7 +467,7 @@ class PDFView { this._initThumbnails(); } - this._detachPage(originalPage); + this._detachPage(originalPage, true); originalPage.textLayerPromise.then(() => { // Text layer may no longer exist if it was detached in the meantime @@ -492,8 +493,12 @@ class PDFView { } } - _detachPage(originalPage) { + _detachPage(originalPage, replacing) { + let pageIndex = originalPage.id - 1; this._pages = this._pages.filter(x => x.originalPage !== originalPage); + if (!replacing) { + delete this._pdfPages[pageIndex]; + } } _getPageLabel(pageIndex, usePrevAnnotation) {