Skip to content

Commit

Permalink
Reduce memory usage when loading or searching PDF pages
Browse files Browse the repository at this point in the history
  • Loading branch information
mrtcode committed Dec 3, 2024
1 parent b5b4d06 commit 121de4c
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 59 deletions.
2 changes: 1 addition & 1 deletion pdfjs/pdf.js
Submodule pdf.js updated 1 files
+34 −38 src/core/module/module.js
8 changes: 6 additions & 2 deletions src/pdf/page.js
Original file line number Diff line number Diff line change
Expand Up @@ -461,11 +461,15 @@ export default class Page {
if (!this.layer._findController
|| !this.layer._findController.highlightMatches
|| !this.layer._findController._matchesCountTotal
|| !this.layer._pdfPages[this.pageIndex]
) {
return;
}
let { _pageMatchesPosition, selected } = this.layer._findController;
let positions = _pageMatchesPosition[this.pageIndex];
let { selected } = this.layer._findController;
let positions = this.layer._findController.getMatchPositions(
this.pageIndex,
this.layer._pdfPages[this.pageIndex]
);

if (!positions || !positions.length) {
return;
Expand Down
111 changes: 60 additions & 51 deletions src/pdf/pdf-find-controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -698,15 +698,62 @@ class PDFFindController {
});
}

getMatchPositions(pageIndex, pageData) {
let positions = [];
let pageMatches = this._pageMatches[pageIndex];
let pageMatchesLength = this._pageMatchesLength[pageIndex];
if (!pageMatches || !pageMatches.length) {
return [];
}
let chars = pageData.chars;
for (let j = 0; j < pageMatches.length; j++) {
let matchPos = pageMatches[j];
let matchLen = pageMatchesLength[j];
let start = null;
let end = null;
let total = 0;
for (let i = 0; i < chars.length; i++) {
let char = chars[i];
total++;
// For an unknown reason char.u can sometimes have decomposed ligatures instead of
// single ligature character
total += char.u.length - 1;
if (char.spaceAfter || char.lineBreakAfter || char.paragraphBreakAfter) {
total++;
}
if (total >= matchPos && start === null) {
start = i;
if (i !== 0) {
start++;
}
}
if (total >= matchPos + matchLen) {
end = i;
break;
}
}
let rects = getRangeRects(chars, start, end);
let position = { pageIndex, rects };
positions.push(position);
}
return positions;
}

async getMatchPositionsAsync(pageIndex) {
let pageMatches = this._pageMatches[pageIndex];
if (!pageMatches || !pageMatches.length) {
return [];
}
let pageData = await this._pdfDocument.getPageData({ pageIndex });
return this.getMatchPositions(pageIndex, pageData);
}

_reset() {
this._highlightMatches = false;
this._scrollMatches = false;
this._pdfDocument = null;
this._pageMatches = [];
this._pageMatchesLength = [];
this._pageMatchesPosition = [];
this._pageChars = [];
this._pageText = [];
this._visitedPagesCount = 0;
this._state = null;
// Currently selected match.
Expand Down Expand Up @@ -827,7 +874,6 @@ class PDFFindController {
_calculateRegExpMatch(query, entireWord, pageIndex, pageContent) {
const matches = (this._pageMatches[pageIndex] = []);
const matchesLength = (this._pageMatchesLength[pageIndex] = []);
const matchesPosition = (this._pageMatchesPosition[pageIndex] = []);
if (!query) {
// The query can be empty because some chars like diacritics could have
// been stripped out.
Expand All @@ -850,35 +896,8 @@ class PDFFindController {
);

if (matchLen) {
let chars = this._pageChars[pageIndex];
let start = null;
let end = null;
let total = 0;
for (let i = 0; i < chars.length; i++) {
let char = chars[i];
total++;
// For unknown reason char.u can sometimes have decomposed ligatures instead of
// single ligature character
total += char.u.length - 1;
if (char.spaceAfter || char.lineBreakAfter || char.paragraphBreakAfter) {
total++;
}
if (total >= matchPos && start === null) {
start = i;
if (i !== 0) {
start++;
}
}
if (total >= matchPos + matchLen) {
end = i;
break;
}
}
let rects = getRangeRects(chars, start, end);
let position = { pageIndex, rects };
matches.push(start);
matchesLength.push(end - start);
matchesPosition.push(position);
matches.push(matchPos);
matchesLength.push(matchLen);
}
}
}
Expand Down Expand Up @@ -953,7 +972,7 @@ class PDFFindController {
return [isUnicode, query];
}

_calculateMatch(pageIndex) {
async _calculateMatch(pageIndex) {
let query = this._query;
if (query.length === 0) {
return; // Do nothing: the matches should be wiped out already.
Expand Down Expand Up @@ -1016,33 +1035,23 @@ class PDFFindController {
for (let i = 0; i < resolvers.length; i++) {
let resolve = resolvers[i];

let text = '';
let chars = [];
let text = [];

try {
await new Promise(resolve => setTimeout(resolve));
let pageData = await this._pdfDocument.getPageData({ pageIndex: i });

function getTextFromChars(chars) {
let text = [];
for (let char of chars) {
text.push(char.u)
if (char.spaceAfter || char.lineBreakAfter || char.paragraphBreakAfter) {
text.push(' ');
}
for (let char of pageData.chars) {
text.push(char.u);
if (char.spaceAfter || char.lineBreakAfter || char.paragraphBreakAfter) {
text.push(' ');
}
return text.join('').trim();
}

chars = pageData.chars;
text = getTextFromChars(pageData.chars);
}
catch (e) {
console.log(e);
}

this._pageChars[i] = chars;
this._pageText[i] = text;
text = text.join('').trim();

[
this._pageContents[i],
Expand Down Expand Up @@ -1197,7 +1206,7 @@ class PDFFindController {

this._updateUIState(state, this._state.findPrevious);
if (this._selected.pageIdx !== -1) {
this._onNavigate(this._pageMatchesPosition[this._selected.pageIdx][this._selected.matchIdx]);
this._onNavigate(this._selected.pageIdx, this._selected.matchIdx);
}
}

Expand Down
15 changes: 10 additions & 5 deletions src/pdf/pdf-view.js
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,9 @@ class PDFView {

this._findController = new PDFFindController({
linkService: this._iframeWindow.PDFViewerApplication.pdfViewer.linkService,
onNavigate: (position) => {
this.navigateToPosition(position);
onNavigate: async (pageIndex, matchIndex) => {
let matchPositions = await this._findController.getMatchPositionsAsync(pageIndex);
this.navigateToPosition(matchPositions[matchIndex]);
},
onUpdateMatches: ({ matchesCount }) => {
let result = { total: matchesCount.total, index: matchesCount.current - 1 };
Expand Down Expand Up @@ -455,7 +456,7 @@ class PDFView {
this._init2 && this._init2();

if (this._preview) {
this._detachPage(originalPage);
this._detachPage(originalPage, true);
let page = new Page(this, originalPage);
this._pages.push(page);
this._render();
Expand All @@ -466,7 +467,7 @@ class PDFView {
this._initThumbnails();
}

this._detachPage(originalPage);
this._detachPage(originalPage, true);

originalPage.textLayerPromise.then(() => {
// Text layer may no longer exist if it was detached in the meantime
Expand All @@ -492,8 +493,12 @@ class PDFView {
}
}

_detachPage(originalPage) {
_detachPage(originalPage, replacing) {
let pageIndex = originalPage.id - 1;
this._pages = this._pages.filter(x => x.originalPage !== originalPage);
if (!replacing) {
delete this._pdfPages[pageIndex];
}
}

_getPageLabel(pageIndex, usePrevAnnotation) {
Expand Down

0 comments on commit 121de4c

Please sign in to comment.