Skip to content

Commit

Permalink
feat: canvasless screenshot (#390)
Browse files Browse the repository at this point in the history
* chore: re-enable screenshotting code

- does not flag PDF findings yet

* chore: got pdf screenshot working with canvas

* chore: use @napi-rs/canvas instead of node-canvas

- include canvasFactory in renderContext
- do not set width and height to zero when destroying canvas
- pass 'image/png' to canvas.toBuffer()

* fix: type errors during build

* chore: add type safety to pdfScanFunc

* chore: add even more type safety

* Fix veraPDF integration to support the current version available at https://github.com/GovTechSG/purple-a11y/releases/download/cache/verapdf-installer.zip

* fix: ViewportSize types

* fix: add more type definitions

---------

Co-authored-by: younglim <younglim@users.noreply.github.com>
shioju and younglim authored Aug 5, 2024
1 parent 007333e commit ece0e47
Showing 8 changed files with 1,088 additions and 367 deletions.
631 changes: 595 additions & 36 deletions package-lock.json

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@
},
"dependencies": {
"@json2csv/node": "^7.0.3",
"@napi-rs/canvas": "^0.1.53",
"axe-core": "^4.9.1",
"axios": "^1.6.2",
"cheerio": "^1.0.0-rc.12",
@@ -38,6 +39,7 @@
"@types/eslint__js": "^8.42.3",
"@types/fs-extra": "^11.0.4",
"@types/inquirer": "^9.0.7",
"@types/lodash": "^4.17.7",
"@types/safe-regex": "^1.1.6",
"@types/validator": "^13.11.10",
"@types/which": "^3.0.4",
4 changes: 1 addition & 3 deletions src/crawlers/crawlDomain.ts
Original file line number Diff line number Diff line change
@@ -24,7 +24,7 @@ import {
waitForPageLoaded,
} from '../constants/common.js';
import { areLinksEqual, isFollowStrategy } from '../utils.js';
import { handlePdfDownload, runPdfScan, mapPdfScanResults } from './pdfScanFunc.js';
import { handlePdfDownload, runPdfScan, mapPdfScanResults, doPdfScreenshots } from './pdfScanFunc.js';
import fs from 'fs';
import { silentLogger, guiInfoLog } from '../logs.js';
import type { BrowserContext, ElementHandle, Frame, Page } from 'playwright';
@@ -625,13 +625,11 @@ const crawlDomain = async (
const pdfResults = await mapPdfScanResults(randomToken, uuidToPdfMapping);

// get screenshots from pdf docs
/*
if (includeScreenshots) {
await Promise.all(
pdfResults.map(async result => await doPdfScreenshots(randomToken, result)),
);
}
*/

// push results for each pdf document to key value store
await Promise.all(pdfResults.map(result => dataset.pushData(result)));
11 changes: 9 additions & 2 deletions src/crawlers/crawlLocalFile.ts
Original file line number Diff line number Diff line change
@@ -18,7 +18,7 @@ import {
convertPathToLocalFile,
} from '../constants/common.js';
import { areLinksEqual, isWhitelistedContentType } from '../utils.js';
import { handlePdfDownload, runPdfScan, mapPdfScanResults } from './pdfScanFunc.js';
import { handlePdfDownload, runPdfScan, mapPdfScanResults, doPdfScreenshots } from './pdfScanFunc.js';
import fs from 'fs';
import { guiInfoLog } from '../logs.js';
import playwright from 'playwright';
@@ -195,9 +195,16 @@ const crawlLocalFile = async (
// transform result format
const pdfResults = await mapPdfScanResults(randomToken, uuidToPdfMapping);

// get screenshots from pdf docs
if (includeScreenshots) {
await Promise.all(
pdfResults.map(async result => await doPdfScreenshots(randomToken, result)),
);
}

// push results for each pdf document to key value store
await Promise.all(pdfResults.map(result => dataset.pushData(result)));
}
return urlsCrawled;
};
export default crawlLocalFile;
export default crawlLocalFile;
Loading

0 comments on commit ece0e47

Please sign in to comment.