diff --git a/README.md b/README.md index 9a140e77..ce991bcc 100644 --- a/README.md +++ b/README.md @@ -254,6 +254,9 @@ Options: [string] [required] -j, --customFlowLabel Give Custom Flow Scan a label for easier reference in t he report [string] + -t, --specifiedMaxConcurrency Maximum number of pages to scan concurrently. + Use for sites with throttling. Defaults to 25. + [number] Examples: To scan sitemap of website:', 'node cli.js -c [ 1 | Sitemap ] -d -u -w diff --git a/a11y-scan-results.zip b/a11y-scan-results.zip new file mode 100644 index 00000000..97b7bf67 Binary files /dev/null and b/a11y-scan-results.zip differ diff --git a/cli.js b/cli.js index 1cb48112..756b6061 100644 --- a/cli.js +++ b/cli.js @@ -123,6 +123,16 @@ Usage: node cli.js -c -d -w -u OPTIONS`, return option; }) + .coerce('t', option => { + if (!Number.isInteger(option) || Number(option) <= 0) { + printMessage( + [`Invalid number for max concurrency. Please provide a positive integer.`], + messageOptions, + ); + process.exit(1); + } + return option; + }) .coerce('k', nameEmail => { if (nameEmail.indexOf(':') === -1) { printMessage( @@ -168,7 +178,7 @@ const scanInit = async argvs => { argvs.scanner = constants.scannerTypes[argvs.scanner]; argvs.headless = argvs.headless === 'yes'; argvs.browserToRun = constants.browserTypes[argvs.browserToRun]; - + let useChrome = false; let useEdge = false; let chromeDataDir = null; diff --git a/combine.js b/combine.js index b28f1e35..579e5884 100644 --- a/combine.js +++ b/combine.js @@ -26,6 +26,7 @@ const combineRun = async (details, deviceToScan) => { browser, userDataDirectory, strategy, + specifiedMaxConcurrency, } = envDetails; process.env.CRAWLEE_STORAGE_DIR = randomToken; @@ -62,6 +63,7 @@ const combineRun = async (details, deviceToScan) => { maxRequestsPerCrawl, browser, userDataDirectory, + specifiedMaxConcurrency, ); break; @@ -75,6 +77,7 @@ const combineRun = async (details, deviceToScan) => { browser, userDataDirectory, strategy, + specifiedMaxConcurrency, ); break; @@ -88,7 +91,13 @@ const combineRun = async (details, deviceToScan) => { if (scanDetails.urlsCrawled.scanned.length > 0) { await createAndUpdateResultsFolders(randomToken); - const basicFormHTMLSnippet = await generateArtifacts(randomToken, url, type, deviceToScan, urlsCrawled.scanned); + const basicFormHTMLSnippet = await generateArtifacts( + randomToken, + url, + type, + deviceToScan, + urlsCrawled.scanned, + ); const [name, email] = nameEmail.split(':'); await submitFormViaPlaywright( browser, diff --git a/constants/cliFunctions.js b/constants/cliFunctions.js index 423d1153..7260f6bb 100644 --- a/constants/cliFunctions.js +++ b/constants/cliFunctions.js @@ -78,8 +78,8 @@ export const cliOptions = { alias: 'customFlowLabel', describe: 'Give Custom Flow Scan a label for easier reference in the report', type: 'string', - requiresArg: true, - demandOption: false + requiresArg: true, + demandOption: false, }, k: { alias: 'nameEmail', @@ -87,6 +87,13 @@ export const cliOptions = { type: 'string', demandOption: true, }, + t: { + alias: 'specifiedMaxConcurrency', + describe: + 'Maximum number of pages to scan concurrently. Use for sites with throttling. Defaults to 25.', + type: 'number', + demandOption: false, + }, }; export const configureReportSetting = isEnabled => { diff --git a/constants/common.js b/constants/common.js index 5d8694f3..21df8b44 100644 --- a/constants/common.js +++ b/constants/common.js @@ -22,6 +22,7 @@ import constants, { formDataFields, whitelistedAttributes, mutedAttributeValues, + blackListedFileExtensions, } from './constants.js'; import { silentLogger } from '../logs.js'; @@ -100,6 +101,11 @@ export const sortAlphaAttributes = htmlString => { return entireHtml; }; +export const isBlacklistedFileExtensions = (url, blacklistedFileExtensions) => { + const urlExtension = url.split('.').pop(); + return blacklistedFileExtensions.includes(urlExtension); +}; + const document = new JSDOM('').window; const httpsAgent = new https.Agent({ @@ -422,6 +428,7 @@ export const prepareData = argv => { browserToRun, nameEmail, customFlowLabel, + specifiedMaxConcurrency, } = argv; return { @@ -437,7 +444,8 @@ export const prepareData = argv => { isLocalSitemap, browser: browserToRun, nameEmail, - customFlowLabel + customFlowLabel, + specifiedMaxConcurrency, }; }; diff --git a/constants/constants.js b/constants/constants.js index 55d27017..8138950e 100644 --- a/constants/constants.js +++ b/constants/constants.js @@ -60,6 +60,22 @@ export const mutedAttributeValues = [ `aria-labelledby`, ]; +export const blackListedFileExtensions = [ + 'css', + 'js', + 'txt', + 'mp3', + 'mp4', + 'jpg', + 'jpeg', + 'png', + 'svg', + 'gif', + 'woff', + 'pdf', + 'zip', +]; + export const intermediateScreenshotsPath = './screenshots'; export const destinationPath = storagePath => `${storagePath}/screenshots`; @@ -237,7 +253,7 @@ export const impactOrder = { }; export const formDataFields = { - formUrl: `https://docs.google.com/forms/d/e/1FAIpQLSem5C8fyNs5TiU5Vv2Y63-SH7CHN86f-LEPxeN_1u_ldUbgUA/formResponse`, + formUrl: `https://docs.google.com/forms/d/e/1FAIpQLSeUmqoVRSvMrW1DRi1KNMemWyKvDbEWGJp2dve4qb8QB3Zgvw/formResponse`, websiteUrlField: 'entry.1562345227', scanTypeField: 'entry.1148680657', emailField: 'entry.52161304', @@ -284,7 +300,7 @@ export default { allIssueFileName: 'all_issues', cliZipFileName: 'a11y-scan-results.zip', maxRequestsPerCrawl, - maxConcurrency: 50, + maxConcurrency: 25, scannerTypes, browserTypes, urlsCrawledObj, diff --git a/crawlers/commonCrawlerFunc.js b/crawlers/commonCrawlerFunc.js index bd12777d..80ce813a 100644 --- a/crawlers/commonCrawlerFunc.js +++ b/crawlers/commonCrawlerFunc.js @@ -1,18 +1,18 @@ /* eslint-disable no-unused-vars */ /* eslint-disable no-param-reassign */ -import crawlee from 'crawlee'; +import crawlee, { playwrightUtils } from 'crawlee'; import axe from 'axe-core'; import { axeScript, saflyIconSelector } from '../constants/constants.js'; export const filterAxeResults = (results, pageTitle) => { - const { violations, incomplete, passes, url } = results; + const { violations, passes, url } = results; let totalItems = 0; const mustFix = { totalItems: 0, rules: {} }; const goodToFix = { totalItems: 0, rules: {} }; const passed = { totalItems: 0, rules: {} }; - const process = (item, needsReview = false) => { + const process = (item) => { const { id: rule, help: description, helpUrl, tags, nodes } = item; if (rule === 'frame-tested') return; @@ -24,11 +24,9 @@ export const filterAxeResults = (results, pageTitle) => { if (!(rule in category.rules)) { category.rules[rule] = { description, helpUrl, conformance, totalItems: 0, items: [] }; } - const message = needsReview - ? failureSummary.slice(failureSummary.indexOf('\n') + 1).trim() - : failureSummary; + const message = failureSummary; category.rules[rule].items.push( - needsReview ? { html, message, needsReview } : { html, message }, + { html, message }, ); category.rules[rule].totalItems += 1; category.totalItems += 1; @@ -46,7 +44,6 @@ export const filterAxeResults = (results, pageTitle) => { }; violations.forEach(item => process(item)); - incomplete.forEach(item => process(item, true)); passes.forEach(item => { const { id: rule, help: description, helpUrl, tags, nodes } = item; @@ -91,7 +88,7 @@ export const runAxeScript = async (page, selectors = []) => { }, }); return axe.run(selectors, { - resultTypes: ['violations', 'passes', 'incomplete'], + resultTypes: ['violations', 'passes'], }); }, { selectors, saflyIconSelector }, @@ -115,4 +112,4 @@ export const preNavigationHooks = [ export const failedRequestHandler = async ({ request }) => { crawlee.log.error(`Failed Request - ${request.url}: ${request.errorMessages}`); -}; \ No newline at end of file +}; diff --git a/crawlers/crawlDomain.js b/crawlers/crawlDomain.js index 5b4602c8..56a3c937 100644 --- a/crawlers/crawlDomain.js +++ b/crawlers/crawlDomain.js @@ -1,12 +1,12 @@ -import crawlee from 'crawlee'; +import crawlee, { playwrightUtils } from 'crawlee'; import { createCrawleeSubFolders, preNavigationHooks, runAxeScript, failedRequestHandler, } from './commonCrawlerFunc.js'; -import constants, { basicAuthRegex } from '../constants/constants.js'; -import { getPlaywrightLaunchOptions } from '../constants/common.js'; +import constants, { basicAuthRegex, blackListedFileExtensions } from '../constants/constants.js'; +import { getPlaywrightLaunchOptions, isBlacklistedFileExtensions } from '../constants/common.js'; const crawlDomain = async ( url, @@ -17,6 +17,7 @@ const crawlDomain = async ( browser, userDataDirectory, strategy, + specifiedMaxConcurrency, ) => { const urlsCrawled = { ...constants.urlsCrawledObj }; const { maxConcurrency } = constants; @@ -70,13 +71,31 @@ const crawlDomain = async ( }, requestQueue, preNavigationHooks, - requestHandler: async ({ page, request, enqueueLinks, enqueueLinksByClickingElements }) => { + requestHandler: async ({ + page, + request, + response, + enqueueLinks, + enqueueLinksByClickingElements, + }) => { + const currentUrl = request.url; + + if (isBlacklistedFileExtensions(currentUrl, blackListedFileExtensions)) { + urlsCrawled.invalid.push(currentUrl); + return; + } + + if (response.status() !== 200) { + urlsCrawled.invalid.push(request.url); + return; + } + if (pagesCrawled === maxRequestsPerCrawl) { + urlsCrawled.invalid.push(request.url); return; } pagesCrawled++; - const currentUrl = request.url; const location = await page.evaluate('location'); if (isBasicAuth) { @@ -84,7 +103,7 @@ const crawlDomain = async ( } else if (location.host.includes(host)) { const results = await runAxeScript(page); await dataset.pushData(results); - urlsCrawled.scanned.push({url: currentUrl, pageTitle: results.pageTitle}); + urlsCrawled.scanned.push({ url: currentUrl, pageTitle: results.pageTitle }); await enqueueLinks({ // set selector matches anchor elements with href but not contains # or starting with mailto: @@ -116,7 +135,7 @@ const crawlDomain = async ( }, failedRequestHandler, maxRequestsPerCrawl, - maxConcurrency, + maxConcurrency: specifiedMaxConcurrency || maxConcurrency, }); await crawler.run(); diff --git a/crawlers/crawlSitemap.js b/crawlers/crawlSitemap.js index fdb5906f..155db80a 100644 --- a/crawlers/crawlSitemap.js +++ b/crawlers/crawlSitemap.js @@ -23,6 +23,7 @@ const crawlSitemap = async ( maxRequestsPerCrawl, browser, userDataDirectory, + specifiedMaxConcurrency, ) => { const urlsCrawled = { ...constants.urlsCrawledObj }; const { playwrightDeviceDetailsObject } = viewportSettings; @@ -65,14 +66,14 @@ const crawlSitemap = async ( if (status === 200 && isWhitelistedContentType(contentType)) { const results = await runAxeScript(page); await dataset.pushData(results); - urlsCrawled.scanned.push({url: currentUrl, pageTitle: results.pageTitle}); + urlsCrawled.scanned.push({ url: currentUrl, pageTitle: results.pageTitle }); } else { urlsCrawled.invalid.push(currentUrl); } }, failedRequestHandler, maxRequestsPerCrawl, - maxConcurrency, + maxConcurrency: specifiedMaxConcurrency || maxConcurrency, }); await crawler.run(); diff --git a/package.json b/package.json index d2be1839..e6335ec1 100644 --- a/package.json +++ b/package.json @@ -16,7 +16,7 @@ "glob": "^9.1.2", "inquirer": "^9.1.4", "jsdom": "^21.0.0", - "playwright": "1.32.1", + "playwright": "1.36.1", "print-message": "^3.0.1", "safe-regex": "^2.1.1", "validator": "^13.7.0", diff --git a/playwrightAxeGenerator.js b/playwrightAxeGenerator.js index 25f50d51..c2e44785 100644 --- a/playwrightAxeGenerator.js +++ b/playwrightAxeGenerator.js @@ -335,7 +335,7 @@ const processPage = async page => { try { tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), appPrefix)); - let browser = 'webkit'; + let browser = 'chromium'; let userAgentOpts = null; let channel = null; @@ -360,7 +360,7 @@ const processPage = async page => { channel = 'chrome'; } - let codegenCmd = `npx playwright codegen --target javascript -o ${tmpDir}/intermediateScript.js ${data.url}`; + let codegenCmd = `npx playwright codegen --target javascript -o "${tmpDir}/intermediateScript.js" "${data.url}"`; let extraCodegenOpts = `${userAgentOpts} --browser ${browser} --block-service-workers --ignore-https-errors ${ channel && `--channel ${channel}` }`; diff --git a/static/ejs/partials/scripts/ruleOffcanvas.ejs b/static/ejs/partials/scripts/ruleOffcanvas.ejs index 88042a9b..0fe5d536 100644 --- a/static/ejs/partials/scripts/ruleOffcanvas.ejs +++ b/static/ejs/partials/scripts/ruleOffcanvas.ejs @@ -187,34 +187,31 @@ category summary is clicked %> const elementCardsList = createElementFromString('
    '); page.items.forEach(item => { - const itemCard = createElementFromString(` -
  • -
    - ${ - item.needsReview - ? `
    This occurrence might be a false positive that needs to be verified by a human.
    ` - : `` - } -
    -
    -
    HTML element
    -
    ${htmlEscapeString(
    -                    item.html,
    -                  )}
    -
    -
    -
    -
    ${item.needsReview ? 'Details' : 'How to fix'}
    -
    - ${generateItemMessageElement(item.needsReview, item.message)} + if (!item.needsReview){ + const itemCard = createElementFromString(` +
  • +
    +
    +
    +
    HTML element
    +
    ${htmlEscapeString(
    +                      item.html,
    +                    )}
    +
    +
    +
    +
    How to fix
    +
    + ${generateItemMessageElement(item.message)} +
    - -
  • - `); - - elementCardsList.appendChild(itemCard); +
  • + `); + + elementCardsList.appendChild(itemCard); + } }); accordionBody.appendChild(elementCardsList); @@ -224,16 +221,9 @@ category summary is clicked %> hljs.highlightAll(); } - function generateItemMessageElement(needsReview, rawMessage) { + function generateItemMessageElement(rawMessage) { const htmlEscapedMessageArray = rawMessage.split('\n ').map(m => htmlEscapeString(m)); - if (needsReview) { - if (htmlEscapedMessageArray.length === 1) { - return `

    ${htmlEscapedMessageArray[0]}

    `; - } else { - return `
      ${htmlEscapedMessageArray.map(m => `
    • ${m}
    • `).join('')}
    `; - } - } else { let i = 0; const elements = []; @@ -256,7 +246,7 @@ category summary is clicked %> return elements.join(''); } - } + const whyItMatters = { accesskeys: diff --git a/static/ejs/partials/styles/styles.ejs b/static/ejs/partials/styles/styles.ejs index 5d4a6783..6413215a 100644 --- a/static/ejs/partials/styles/styles.ejs +++ b/static/ejs/partials/styles/styles.ejs @@ -341,7 +341,10 @@ padding-left: 0.5rem; } #wcag-compliance-card, - #top-five-card, + #top-five-card{ + border-radius: 0.25rem; + } + #summary-scan-results-card, #scanabout-compliance-card { box-shadow: none;