Skip to content

Commit

Permalink
Various scan improvements and re-implement scan results (#123)
Browse files Browse the repository at this point in the history
* Various scan improvements and re-implement scan results

* Bump version number
  • Loading branch information
khoodehui authored May 24, 2023
1 parent f1dd8ff commit 5beb7f9
Show file tree
Hide file tree
Showing 13 changed files with 48 additions and 162 deletions.
13 changes: 8 additions & 5 deletions __tests__/utils.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import {jest} from '@jest/globals'
import {
setThresholdLimits,
getHostnameFromRegex,
getHost,
getCurrentDate,
validateUrl,
getStoragePath,
Expand Down Expand Up @@ -33,13 +33,16 @@ describe('test setting of threshold warn level', () => {
});
});

describe('test getHostnameFromRegex', () => {
describe('test getHost', () => {
test('should retrieve the hostnames accordingly', () => {
expect(getHostnameFromRegex('https://www.bbc.com/news')).toEqual('www.bbc.com');
expect(getHostnameFromRegex('https://www.isomer.gov.sg/')).toEqual('www.isomer.gov.sg');
expect(getHostnameFromRegex('https://fontawesome.com/sessions/sign-in')).toEqual(
expect(getHost('https://www.bbc.com/news')).toEqual('www.bbc.com');
expect(getHost('https://www.isomer.gov.sg/')).toEqual('www.isomer.gov.sg');
expect(getHost('https://fontawesome.com/sessions/sign-in')).toEqual(
'fontawesome.com',
);
// port 443 is omitted from the returned host because it is the default port for HTTPS
expect(getHost('https://www.crowdtask.gov.sg:443')).toEqual('www.crowdtask.gov.sg');
expect(getHost('http://localhost:5000/about/me')).toEqual('localhost:5000');
});
});

Expand Down
20 changes: 4 additions & 16 deletions cli.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import {
getStoragePath,
} from './utils.js';
import { checkUrl, prepareData, isFileSitemap } from './constants/common.js';
import { cliOptions, messageOptions, configureReportSetting } from './constants/cliFunctions.js';
import { cliOptions, messageOptions } from './constants/cliFunctions.js';
import constants from './constants/constants.js';
import combineRun from './combine.js';
import playwrightAxeGenerator from './playwrightAxeGenerator.js';
Expand Down Expand Up @@ -110,12 +110,6 @@ const scanInit = async argvs => {
argvs.scanner = constants.scannerTypes[argvs.scanner];
argvs.headless = argvs.headless === 'yes';

// Set the parameters required to indicate whether to break down report
configureReportSetting(argvs.reportbreakdown);

// Set the parameters required to indicate threshold limits
setThresholdLimits(argvs.warn);

const res = await checkUrl(argvs.scanner, argvs.url);
const statuses = constants.urlCheckStatuses;
switch (res.status) {
Expand All @@ -125,12 +119,6 @@ const scanInit = async argvs => {
case statuses.cannotBeResolved.code:
printMessage([statuses.cannotBeResolved.message], messageOptions);
process.exit(res.status);
case statuses.errorStatusReceived.code:
printMessage(
[`${statuses.errorStatusReceived.message}${res.serverResponse}.`],
messageOptions,
);
process.exit(res.status);
case statuses.systemError.code:
printMessage([statuses.systemError.message], messageOptions);
process.exit(res.status);
Expand Down Expand Up @@ -193,13 +181,13 @@ const scanInit = async argvs => {
await combineRun(data, screenToScan);
}

// Delete dataset and request queues
cleanUp(data.randomToken);

return getStoragePath(data.randomToken);
};

scanInit(options).then(async storagePath => {
// Delete dataset and request queues
cleanUp(constants.a11yStorage);

// Take option if set
if (typeof options.zip === 'string') {
constants.cliZipFileName = options.zip;
Expand Down
8 changes: 4 additions & 4 deletions combine.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,12 @@ import crawlDomain from './crawlers/crawlDomain.js';

import { generateArtifacts } from './mergeAxeResults.js';
import {
getHostnameFromRegex,
getHost,
createAndUpdateResultsFolders,
createDetailsAndLogs,
} from './utils.js';
import constants from './constants/constants.js';

process.env.CRAWLEE_STORAGE_DIR = constants.a11yStorage;

const combineRun = async (details, deviceToScan) => {
const envDetails = { ...details };

Expand All @@ -28,8 +26,10 @@ const combineRun = async (details, deviceToScan) => {
isLocalSitemap,
} = envDetails;

process.env.CRAWLEE_STORAGE_DIR = randomToken;

const host =
type === constants.scannerTypes.sitemap && isLocalSitemap ? '' : getHostnameFromRegex(url);
type === constants.scannerTypes.sitemap && isLocalSitemap ? '' : getHost(url);

const scanDetails = {
startTime: new Date().getTime(),
Expand Down
11 changes: 8 additions & 3 deletions constants/common.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import fs from 'fs';
import constants from './constants.js';
import { silentLogger } from '../logs.js';
import * as https from 'https';
import { devices } from 'playwright';

const document = new JSDOM('').window;

Expand Down Expand Up @@ -127,8 +128,9 @@ const checkUrlConnectivity = async url => {

if (data.isValid) {
// Validate the connectivity of URL if the string format is url format
// The User-Agent header is set to a browser's user agent because some sites reject non-browser agents with a 403 error
await axios
.get(data.url, { httpsAgent, timeout: 15000 })
.get(data.url, { headers: { 'User-Agent': devices['Desktop Chrome HiDPI'].userAgent }, httpsAgent, timeout: 15000 })
.then(async response => {
const redirectUrl = response.request.res.responseUrl;
res.status = constants.urlCheckStatuses.success.code;
Expand All @@ -144,8 +146,11 @@ const checkUrlConnectivity = async url => {
.catch(error => {
if (error.response) {
// enters here if server responds with a status other than 2xx
res.status = constants.urlCheckStatuses.errorStatusReceived.code;
res.serverResponse = error.response.status
// the scan should still proceed even if error codes are received, so that accessibility scans for error pages can be done too
res.status = constants.urlCheckStatuses.success.code;
res.url = url;
res.content = error.response.data;
return res;
} else if (error.request) {
// enters here if URL cannot be accessed
res.status = constants.urlCheckStatuses.cannotBeResolved.code;
Expand Down
12 changes: 1 addition & 11 deletions constants/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,6 @@ if (fs.existsSync('/.dockerenv')) {
launchOptionsArgs = ['--disable-gpu', '--no-sandbox', '--disable-dev-shm-usage'];
}

// _folder_paths
const a11yStorage = '.a11y_storage';

export const impactOrder = {
minor: 0,
moderate: 1,
Expand All @@ -79,7 +76,7 @@ const urlCheckStatuses = {
message:
'Provided URL cannot be accessed. Please verify your internet connectivity and the correctness of the domain.',
},
errorStatusReceived: {
errorStatusReceived: { // unused for now
code: 13,
message: 'Provided URL cannot be accessed. Server responded with code ', // append it with the response code received,
},
Expand All @@ -99,8 +96,6 @@ const xmlSitemapTypes = {
};

export default {
a11yStorage,
a11yDataStoragePath: `${a11yStorage}/datasets`,
allIssueFileName: 'all_issues',
cliZipFileName: 'a11y-scan-results.zip',
maxRequestsPerCrawl,
Expand All @@ -118,8 +113,3 @@ export const wcagWebPage = 'https://www.w3.org/TR/WCAG21/';
const latestAxeVersion = '4.4';
export const axeVersion = latestAxeVersion;
export const axeWebPage = `https://dequeuniversity.com/rules/axe/${latestAxeVersion}/`;

export const alertMessageOptions = {
border: true,
borderColor: 'red',
};
4 changes: 0 additions & 4 deletions constants/questions.js
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,8 @@ const questions = [
return true;
case statuses.cannotBeResolved.code:
return statuses.cannotBeResolved.message;
case statuses.errorStatusReceived.code:
return `${statuses.errorStatusReceived.message}${res.serverResponse}.`;

case statuses.systemError.code:
return statuses.systemError.message;

case statuses.invalidUrl.code:
if (answers.scanner !== constants.scannerTypes.sitemap) {
return statuses.invalidUrl.message;
Expand Down
7 changes: 7 additions & 0 deletions crawlers/crawlDomain.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ const crawlDomain = async (url, randomToken, host, viewportSettings, maxRequests
device = {};
}

let pagesCrawled = 0;

const crawler = new crawlee.PlaywrightCrawler({
launchContext: {
launchOptions: {
Expand All @@ -54,6 +56,11 @@ const crawlDomain = async (url, randomToken, host, viewportSettings, maxRequests
requestQueue,
preNavigationHooks,
requestHandler: async ({ page, request, enqueueLinks, enqueueLinksByClickingElements }) => {
if (pagesCrawled === maxRequestsPerCrawl) {
return;
}
pagesCrawled++;

const currentUrl = request.url;
const location = await page.evaluate('location');

Expand Down
2 changes: 1 addition & 1 deletion index.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,5 @@ inquirer.prompt(questions).then(async answers => {
await combineRun(data, screenToScan);
}
// Delete dataset and request queues
cleanUp(constants.a11yStorage);
cleanUp(data.randomToken);
});
114 changes: 8 additions & 106 deletions mergeAxeResults.js
Original file line number Diff line number Diff line change
Expand Up @@ -70,112 +70,6 @@ const writeHTML = async (allIssues, storagePath, htmlFilename = 'report') => {
fs.writeFileSync(`${storagePath}/reports/${htmlFilename}.html`, html);
};

// const granularReporting = async (randomToken, allIssues) => {
// if (allIssues.length > 0) {
// const storagePath = getStoragePath(randomToken);
// const impactLevels = ['critical', 'serious', 'moderate', 'minor'];

// let currentImpactLevelIssues;
// impactLevels.forEach(async impactLevel => {
// currentImpactLevelIssues = allIssues.filter(issue => issue.impact === impactLevel);

// if (currentImpactLevelIssues.length > 0) {
// const writeSeverityResult = writeResults(
// currentImpactLevelIssues,
// storagePath,
// `compiledResults-${impactLevel}`,
// );
// const writeHTMLSeverityReport = writeHTML(
// currentImpactLevelIssues,
// storagePath,
// `report-${impactLevel}`,
// );
// await Promise.all([writeSeverityResult, writeHTMLSeverityReport]);
// }
// });

// return true;
// }

// return false;
// };

// const issueCountMap = allIssues => {
// const criticalImpact = allIssues.filter(issue => issue.impact === 'critical');
// const seriousImpact = allIssues.filter(issue => issue.impact === 'serious');
// const moderateImpact = allIssues.filter(issue => issue.impact === 'moderate');
// const minorImpact = allIssues.filter(issue => issue.impact === 'minor');

// const issueCount = new Map();
// issueCount.set('critical', criticalImpact.length);
// issueCount.set('serious', seriousImpact.length);
// issueCount.set('moderate', moderateImpact.length);
// issueCount.set('minor', minorImpact.length);
// issueCount.set('total', allIssues.length);

// return issueCount;
// };

// const thresholdLimitCheck = async (warnLevel, allIssues, totalUniqueIssues) => {
// const issueCounts = issueCountMap(allIssues);

// const messages = [
// [`Total Issues: ${issueCounts.get('total')}`, `Total Unique Issues: ${totalUniqueIssues}`],
// [
// `Issue Breakdown`,
// `Critical: ${issueCounts.get('critical')}`,
// `Serious: ${issueCounts.get('serious')}`,
// `Moderate: ${issueCounts.get('moderate')}`,
// `Minor: ${issueCounts.get('minor')}`,
// ],
// ];

// const uniqueIssues = [`Unique: ${totalUniqueIssues}`];

// if (warnLevel !== 'none' && issueCounts.get(warnLevel) > 0) {
// messages.push([
// `Issues with impact level - ${warnLevel} found in your project. Please review the accessibility issues.`,
// ]);
// process.exitCode = 1;
// }

// messages.forEach((message, index, array) => {
// if (array.length !== 1 && index === array.length - 1) {
// printMessage(message, constants.alertMessageOptions);
// } else {
// printMessage(message);
// }
// });
// };

// export const generateArtifacts = async (randomToken, deviceToScan) => {
// const storagePath = getStoragePath(randomToken);
// const directory = `${storagePath}/${constants.allIssueFileName}`;
// let allIssues = [];
// const allFiles = await extractFileNames(directory);

// await Promise.all(
// allFiles.map(async file => {
// const rPath = `${directory}/${file}`;
// const flattenedIssues = await flattenAxeResults(rPath);
// allIssues = allIssues.concat(flattenedIssues);
// }),
// ).catch(flattenIssuesError => {
// consoleLogger.info('An error has occurred when flattening the issues, please try again.');
// silentLogger.error(flattenIssuesError);
// });

// const totalUniqueIssues = new Set(allIssues.map(issue => issue.description)).size;
// if (process.env.REPORT_BREAKDOWN === '1') {
// await granularReporting(randomToken, allIssues);
// }

// await thresholdLimitCheck(process.env.WARN_LEVEL, allIssues, totalUniqueIssues);

// await writeResults(allIssues, storagePath);
// await writeHTML(allIssues, storagePath, deviceToScan);
// };

const pushResults = async (rPath, allIssues) => {
const pageResults = await parseContentToJson(rPath);
const { url, pageTitle } = pageResults;
Expand Down Expand Up @@ -281,6 +175,14 @@ export const generateArtifacts = async (randomToken, urlScanned, scanType, viewp

flattenAndSortResults(allIssues);

printMessage([
'Scan Summary',
'',
`Must Fix: ${allIssues.items.mustFix.rules.length} issues / ${allIssues.items.mustFix.totalItems} occurrences`,
`Good to Fix: ${allIssues.items.goodToFix.rules.length} issues / ${allIssues.items.goodToFix.totalItems} occurrences`,
`Passed: ${allIssues.items.passed.totalItems} occurrences`,
])

await writeResults(allIssues, storagePath);
await writeHTML(allIssues, storagePath);
};
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "purple-hats",
"main": "index.js",
"version": "0.0.14",
"version": "0.0.15",
"type": "module",
"imports": {
"#root/*.js": "./*.js"
Expand Down
4 changes: 2 additions & 2 deletions playwrightAxeGenerator.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ const playwrightAxeGenerator = async (domain, data) => {
import { consoleLogger, silentLogger } from '#root/logs.js';
const blacklistedPatternsFilename = 'exclusions.txt';
process.env.CRAWLEE_STORAGE_DIR = constants.a11yStorage;
process.env.CRAWLEE_STORAGE_DIR = '${randomToken}';
const compareExe = getExecutablePath('**/ImageMagick*/bin','compare');
if (!compareExe) {
Expand Down Expand Up @@ -291,7 +291,7 @@ const processPage = async page => {
customDevice = 'iPhone 11';
}

if (customDevice) {
if (customDevice && !viewportWidth) {
viewportWidth = devices[customDevice].viewport.width;
userAgentOpts = `--user-agent \"${devices[customDevice].userAgent}\"`;
}
Expand Down
Loading

0 comments on commit 5beb7f9

Please sign in to comment.