Skip to content

Commit

Permalink
Bug fix on sitemap adds localFileScan, move report directory up one level (#364)
Browse files Browse the repository at this point in the history

* Allow sitemap scans to recurse through all files (previously only the last childSitemap's files were collected, instead of all of the files)
* Allow pdfScanFunc to run both filePath and url (it uses fs for file path and got for url)
* Prevent sitemap from recursing infinitely
* Check whether the input is a file path or a URL; otherwise skip it without stopping the scan
* Added crawlLocalFile (-c 5)
* Added results and log folder name based on the local file name given
* Added try catch to all the json parsing for sitemap
* Fix bug so that files with a dot separator within the file name can be scanned
* Allow all types of files to be scanned (verapdf for pdf files, axescript for non pdf files)
* Added typing for crawlLocalFile.js
* Move reports directory to parent directory

---------

Co-authored-by: younglim <[email protected]>
  • Loading branch information
angyonghaseyo and younglim authored Jun 19, 2024
1 parent 8cafeb1 commit 3a7abd1
Show file tree
Hide file tree
Showing 20 changed files with 477 additions and 137 deletions.
2 changes: 1 addition & 1 deletion INTEGRATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ Create <code>cypress.config.js</code> with the following contents, and change yo
return await purpleA11y.pushScanResults(res, metadata, elementsToClick);
},
returnResultsDir() {
return `results/${purpleA11y.randomToken}_${purpleA11y.scanDetails.urlsCrawled.scanned.length}pages/reports/report.html`;
return `results/${purpleA11y.randomToken}_${purpleA11y.scanDetails.urlsCrawled.scanned.length}pages/report.html`;
},
finishPurpleA11yTestCase() {
purpleA11y.testThresholds();
Expand Down
2 changes: 1 addition & 1 deletion __tests__/mergeAxeResults.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ beforeEach(() => {

// Reports storagePath, expected report and compiled result files
htmlFilename = 'report';
expectedHTMLFilename = `${expectedStoragePath}/reports/${htmlFilename}.html`;
expectedHTMLFilename = `${expectedStoragePath}/${htmlFilename}.html`;

// Mock the JSON result generated from the issues
dateTimeStamp = getFormattedTime();
Expand Down
12 changes: 6 additions & 6 deletions gitlab-pipeline-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ a11y-scan:
artifacts:
paths:
# Stores the report CSV, HTML, summary PDF only to save storage space
- artifacts/reports/report.csv
- artifacts/reports/report.html
- artifacts/reports/scanDetails.csv
- artifacts/reports/summary.pdf
- artifacts/report.csv
- artifacts/report.html
- artifacts/scanDetails.csv
- artifacts/summary.pdf
# Include screenhots folder
# - artifacts/reports/elemScreenshots/
# - artifacts/elemScreenshots/
# Stores the reports folder so it can be accessed through Browse
# - artifacts/reports
# - artifacts/
# Uploads the results as zipped file
# - $A11Y_SCAN_ARTIFACT_NAME
6 changes: 3 additions & 3 deletions src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ const scanInit = async (argvs: Answers): Promise<string> => {
printMessage([statuses.systemError.message], messageOptions);
process.exit(res.status);
case statuses.invalidUrl.code:
if (argvs.scanner !== ScannerTypes.SITEMAP) {
if (argvs.scanner !== ScannerTypes.SITEMAP && argvs.scanner !== ScannerTypes.LOCALFILE) {
printMessage([statuses.invalidUrl.message], messageOptions);
process.exit(res.status);
}
Expand All @@ -277,7 +277,7 @@ const scanInit = async (argvs: Answers): Promise<string> => {
file is a sitemap */
const finalFilePath = getFileSitemap(argvs.url);
if (finalFilePath) {
argvs.isLocalSitemap = true;
argvs.isLocalFileScan = true;
argvs.finalUrl = finalFilePath;
if (process.env.VALIDATE_URL_PH_GUI) {
console.log('Url is valid');
Expand Down Expand Up @@ -367,7 +367,7 @@ const optionsAnswer: Answers = {
followRobots: options['followRobots'],
customFlowLabel: options['customFlowLabel'],
viewportWidth: options['viewportWidth'],
isLocalSitemap: options['isLocalSitemap'],
isLocalFileScan: options['isLocalFileScan'],
exportDirectory: options['exportDirectory'],
clonedBrowserDataDir: options['clonedBrowserDataDir'],
specifiedMaxConcurrency: options['specifiedMaxConcurrency'],
Expand Down
39 changes: 33 additions & 6 deletions src/combine.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import printMessage from 'print-message';
import crawlSitemap from './crawlers/crawlSitemap.js';
import crawlDomain from './crawlers/crawlDomain.js';
import crawlLocalFile from './crawlers/crawlLocalFile.js';
import crawlIntelligentSitemap from './crawlers/crawlIntelligentSitemap.js';
import { generateArtifacts } from './mergeAxeResults.js';
import { getHost, createAndUpdateResultsFolders, createDetailsAndLogs } from './utils.js';
Expand All @@ -10,6 +11,7 @@ import { consoleLogger, silentLogger } from './logs.js';
import runCustom from './crawlers/runCustom.js';
import { alertMessageOptions } from './constants/cliFunctions.js';
import { Data } from './index.js';
import { fileURLToPath, pathToFileURL } from 'url';


// Class exports
Expand Down Expand Up @@ -42,7 +44,7 @@ const combineRun = async (details:Data, deviceToScan:string) => {
viewportWidth,
playwrightDeviceDetailsObject,
maxRequestsPerCrawl,
isLocalSitemap,
isLocalFileScan,
browser,
userDataDirectory,
strategy,
Expand All @@ -60,7 +62,11 @@ const combineRun = async (details:Data, deviceToScan:string) => {
process.env.CRAWLEE_LOG_LEVEL = 'ERROR';
process.env.CRAWLEE_STORAGE_DIR = randomToken;

const host = type === ScannerTypes.SITEMAP && isLocalSitemap ? '' : getHost(url);
const host =
(type === ScannerTypes.SITEMAP && isLocalFileScan) ||
(type === ScannerTypes.LOCALFILE && isLocalFileScan)
? ''
: getHost(url);

let blacklistedPatterns:string[] | null = null;
try {
Expand All @@ -72,15 +78,17 @@ const combineRun = async (details:Data, deviceToScan:string) => {
}

// remove basic-auth credentials from URL
let finalUrl = urlWithoutAuth(url);
let finalUrl = (!(type === ScannerTypes.SITEMAP && isLocalFileScan || type === ScannerTypes.LOCALFILE && isLocalFileScan)) ? urlWithoutAuth(url) : new URL(pathToFileURL(url));

//Use the string version of finalUrl to reduce logic at submitForm
let finalUrlString = finalUrl.toString();

const scanDetails = {
startTime: new Date(),
endTime: new Date(),
crawlType: type,
requestUrl: finalUrl,
urlsCrawled: new UrlsCrawled(),

};

const viewportSettings:ViewportSettingsClass = new ViewportSettingsClass(
Expand Down Expand Up @@ -119,6 +127,23 @@ const combineRun = async (details:Data, deviceToScan:string) => {
);
break;

case ScannerTypes.LOCALFILE:
urlsCrawledObj = await crawlLocalFile(
url,
randomToken,
host,
viewportSettings,
maxRequestsPerCrawl,
browser,
userDataDirectory,
specifiedMaxConcurrency,
fileTypes,
blacklistedPatterns,
includeScreenshots,
extraHTTPHeaders,
);
break;

case ScannerTypes.INTELLIGENT:
urlsCrawledObj = await crawlIntelligentSitemap(
url,
Expand Down Expand Up @@ -168,6 +193,7 @@ const combineRun = async (details:Data, deviceToScan:string) => {
scanDetails.endTime = new Date();
scanDetails.urlsCrawled = urlsCrawledObj;
await createDetailsAndLogs(randomToken);
if (scanDetails.urlsCrawled) {
if (scanDetails.urlsCrawled.scanned.length > 0) {
await createAndUpdateResultsFolders(randomToken);
const pagesNotScanned = [
Expand All @@ -192,7 +218,7 @@ const combineRun = async (details:Data, deviceToScan:string) => {
browser,
userDataDirectory,
url, // scannedUrl
finalUrl.href, //entryUrl
new URL(finalUrlString).href, //entryUrl
type,
email,
name,
Expand All @@ -202,7 +228,8 @@ const combineRun = async (details:Data, deviceToScan:string) => {
pagesNotScanned.length,
metadata,
);
} else {
}
}else {
printMessage([`No pages were scanned.`], alertMessageOptions);
}
};
Expand Down
6 changes: 4 additions & 2 deletions src/constants/cliFunctions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ export const alertMessageOptions = {
export const cliOptions: { [key: string]: Options } = {
c: {
alias: 'scanner',
describe: 'Type of scan, 1) sitemap, 2) website crawl, 3) custom flow, 4) intelligent',
describe: 'Type of scan, 1) sitemap, 2) website crawl, 3) custom flow, 4) intelligent 5) local file',
requiresArg: true,
coerce: option => {
const choices = ['sitemap', 'website', 'custom', 'intelligent'];
const choices = ['sitemap', 'website', 'custom', 'intelligent', 'localfile'];
if (typeof option === 'number') {
// Will also allow integer choices
if (Number.isInteger(option) && option > 0 && option <= choices.length) {
Expand All @@ -34,6 +34,8 @@ export const cliOptions: { [key: string]: Options } = {
return ScannerTypes.WEBSITE;
case 'custom':
return ScannerTypes.CUSTOM;
case 'localfile':
return ScannerTypes.LOCALFILE;
case 'intelligent':
return ScannerTypes.INTELLIGENT;
default:
Expand Down
Loading

0 comments on commit 3a7abd1

Please sign in to comment.