Skip to content

Commit

Permalink
Browser based scans and scanning in proxy environment (#132)
Browse files Browse the repository at this point in the history
* Ability to scan for basic-authenticated sites
* Ability to scan cookie-authenticated sites with Chrome/Edge profiles.
* Browser-based scans and concurrent browser-based scans
* "-b" flag in cli to specify the browser to be used
* "-s" flag in cli to specify the crawling strategy to be domain or hostname
* Updated paths as per the new Purple-HATS directory structure
* Network settings on Playwright for proxy environments with server-side rendering
* Rename passed_items.json to passed_items.json.txt
* Updated README.md
  • Loading branch information
Georgetxm authored Jun 30, 2023
1 parent 64430a6 commit d2aa6b1
Show file tree
Hide file tree
Showing 17 changed files with 2,026 additions and 1,070 deletions.
15 changes: 9 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,11 +203,12 @@ Options:
ailable in website and sitemap scans [number]
-h, --headless Whether to run the scan in headless mode. Defaults to y
es. [string] [choices: "yes", "no"] [default: "yes"]
--reportbreakdown Will break down the main report according to impact
[boolean] [default: false]
--warn Track for issues of target impact level
[choices: "critical", "serious", "moderate", "minor", "none"] [default: "none"
]
-b, --browserToRun Browser to run the scan on: 1) Chromium, 2) Chrome, 3) Ed
ge. Defaults to Chromium.
[choices: "chrome", "edge", "chromium"] [default: "chromium"]
-s, --strategy Strategy to choose which links to crawl in a website scan
. Defaults to "same-domain".
[choices: "same-domain", "same-hostname"]

Examples:
To scan sitemap of website:', 'node cli.js -c [ 1 | Sitemap ] -d <device> -u
Expand All @@ -218,10 +219,12 @@ Examples:
<url_link> -w <viewportWidth>
```

### Mobile Device Options
### Device Options
<details>
<summary>Click here for list of device options supported</summary>

- "Desktop" (defaults to a 1280x720 viewport)
- "Mobile" (defaults to iPhone 11 viewport)
- "Desktop Chrome HiDPI"
- "Desktop Edge HiDPI"
- "Desktop Firefox HiDPI"
Expand Down
208 changes: 185 additions & 23 deletions cli.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,29 @@
#!/usr/bin/env node
/* eslint-disable no-fallthrough */
/* eslint-disable no-undef */
/* eslint-disable no-param-reassign */
import fs from 'fs-extra';
import _yargs from 'yargs';
import { hideBin } from 'yargs/helpers';
import printMessage from 'print-message';
import { devices } from 'playwright';
import { cleanUp, zipResults, setHeadlessMode, getVersion, getStoragePath } from './utils.js';
import {
cleanUp,
zipResults,
setHeadlessMode,
setThresholdLimits,
getVersion,
getStoragePath,
} from './utils.js';
import { checkUrl, prepareData, isFileSitemap } from './constants/common.js';
checkUrl,
prepareData,
isFileSitemap,
cloneChromeProfiles,
cloneEdgeProfiles,
deleteClonedChromeProfiles,
deleteClonedEdgeProfiles,
} from './constants/common.js';
import { cliOptions, messageOptions } from './constants/cliFunctions.js';
import constants from './constants/constants.js';
import constants, {
getDefaultChromeDataDir,
getDefaultEdgeDataDir,
} from './constants/constants.js';
import combineRun from './combine.js';
import playwrightAxeGenerator from './playwrightAxeGenerator.js';
import { devices } from 'playwright';
import { silentLogger } from './logs.js';

const appVersion = getVersion();
Expand All @@ -34,13 +39,13 @@ Usage: node cli.js -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
.options(cliOptions)
.example([
[
`To scan sitemap of website:', 'node cli.js -c [ 1 | ${constants.scannerTypes.sitemap} ] -d <device> -u <url_link> -w <viewportWidth>`,
`To scan sitemap of website:', 'node cli.js -c [ 1 | sitemap ] -u <url_link> [ -d <device> | -w <viewport_width> ]`,
],
[
`To scan a website', 'node cli.js -c [ 2 | ${constants.scannerTypes.website} ] -d <device> -u <url_link> -w <viewportWidth>`,
`To scan a website', 'node cli.js -c [ 2 | website ] -u <url_link> [ -d <device> | -w <viewport_width> ]`,
],
[
`To start a custom flow scan', 'node cli.js -c [ 3 | ${constants.scannerTypes.custom} ] -d <device> -u <url_link> -w <viewportWidth>`,
`To start a custom flow scan', 'node cli.js -c [ 3 | custom ] -u <url_link> [ -d <device> | -w <viewport_width> ]`,
],
])
.coerce('c', option => {
Expand All @@ -65,7 +70,7 @@ Usage: node cli.js -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
})
.coerce('d', option => {
const device = devices[option];
if (option != 'Desktop' && !device) {
if (!device && option !== 'Desktop' && option !== 'Mobile') {
printMessage(
[`Invalid device. Please provide an existing device to start the scan.`],
messageOptions,
Expand Down Expand Up @@ -97,9 +102,34 @@ Usage: node cli.js -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
}
return option;
})
.coerce('b', option => {
const { choices } = cliOptions.b;
if (typeof option === 'number') {
if (Number.isInteger(option) && option > 0 && option <= choices.length) {
option = choices[option - 1];
} else {
printMessage(
[
'Invalid option',
`Please enter an integer (1 to ${choices.length}) or keywords (${choices.join(', ')}).`,
],
messageOptions,
);
process.exit(1);
}
}

return option;
})
.check(argvs => {
if (argvs.scanner === 'custom' && argvs.maxpages) {
throw new Error('-p or --maxpages is only available in website and sitemap scans');
throw new Error('-p or --maxpages is only available in website and sitemap scans.');
}
return true;
})
.check(argvs => {
if (argvs.scanner !== 'website' && argvs.strategy) {
throw new Error('-s or --strategy is only available in website scans.');
}
return true;
})
Expand All @@ -109,13 +139,114 @@ Usage: node cli.js -c <crawler> -d <device> -w <viewport> -u <url> OPTIONS`,
const scanInit = async argvs => {
argvs.scanner = constants.scannerTypes[argvs.scanner];
argvs.headless = argvs.headless === 'yes';
argvs.browserToRun = constants.browserTypes[argvs.browserToRun];

let useChrome = false;
let useEdge = false;
let chromeDataDir = null;
let edgeDataDir = null;
// Empty string for profile directory will use incognito mode in playwright
let clonedDataDir = '';

const res = await checkUrl(argvs.scanner, argvs.url);
// Decide which browser this run will use.
// The requested browser is tried first, then the other profile-based browser,
// and finally Chromium with an empty profile directory.

// Looks up the Chrome data directory and clones the profiles; on success marks Chrome as in use.
const tryChromeProfile = () => {
  chromeDataDir = getDefaultChromeDataDir();
  clonedDataDir = cloneChromeProfiles();
  if (!chromeDataDir || !clonedDataDir) {
    return false;
  }
  argvs.browserToRun = constants.browserTypes.chrome;
  useChrome = true;
  return true;
};

// Looks up the Edge data directory and clones the profiles; on success marks Edge as in use.
const tryEdgeProfile = () => {
  edgeDataDir = getDefaultEdgeDataDir();
  clonedDataDir = cloneEdgeProfiles();
  if (!edgeDataDir || !clonedDataDir) {
    return false;
  }
  useEdge = true;
  argvs.browserToRun = constants.browserTypes.edge;
  return true;
};

// Selects Chromium and resets the cloned profile directory to the empty string.
const settleOnChromium = () => {
  argvs.browserToRun = constants.browserTypes.chromium;
  clonedDataDir = '';
};

switch (argvs.browserToRun) {
  case constants.browserTypes.chrome:
    if (!tryChromeProfile()) {
      printMessage(['Unable to use Chrome, falling back to Edge browser...'], messageOptions);
      if (!tryEdgeProfile()) {
        printMessage(
          ['Unable to use both Chrome and Edge, falling back to Chromium...'],
          messageOptions,
        );
        settleOnChromium();
      }
    }
    break;
  case constants.browserTypes.edge:
    if (!tryEdgeProfile()) {
      printMessage(['Unable to use Edge, falling back to Chrome browser...'], messageOptions);
      if (!tryChromeProfile()) {
        printMessage(
          ['Unable to use both Chrome and Edge, falling back to Chromium...'],
          messageOptions,
        );
        settleOnChromium();
      }
    }
    break;
  default:
    settleOnChromium();
}

// 'Desktop' and 'Mobile' are moved from customDevice to deviceChosen so that the
// customDevice branches below only ever see the remaining device names.
if (argvs.customDevice === 'Desktop' || argvs.customDevice === 'Mobile') {
  argvs.deviceChosen = argvs.customDevice;
  delete argvs.customDevice;
}

// Creating the playwrightDeviceDetailObject
// for use in crawlDomain & crawlSitemap's preLaunchHook
if (argvs.deviceChosen === 'Mobile' || argvs.customDevice === 'iPhone 11') {
  argvs.playwrightDeviceDetailsObject = devices['iPhone 11'];
} else if (argvs.customDevice === 'Samsung Galaxy S9+') {
  // This label is looked up under the 'Galaxy S9+' key of Playwright's devices map.
  argvs.playwrightDeviceDetailsObject = devices['Galaxy S9+'];
} else if (argvs.viewportWidth) {
  // Custom width: only the width is user-supplied, the height is fixed at 720.
  argvs.playwrightDeviceDetailsObject = {
    viewport: { width: Number(argvs.viewportWidth), height: 720 },
  };
} else if (argvs.customDevice) {
  // Turn every underscore into a space before the lookup (presumably device names
  // can arrive underscore-separated - confirm against callers).
  // The previous call, replace('_', / /g), had its arguments swapped: it replaced
  // only the first underscore, and with the text "/ /g", so the lookup key was wrong.
  argvs.playwrightDeviceDetailsObject = devices[argvs.customDevice.replaceAll('_', ' ')];
} else {
  argvs.playwrightDeviceDetailsObject = {};
}

const res = await checkUrl(
argvs.scanner,
argvs.url,
argvs.browserToRun,
clonedDataDir,
argvs.playwrightDeviceDetailsObject,
);

if (argvs.scanner === constants.scannerTypes.website && !argvs.strategy) {
argvs.strategy = 'same-domain';
}
const statuses = constants.urlCheckStatuses;

// File clean up after url check
// files will clone a second time below if url check passes
if (useChrome) {
deleteClonedChromeProfiles();
} else if (useEdge) {
deleteClonedEdgeProfiles();
}

// eslint-disable-next-line default-case
switch (res.status) {
case statuses.success.code:
argvs.finalUrl = res.url;
break;
case statuses.unauthorised.code:
printMessage([statuses.unauthorised.message], messageOptions);
process.exit(res.status);
case statuses.cannotBeResolved.code:
printMessage([statuses.cannotBeResolved.message], messageOptions);
process.exit(res.status);
Expand All @@ -127,7 +258,6 @@ const scanInit = async argvs => {
printMessage([statuses.invalidUrl.message], messageOptions);
process.exit(res.status);
}

/* if sitemap scan is selected, treat this URL as a filepath
isFileSitemap will tell whether the filepath exists, and if it does, whether the
file is a sitemap */
Expand All @@ -140,6 +270,8 @@ const scanInit = async argvs => {
case statuses.notASitemap.code:
printMessage([statuses.notASitemap.message], messageOptions);
process.exit(res.status);
default:
break;
}

const [date, time] = new Date().toLocaleString('sv').replaceAll(/-|:/g, '').split(' ');
Expand All @@ -152,19 +284,42 @@ const scanInit = async argvs => {

let screenToScan;

if (!argvs.customDevice && !argvs.viewportWidth) {
screenToScan = 'Desktop';
if (argvs.deviceChosen) {
screenToScan = argvs.deviceChosen;
} else if (argvs.customDevice) {
screenToScan = argvs.customDevice;
} else {
} else if (argvs.viewportWidth) {
screenToScan = `CustomWidth_${argvs.viewportWidth}px`;
} else {
screenToScan = 'Desktop';
}

data.randomToken = `PHScan_${domain}_${date}_${time}_${argvs.scanner.replaceAll(
' ',
'_',
)}_${screenToScan.replaceAll(' ', '_')}`;

/**
 * Cloning a second time with the random token for parallel browser sessions.
 * Also to mitigate against a known bug where cookies are
 * overridden after each browser session - i.e. the user is logged out
 * after the URL check and the same cookie cannot be reused for the scan
 * */
if (useChrome || useEdge) {
  // Clone the profiles again, this time passing the run's random token,
  // and record the browser and cloned directory on the scan data.
  clonedDataDir = useChrome
    ? cloneChromeProfiles(data.randomToken)
    : cloneEdgeProfiles(data.randomToken);
  data.browser = useChrome ? constants.browserTypes.chrome : constants.browserTypes.edge;
  data.userDataDirectory = clonedDataDir;
} else {
  // Defaults to chromium by not specifying channels in Playwright, if no browser is found
  data.browser = constants.browserTypes.chromium;
  data.userDataDirectory = '';
}

printMessage([`Purple HATS version: ${appVersion}`, 'Starting scan...'], messageOptions);

if (argvs.scanner === constants.scannerTypes.custom) {
Expand All @@ -181,8 +336,14 @@ const scanInit = async argvs => {
await combineRun(data, screenToScan);
}

// Delete cloned directory
if (useChrome) {
deleteClonedChromeProfiles();
} else if (useEdge) {
deleteClonedEdgeProfiles();
}
// Delete dataset and request queues
cleanUp(data.randomToken);
await cleanUp(data.randomToken);

return getStoragePath(data.randomToken);
};
Expand All @@ -195,8 +356,8 @@ scanInit(options).then(async storagePath => {

await fs
.ensureDir(storagePath)
.then(async () => {
await zipResults(constants.cliZipFileName, storagePath);
.then(() => {
zipResults(constants.cliZipFileName, storagePath);
const messageToDisplay = [
`Report of this run is at ${constants.cliZipFileName}`,
`Results directory is at ${storagePath}`,
Expand All @@ -208,6 +369,7 @@ scanInit(options).then(async storagePath => {
);
}
printMessage(messageToDisplay);
process.exit(0);
})
.catch(error => {
printMessage([`Error in zipping results: ${error}`]);
Expand Down
Loading

0 comments on commit d2aa6b1

Please sign in to comment.