Commit

Basic auth improvements for special characters and reduce logging (#341)
* Remove basic auth credentials from the URL when printing to the console and parsing the scan report

* Basic auth support for intelligent scan mode

* Bump package version

* Handle basic auth with decodeURIComponent for special characters in the connectivity check
younglim authored May 25, 2024
1 parent 699b3e1 commit 2b00415
Showing 10 changed files with 47 additions and 33 deletions.
9 changes: 3 additions & 6 deletions combine.js
@@ -4,8 +4,8 @@ import crawlDomain from './crawlers/crawlDomain.js';
import crawlIntelligentSitemap from './crawlers/crawlIntelligentSitemap.js';
import { generateArtifacts } from './mergeAxeResults.js';
import { getHost, createAndUpdateResultsFolders, createDetailsAndLogs } from './utils.js';
import constants, { basicAuthRegex } from './constants/constants.js';
import { getBlackListedPatterns, submitForm } from './constants/common.js';
import constants from './constants/constants.js';
import { getBlackListedPatterns, submitForm, urlWithoutAuth } from './constants/common.js';
import { consoleLogger, silentLogger } from './logs.js';
import runCustom from './crawlers/runCustom.js';

@@ -54,10 +54,7 @@ const combineRun = async (details, deviceToScan) => {
}

// remove basic-auth credentials from URL
let finalUrl = url;
if (basicAuthRegex.test(url)) {
finalUrl = `${url.split('://')[0]}://${url.split('@')[1]}`;
}
let finalUrl = urlWithoutAuth(url);

const scanDetails = {
startTime: new Date(),
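For context, a minimal sketch (illustrative URL, not part of the commit) of what this change to combine.js does: the credential stripping that used to rely on basicAuthRegex and string splitting now goes through the urlWithoutAuth helper added to constants/common.js further down.

// Sketch only — hypothetical URL, assuming the urlWithoutAuth helper added in constants/common.js below.
import { urlWithoutAuth } from './constants/common.js';

const url = 'https://user:p%40ss@example.com/results?page=1';

// Old approach: rebuild the string around '://' and the first '@'.
const oldFinalUrl = `${url.split('://')[0]}://${url.split('@')[1]}`;
// -> 'https://example.com/results?page=1'

// New approach: let the WHATWG URL API drop the credentials.
const finalUrl = urlWithoutAuth(url);
console.log(finalUrl.toString()); // -> 'https://example.com/results?page=1'
// Note: urlWithoutAuth returns a URL object, so it serialises to a string when interpolated or logged.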
16 changes: 14 additions & 2 deletions constants/common.js
@@ -263,12 +263,17 @@ export const sanitizeUrlInput = url => {
const requestToUrl = async (url, isNewCustomFlow, extraHTTPHeaders) => {
// User-Agent is modified to emulate a browser to handle cases where some sites ban non browser agents, resulting in a 403 error
const res = {};
const parsedUrl = new URL(url);
await axios
.get(url, {
.get(parsedUrl, {
headers: {
...extraHTTPHeaders,
'User-Agent': devices['Desktop Chrome HiDPI'].userAgent,
'Host': new URL(url).host
'Host': parsedUrl.host
},
auth: {
username: decodeURIComponent(parsedUrl.username),
password: decodeURIComponent(parsedUrl.password),
},
httpsAgent,
timeout: 5000,
@@ -1656,3 +1661,10 @@ export const getPlaywrightLaunchOptions = browser => {
}
return options;
};

export const urlWithoutAuth = (url) => {
const parsedUrl = new URL(url);
parsedUrl.username = '';
parsedUrl.password = '';
return parsedUrl;
};
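The connectivity check above (requestToUrl) is where the special-character handling from the commit title lives. A short sketch of the idea with a made-up URL: new URL() keeps the userinfo percent-encoded, so credentials containing characters such as '@' or ':' have to be decoded before axios can use them.

// Sketch only — hypothetical credentials, not part of the diff.
const parsedUrl = new URL('https://scanuser:p%40ss%3Aword@example.com/');

console.log(parsedUrl.password);                     // 'p%40ss%3Aword' (still percent-encoded)
console.log(decodeURIComponent(parsedUrl.password)); // 'p@ss:word'

// requestToUrl now hands the decoded values to axios via its auth option,
// so axios builds the Basic Authorization header itself instead of relying
// on the credentials embedded in the URL string.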
8 changes: 6 additions & 2 deletions crawlers/crawlIntelligentSitemap.js
@@ -59,8 +59,12 @@ import {chromium} from 'playwright';

function getHomeUrl(url) {
const urlObject = new URL(url);
if (urlObject.username !== '' && urlObject.password !== '') {
return `${urlObject.protocol}//${urlObject.username}:${urlObject.password}@${urlObject.hostname}${urlObject.port ? ':' + urlObject.port : ''}`;
}

return `${urlObject.protocol}//${urlObject.hostname}${urlObject.port ? ':' + urlObject.port : ''}`;
}
}

const checkUrlExists = async (page, url) => {
try {
@@ -78,7 +82,7 @@ import {chromium} from 'playwright';


try {
sitemapUrl = await findSitemap(url)
sitemapUrl = await findSitemap(url);
} catch (error) {
silentLogger.error(error);
}
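The updated getHomeUrl keeps any credentials when reducing a URL to its site root, so the sitemap lookup in intelligent scan mode can still reach sites behind basic auth. A quick sketch with made-up values:

// Sketch only — hypothetical URLs, not part of the diff.
getHomeUrl('https://user:pass@example.com:8443/deep/page?q=1');
// -> 'https://user:pass@example.com:8443'

getHomeUrl('https://example.com/deep/page?q=1');
// -> 'https://example.com'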
9 changes: 5 additions & 4 deletions crawlers/custom/utils.js
Expand Up @@ -6,7 +6,7 @@ import path from 'path';
import { runAxeScript } from '../commonCrawlerFunc.js';
import { consoleLogger, guiInfoLog, silentLogger } from '../../logs.js';
import { guiInfoStatusTypes } from '../../constants/constants.js';
import { isSkippedUrl } from '../../constants/common.js';
import { isSkippedUrl, urlWithoutAuth } from '../../constants/common.js';

export const DEBUG = false;
export const log = str => {
@@ -73,8 +73,8 @@ export const screenshotFullPage = async (page, screenshotsDir, screenshotIdx) =>
window.scrollTo(0, 0);
});

consoleLogger.info(`Screenshot page at: ${page.url()}`);
silentLogger.info(`Screenshot page at: ${page.url()}`);
consoleLogger.info(`Screenshot page at: ${urlWithoutAuth(page.url())}`);
silentLogger.info(`Screenshot page at: ${urlWithoutAuth(page.url())}`);

await page.screenshot({
path: imgPath,
@@ -108,10 +108,11 @@ export const runAxeScan = async (
customFlowDetails,
);


await dataset.pushData(result);

urlsCrawled.scanned.push({
url: page.url(),
url: urlWithoutAuth(page.url()),
pageTitle: result.pageTitle,
pageImagePath: customFlowDetails.pageImagePath,
});
3 changes: 2 additions & 1 deletion logs.js
@@ -2,6 +2,7 @@
/* eslint-disable no-shadow */
import { createLogger, format, transports } from 'winston';
import { guiInfoStatusTypes } from './constants/constants.js';
import { urlWithoutAuth } from './constants/common.js';

const { combine, timestamp, printf } = format;

@@ -49,7 +50,7 @@ export const guiInfoLog = (status, data) => {
case guiInfoStatusTypes.DUPLICATE:
console.log(
`crawling::${data.numScanned || 0}::${status}::${
data.urlScanned || 'no url provided'
urlWithoutAuth(data.urlScanned) || 'no url provided'
}`,
);
break;
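With guiInfoLog routed through urlWithoutAuth, the GUI status line no longer echoes credentials. A sketch with hypothetical values, assuming the SCANNED status shares the console.log branch shown above:

// Sketch only — hypothetical values, not part of the diff.
guiInfoLog(guiInfoStatusTypes.SCANNED, {
  numScanned: 3,
  urlScanned: 'https://user:secret@example.com/about',
});
// expected log line: crawling::3::<status>::https://example.com/about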
9 changes: 4 additions & 5 deletions mergeAxeResults.js
@@ -5,7 +5,8 @@ import fs from 'fs-extra';
import printMessage from 'print-message';
import path from 'path';
import { fileURLToPath} from 'url';
import constants, { basicAuthRegex } from './constants/constants.js';
import constants from './constants/constants.js';
import { urlWithoutAuth } from './constants/common.js';
import ejs from 'ejs';
import { createScreenshotsFolder, getFormattedTime, getStoragePath, getVersion, getWcagPassPercentage, formatDateTimeForMassScanner, retryFunction } from './utils.js';
import { consoleLogger, silentLogger } from './logs.js';
@@ -14,7 +15,7 @@ import { chromium } from 'playwright';
import { createWriteStream } from 'fs';
import { AsyncParser } from '@json2csv/node';
import { purpleAiHtmlETL, purpleAiRules } from './constants/purpleAi.js';
import { all } from 'axios';


const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
@@ -389,9 +390,7 @@ export const generateArtifacts = async (
const storagePath = getStoragePath(randomToken);
const directory = `${storagePath}/${constants.allIssueFileName}`;

if (basicAuthRegex.test(urlScanned)) {
urlScanned = `${urlScanned.split('://')[0]}://${urlScanned.split('@')[1]}`;
}
urlScanned = urlWithoutAuth(urlScanned);

const formatAboutStartTime = dateString => {
const utcStartTimeDate = new Date(dateString);
4 changes: 2 additions & 2 deletions package-lock.json


2 changes: 1 addition & 1 deletion package.json
@@ -1,7 +1,7 @@
{
"name": "@govtechsg/purple-hats",
"main": "npmIndex.js",
"version": "0.9.55",
"version": "0.9.56",
"type": "module",
"imports": {
"#root/*.js": "./*.js"
18 changes: 9 additions & 9 deletions playwrightAxeGenerator.js
@@ -24,7 +24,7 @@ import constants, {
getExecutablePath,
removeQuarantineFlag,
} from '#root/constants/constants.js';
import { isSkippedUrl, submitForm, getBlackListedPatterns } from '#root/constants/common.js';
import { isSkippedUrl, submitForm, getBlackListedPatterns, urlWithoutAuth } from '#root/constants/common.js';
import { getDefaultChromeDataDir, getDefaultEdgeDataDir } from './constants/constants.js';

const __filename = fileURLToPath(import.meta.url);
@@ -69,7 +69,7 @@ const playwrightAxeGenerator = async data => {
import fs from 'fs';
import path from 'path';
import printMessage from 'print-message';
import { isSkippedUrl, submitForm, getBlackListedPatterns } from '#root/constants/common.js';
import { isSkippedUrl, submitForm, getBlackListedPatterns, urlWithoutAuth } from '#root/constants/common.js';
import { consoleLogger, silentLogger, guiInfoLog } from '#root/logs.js';
`;
@@ -195,8 +195,8 @@ const checkIfScanRequired = async page => {
if (!urlImageDictionary[pageUrl]) {
urlImageDictionary[pageUrl] = [imgPath];
consoleLogger.info(\`Process page at: \${page.url()} , Scan required? true\`);
silentLogger.info(\`Process page at: \${page.url()} , Scan required? true\`);
consoleLogger.info(\`Process page at: \${urlWithoutAuth(page.url())} , Scan required? true\`);
silentLogger.info(\`Process page at: \${urlWithoutAuth(page.url())} , Scan required? true\`);
return {
scanRequired: true,
@@ -231,8 +231,8 @@
urlImageDictionary[pageUrl].push(imgPath)
}
consoleLogger.info(\`Process page at: \${page.url()} , Scan required? \${!isSimilarPage}\`);
silentLogger.info(\`Process page at: \${page.url()} , Scan required? \${!isSimilarPage}\`);
consoleLogger.info(\`Process page at: \${urlWithoutAuth(page.url())} , Scan required? \${!isSimilarPage}\`);
silentLogger.info(\`Process page at: \${urlWithoutAuth(page.url())} , Scan required? \${!isSimilarPage}\`);
return {
scanRequired: !isSimilarPage,
@@ -250,7 +250,7 @@ const runAxeScan = async (includeScreenshots, page, customFlowDetails) => {
)}, customFlowDetails);
await dataset.pushData(result);
urlsCrawled.scanned.push({
url: page.url(),
url: urlWithoutAuth(page.url()),
pageTitle: result.pageTitle,
pageImagePath: customFlowDetails.pageImagePath
});
@@ -277,7 +277,7 @@ const processPage = async page => {
if (scanRequired) {
guiInfoLog(guiInfoStatusTypes.SCANNED, {
numScanned: urlsCrawled.scanned.length,
urlScanned: pageUrl,
urlScanned: urlWithoutAuth(pageUrl),
});
await runAxeScan(${includeScreenshots}, page, { pageIndex: urlsCrawled.scanned.length + 1, pageImagePath });
}
@@ -493,7 +493,7 @@ const waitForCaptcha = async (page, captchaLocator) => {
const generatedScript = `${customFlowScripts}/${generatedScriptName}`;

console.log(
` ℹ️ A new browser will be launched shortly.\n Navigate and record custom steps for ${data.url} in the new browser.\n Close the browser when you are done recording your steps.`,
` ℹ️ A new browser will be launched shortly.\n Navigate and record custom steps for ${urlWithoutAuth(data.url)} in the new browser.\n Close the browser when you are done recording your steps.`,
);

try {
2 changes: 1 addition & 1 deletion runCustomFlowFromGUI.js
@@ -26,7 +26,7 @@ import constants, {
getDefaultEdgeDataDir,
guiInfoStatusTypes,
} from '#root/constants/constants.js';
import { isSkippedUrl, submitForm, getBlackListedPatterns } from '#root/constants/common.js';
import { isSkippedUrl, submitForm, getBlackListedPatterns, urlWithoutAuth } from '#root/constants/common.js';
import { consoleLogger, silentLogger, guiInfoLog } from './logs.js';

const generatedScript = argv[2];
