From 960b42da61c789cbf9461252c0a306987c609ca1 Mon Sep 17 00:00:00 2001 From: Jerome Paulos Date: Tue, 20 Jun 2023 14:05:02 -0700 Subject: [PATCH 1/4] Replace with modern URL API --- src/canvas-fingerprinting.ts | 6 ++---- src/collector.ts | 3 +-- src/session-recording.ts | 3 +-- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/canvas-fingerprinting.ts b/src/canvas-fingerprinting.ts index abc9f79..63dd8cd 100644 --- a/src/canvas-fingerprinting.ts +++ b/src/canvas-fingerprinting.ts @@ -1,13 +1,11 @@ import { BlacklightEvent, JsInstrumentEvent } from './types'; +import { getScriptUrl, serializeCanvasCallMap } from './utils'; /** * @fileOverview Utility functions for canvas finerprinting analysis. * Implemented following the Princeton study's methodology. */ -import { parse } from 'url'; -import { getScriptUrl, serializeCanvasCallMap } from './utils'; - const MIN_CANVAS_IMAGE_WIDTH = 16; const MIN_CANVAS_IMAGE_HEIGHT = 16; const MIN_FONT_LIST_SIZE = 50; @@ -68,7 +66,7 @@ export const sortCanvasCalls = (canvasCalls: BlacklightEvent[]) => { const cStyles = new Map() as CanvasCallMap; for (const item of canvasCalls) { const { url, data } = item as JsInstrumentEvent; - const url_host = parse(url).hostname; + const url_host = new URL(url).hostname; const script_url = getScriptUrl(item); const { symbol, operation, value } = data; if (typeof script_url === 'undefined' || script_url.indexOf('http:') < -1 || script_url.indexOf('https:') < -1) { diff --git a/src/collector.ts b/src/collector.ts index 2fd7cb5..d18b9a5 100644 --- a/src/collector.ts +++ b/src/collector.ts @@ -5,7 +5,6 @@ import { join } from 'path'; import puppeteer, { Browser, Page, PuppeteerLifeCycleEvent, KnownDevices, PuppeteerLaunchOptions } from 'puppeteer'; import PuppeteerHar from 'puppeteer-har'; import { getDomain, getSubdomain, parse } from 'tldts'; -import url from 'url'; import { captureBrowserCookies, clearCookiesCache, setupHttpCookieCapture } from './cookie-collector'; import { setupBlacklightInspector } from './inspector'; import { setupKeyLoggingInspector } from './key-logging'; @@ -63,7 +62,7 @@ export const collect = async (inUrl: string, args: CollectorOptions) => { uri_dest: null, uri_redirects: null, secure_connection: {}, - host: url.parse(inUrl).hostname, + host: new URL(inUrl).hostname, config: { cleareCache: args.clearCache, captureHar: args.captureHar, diff --git a/src/session-recording.ts b/src/session-recording.ts index 2676179..e94ba96 100644 --- a/src/session-recording.ts +++ b/src/session-recording.ts @@ -1,10 +1,9 @@ import { Page } from 'puppeteer'; -import url from 'url'; import { BlacklightEvent, SESSION_RECORDERS_LIST } from './types'; export const setupSessionRecordingInspector = async (page: Page, eventDataHandler: (event: BlacklightEvent) => void) => { page.on('request', async request => { - const parsedUrl = url.parse(request.url()); + const parsedUrl = new URL(request.url()); const cleanUrl = `${parsedUrl.hostname}${parsedUrl.pathname}`; const stack = [ { From b1716060220c8c192dcaeafec0916ff0e9b1a2ea Mon Sep 17 00:00:00 2001 From: Jerome Paulos Date: Tue, 20 Jun 2023 15:02:16 -0700 Subject: [PATCH 2/4] Add session recorder --- src/types.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/types.ts b/src/types.ts index edac6e7..0eaf129 100644 --- a/src/types.ts +++ b/src/types.ts @@ -85,7 +85,8 @@ export const SESSION_RECORDERS_LIST = [ 'salemove.com', 'd10lpsik1i8c69.cloudfront.net', 'luckyorange.com', - 'vwo.com' + 'vwo.com', + 'clarity.ms' ]; export const BEHAVIOUR_TRACKING_EVENTS = { KEYBOARD: ['keydown', 'keypress', 'keyup', 'input'], From 8f281fcd8e4306161eb578a6cba6c191b8a756d5 Mon Sep 17 00:00:00 2001 From: Jerome Paulos Date: Wed, 21 Jun 2023 10:12:03 -0700 Subject: [PATCH 3/4] Fix example.ts --- example.ts | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/example.ts b/example.ts index 9e74162..2237b4c 100644 --- a/example.ts +++ b/example.ts @@ -1,20 +1,19 @@ - -import { KnownDevices } from "puppeteer"; -import { CollectorOptions, collect } from "./src"; -import { join } from 'path'; +const { KnownDevices } = require('puppeteer'); +const { collect } = require('./build/index'); +const { join } = require('path'); (async () => { const URL = 'example.com'; const EMULATE_DEVICE = 'iPhone 13 Mini'; - const config: CollectorOptions = { + const config = { numPages: 3, headless: false, emulateDevice: KnownDevices[EMULATE_DEVICE], // Uncomment to run with desktop/laptop browser // emulateDevice: { // viewport: {height: 1440, width: 800}, - // userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36" + // userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36' // }, outDir: join(__dirname, 'demo-dir'), }; From c62d50b993c75e905ccc8a42e275b2422e7de434 Mon Sep 17 00:00:00 2001 From: Jerome Paulos Date: Wed, 21 Jun 2023 11:45:38 -0700 Subject: [PATCH 4/4] Refactor key-logging.ts --- __tests__/key-logging.ts | 10 ++--- example.ts | 2 +- src/collector.ts | 4 +- src/key-logging.ts | 88 +++++++++++++++++++------------------ src/parser.ts | 13 ++---- src/pptr-utils/get-links.ts | 6 ++- src/types.ts | 42 ++++++++---------- src/utils.ts | 20 +++++---- 8 files changed, 93 insertions(+), 92 deletions(-) diff --git a/__tests__/key-logging.ts b/__tests__/key-logging.ts index 7597b87..8e0f9dc 100644 --- a/__tests__/key-logging.ts +++ b/__tests__/key-logging.ts @@ -2,8 +2,8 @@ import { defaultPuppeteerBrowserOptions } from "../src/pptr-utils/default"; import { fillForms } from "../src/pptr-utils/interaction-utils"; import puppeteer, { Browser } from "puppeteer"; -import { setupKeyLoggingInspector } from "../src/key-logging"; -import { Global } from "../src/types"; +import { setUpKeyLoggingInspector } from "../src/key-logging"; +import { Global, KeyLoggingEvent } from "../src/types"; declare var global: Global; @@ -695,12 +695,12 @@ describe("KeyLogging", () => { const page = await browser.newPage(); const testUrl = `${global.__DEV_SERVER__}/session_recorder.html`; // const testUrl = "https://www.veteransunited.com/"; - const rows = []; + const rows: KeyLoggingEvent[] = []; const eventHandler = event => { rows.push(event); }; - await setupKeyLoggingInspector(page, eventHandler); + await setUpKeyLoggingInspector(page, eventHandler); await page.goto(testUrl, { waitUntil: "networkidle2" }); await page.waitForTimeout(1000); await fillForms(page); @@ -777,7 +777,7 @@ describe("KeyLogging", () => { const eventHandler = event => { rows.push(event); }; - await setupKeyLoggingInspector(page, eventHandler); + await setUpKeyLoggingInspector(page, eventHandler); await page.goto(testUrl, { waitUntil: "networkidle2" }); await page.waitForTimeout(1000); await fillForms(page); diff --git a/example.ts b/example.ts index 2237b4c..f4cc4cf 100644 --- a/example.ts +++ b/example.ts @@ -3,7 +3,7 @@ const { collect } = require('./build/index'); const { join } = require('path'); (async () => { - const URL = 'example.com'; + const URL = 'digg.com'; const EMULATE_DEVICE = 'iPhone 13 Mini'; const config = { diff --git a/src/collector.ts b/src/collector.ts index d18b9a5..803b54d 100644 --- a/src/collector.ts +++ b/src/collector.ts @@ -7,7 +7,7 @@ import PuppeteerHar from 'puppeteer-har'; import { getDomain, getSubdomain, parse } from 'tldts'; import { captureBrowserCookies, clearCookiesCache, setupHttpCookieCapture } from './cookie-collector'; import { setupBlacklightInspector } from './inspector'; -import { setupKeyLoggingInspector } from './key-logging'; +import { setUpKeyLoggingInspector } from './key-logging'; import { getLogger } from './logger'; import { generateReport } from './parser'; import { defaultPuppeteerBrowserOptions, savePageContent } from './pptr-utils/default'; @@ -156,7 +156,7 @@ export const collect = async (inUrl: string, args: CollectorOptions) => { // Init blacklight instruments on page await setupBlacklightInspector(page, logger.warn); - await setupKeyLoggingInspector(page, logger.warn); + await setUpKeyLoggingInspector(page, logger.warn); await setupHttpCookieCapture(page, logger.warn); await setupSessionRecordingInspector(page, logger.warn); await setUpThirdPartyTrackersInspector(page, logger.warn, args.enableAdBlock); diff --git a/src/key-logging.ts b/src/key-logging.ts index 9abbc28..9870eef 100644 --- a/src/key-logging.ts +++ b/src/key-logging.ts @@ -1,62 +1,66 @@ -import { HTTPRequest, Page } from 'puppeteer'; +import { Page } from 'puppeteer'; import { DEFAULT_INPUT_VALUES } from './pptr-utils/interaction-utils'; -import { BlacklightEvent } from './types'; -import { getHashedValues } from './utils'; -const ts = [ - ...Object.values(DEFAULT_INPUT_VALUES), - ...Object.values(getHashedValues('base64', DEFAULT_INPUT_VALUES)), - ...Object.values(getHashedValues('md5', DEFAULT_INPUT_VALUES)), - ...Object.values(getHashedValues('sha256', DEFAULT_INPUT_VALUES)), - ...Object.values(getHashedValues('sha512', DEFAULT_INPUT_VALUES)) -]; +import { BlacklightErrorEvent, KeyLoggingEvent } from './types'; +import { getHashedArray } from './utils'; -const hashesMap = { - base64: Object.values(getHashedValues('base64', DEFAULT_INPUT_VALUES)), - md5: Object.values(getHashedValues('md5', DEFAULT_INPUT_VALUES)), - plaintext: Object.values(DEFAULT_INPUT_VALUES), - sha256: Object.values(getHashedValues('sha256', DEFAULT_INPUT_VALUES)), - sha512: Object.values(getHashedValues('sha512', DEFAULT_INPUT_VALUES)) +const INPUT_VALUES = Object.values(DEFAULT_INPUT_VALUES); + +const hashesMap: Record = { + plaintext: INPUT_VALUES, + base64: getHashedArray('base64', INPUT_VALUES), + md5: getHashedArray('md5', INPUT_VALUES), + sha256: getHashedArray('sha256', INPUT_VALUES), + sha512: getHashedArray('sha512', INPUT_VALUES) }; -export const setupKeyLoggingInspector = async (page: Page, eventDataHandler: (event: BlacklightEvent) => void) => { - page.on('request', (request: HTTPRequest) => { + +export async function setUpKeyLoggingInspector( + page: Page, + eventDataHandler: (event: KeyLoggingEvent|BlacklightErrorEvent) => void +) { + page.on('request', request => { const stack = [ { fileName: request.frame() ? request.frame().url() : '', - source: `RequestHandler` + source: 'RequestHandler' } ]; + if (request.method() === 'POST') { try { - let filter = []; - filter = ts.filter((t: string) => request.postData().indexOf(t) > -1); - if (filter.length > 0) { - let match_type = []; - filter.forEach(val => { - const m = Object.entries(hashesMap).filter(([, hashes]) => { - return hashes.indexOf(val) > -1; - }); - match_type = match_type.concat(m.map(e => e[0])); - }); - match_type = [...new Set(match_type)]; - eventDataHandler({ - data: { - filter, - match_type, - post_data: request.postData(), - post_request_url: request.url() - }, - stack, - type: `KeyLogging`, - url: request.frame().url() - }); + let matchTypes = new Set(); + let filter: string[] = []; + + for (const hashType in hashesMap) { + const hashedValues = hashesMap[hashType]; + + for (const value of hashedValues) { + if (request.postData()?.includes(value)) { + filter.push(value); + matchTypes.add(hashType); + break; + } + } } + + eventDataHandler({ + data: { + filter, + match_type: Array.from(matchTypes), + post_data: request.postData(), + post_request_url: request.url() + }, + stack, + type: 'KeyLogging', + url: request.frame().url() + }); } catch (error) { + console.error(error); eventDataHandler({ data: { message: JSON.stringify(error) }, stack, - type: `Error.KeyLogging`, + type: 'Error.KeyLogging', url: request.frame().url() }); } diff --git a/src/parser.ts b/src/parser.ts index 74a2957..1c6babc 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -2,15 +2,7 @@ import { getDomain } from 'tldts'; import { getCanvasFontFp, getCanvasFp } from './canvas-fingerprinting'; import { loadBrowserCookies, matchCookiesToEvents } from './cookie-collector'; import { FB_ADVANCED_MATCHING_PARAMETERS, FB_STANDARD_EVENTS } from './fb-pixel-lookup'; -import { - BEHAVIOUR_TRACKING_EVENTS, - BlacklightEvent, - FINGERPRINTABLE_WINDOW_APIS, - JsInstrumentEvent, - KeyLoggingEvent, - SessionRecordingEvent, - TrackingRequestEvent -} from './types'; +import { BEHAVIOUR_TRACKING_EVENTS, BlacklightEvent, FINGERPRINTABLE_WINDOW_APIS, JsInstrumentEvent, KeyLoggingEvent, SessionRecordingEvent, TrackingRequestEvent } from './types'; import { getScriptUrl, groupBy, loadJSONSafely } from './utils'; export const generateReport = (reportType, messages, dataDir, url) => { @@ -42,6 +34,7 @@ export const generateReport = (reportType, messages, dataDir, url) => { const filterByEvent = (messages, typePattern) => { return messages.filter(m => m.message.type.includes(typePattern) && !m.message.type.includes('Error')); }; + const getEventData = (reportType, messages): BlacklightEvent[] => { let filtered = []; switch (reportType) { @@ -78,6 +71,7 @@ const getEventData = (reportType, messages): BlacklightEvent[] => { } return filtered.map(m => m.message); }; + const reportSessionRecorders = (eventData: BlacklightEvent[]) => { const report = {}; eventData.forEach((event: SessionRecordingEvent) => { @@ -265,6 +259,7 @@ const reportFbPixelEvents = (eventData: BlacklightEvent[]) => { }; }); }; + const getDomainSafely = (message: KeyLoggingEvent) => { try { if (message.data.post_request_url) { diff --git a/src/pptr-utils/get-links.ts b/src/pptr-utils/get-links.ts index 2ad086a..6f17fe0 100644 --- a/src/pptr-utils/get-links.ts +++ b/src/pptr-utils/get-links.ts @@ -1,4 +1,8 @@ -import { LinkObject } from '../types'; +type LinkObject = { + href: string, + innerHtml: string, + innerText: string +} export const getLinks = async (page): Promise => { return page.evaluate(() => { diff --git a/src/types.ts b/src/types.ts index 0eaf129..993f272 100644 --- a/src/types.ts +++ b/src/types.ts @@ -2,21 +2,23 @@ export interface Global { __DEV_SERVER__: string; } -export type BlacklightEvent = JsInstrumentEvent | KeyLoggingEvent | BlacklightErrorEvent | TrackingRequestEvent | SessionRecordingEvent; - -export interface KeyLoggingEvent { - type: 'KeyLogging'; +export interface BlacklightEvent { + type: string; url: string; stack: any[]; +} + +export interface KeyLoggingEvent extends BlacklightEvent { + type: 'KeyLogging'; data: { + filter: string[], post_request_url: string; post_data: string; match_type: string[]; - filter: string[]; }; } -export interface JsInstrumentEvent { +export interface JsInstrumentEvent extends BlacklightEvent { type: | 'JsInstrument' | 'JsInstrument.Debug' @@ -24,8 +26,6 @@ export interface JsInstrumentEvent { | 'JsInstrument.Function' | 'JsInstrument.FunctionProxy' | 'JsInstrument.ObjectProperty'; - url: string; - stack: any[]; data: { symbol: string; value: string; @@ -35,22 +35,22 @@ export interface JsInstrumentEvent { }; } -export interface SessionRecordingEvent { +export interface SessionRecordingEvent extends BlacklightEvent { type: 'SessionRecording'; - url: string; matches: string[]; - stack: any[]; } -export interface TrackingRequestEvent { + +export interface TrackingRequestEvent extends BlacklightEvent { type: 'TrackingRequest'; - url: string; - stack: any[]; - data: { query?: any; filter: string; listName: string }; + data: { + query?: any; + filter: string; + listName: string; + }; } -export interface BlacklightErrorEvent { + +export interface BlacklightErrorEvent extends BlacklightEvent { type: 'Error' | 'Error.BlacklightInspector' | 'Error.KeyLogging' | 'Error.JsInstrument'; - url: string; - stack: any[]; data: { message: any; objectName?: string; @@ -59,11 +59,6 @@ export interface BlacklightErrorEvent { }; } -export interface LinkObject { - href: string; - innerHtml: string; - innerText: string; -} export const SESSION_RECORDERS_LIST = [ 'mc.yandex.ru/metrika/watch.js', 'mc.yandex.ru/metrika/tag.js', @@ -88,6 +83,7 @@ export const SESSION_RECORDERS_LIST = [ 'vwo.com', 'clarity.ms' ]; + export const BEHAVIOUR_TRACKING_EVENTS = { KEYBOARD: ['keydown', 'keypress', 'keyup', 'input'], MOUSE: ['click', 'mousedown', 'mouseup', 'mousemove', 'select', 'dblclick', 'scroll'], diff --git a/src/utils.ts b/src/utils.ts index 86e050c..f3e7764 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -3,6 +3,7 @@ import fs from 'fs'; import { join } from 'path'; import { getDomain, getPublicSuffix } from 'tldts'; import { BlacklightEvent } from './types'; + export const getFirstPartyPs = firstPartyUri => { return getPublicSuffix(firstPartyUri); }; @@ -116,6 +117,7 @@ export const getStackType = (stack, firstPartyDomain) => { return 'mixed'; } }; + export const isBase64 = str => { if (str === '' || str.trim() === '') { return false; @@ -127,12 +129,12 @@ export const isBase64 = str => { } }; -export const getStringHash = (algorithm, str) => { - return crypto.createHash(algorithm).update(str).digest('hex'); -}; -export const getHashedValues = (algorithm, object) => { - return Object.entries(object).reduce((acc, cur: any) => { - acc[cur[0]] = algorithm === 'base64' ? Buffer.from(cur[1]).toString('base64') : getStringHash(algorithm, cur[1]); - return acc; - }, {}); -}; +export function getHashedArray(algorithm: string, array: string[]): string[] { + return array.map(element => { + if(algorithm === 'base64') { + return btoa(element); + } + + return crypto.createHash(algorithm).update(element).digest('hex'); + }); +} \ No newline at end of file