From 5b9e04bad677d57c5d0895770e0c79894ed22abe Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Tue, 21 Nov 2023 17:33:16 +0000 Subject: [PATCH] feat: payload capture - move timing into copied plugin (#902) As discussed https://posthog.slack.com/archives/C03PB072FMJ/p1700077249746539 first draft (while looking after poorly #4) of removing our own timings plugin in favour of (a copy of) the rrweb plugin --- .../extensions/replay/config.test.ts | 153 ++++++++++- .../extensions/replay/web-performance.test.ts | 161 ----------- src/__tests__/posthog-core.js | 5 + src/decide.ts | 1 - src/extensions/replay/config.ts | 108 +++++++- src/extensions/replay/sessionrecording.ts | 27 +- src/extensions/replay/web-performance.ts | 260 ------------------ src/loader-recorder-v2.ts | 178 +++++++----- src/posthog-core.ts | 19 +- src/types.ts | 40 ++- src/utils/event-utils.ts | 6 +- src/utils/request-utils.ts | 17 ++ src/utils/type-utils.ts | 9 +- 13 files changed, 454 insertions(+), 530 deletions(-) delete mode 100644 src/__tests__/extensions/replay/web-performance.test.ts delete mode 100644 src/extensions/replay/web-performance.ts diff --git a/src/__tests__/extensions/replay/config.test.ts b/src/__tests__/extensions/replay/config.test.ts index 77ced9b8e..fad3dc856 100644 --- a/src/__tests__/extensions/replay/config.test.ts +++ b/src/__tests__/extensions/replay/config.test.ts @@ -31,29 +31,35 @@ describe('config', () => { it('should remove the Authorization header from requests even if no other config is set', () => { const networkOptions = buildNetworkRequestOptions(defaultConfig(), {}) const cleaned = networkOptions.maskRequestFn!({ - url: 'something', + name: 'something', requestHeaders: { Authorization: 'Bearer 123', 'content-type': 'application/json', }, }) - expect(cleaned?.requestHeaders).toEqual({ - 'content-type': 'application/json', + expect(cleaned).toEqual({ + name: 'something', + requestHeaders: { + 'content-type': 'application/json', + }, }) }) it('should cope with no headers when even if no other config is set', () => { const networkOptions = buildNetworkRequestOptions(defaultConfig(), {}) const cleaned = networkOptions.maskRequestFn!({ - url: 'something', + name: 'something', + requestHeaders: undefined, + }) + expect(cleaned).toEqual({ + name: 'something', requestHeaders: undefined, }) - expect(cleaned?.requestHeaders).toBeUndefined() }) it('should remove the Authorization header from requests even when a mask request fn is set', () => { const posthogConfig = defaultConfig() - posthogConfig.session_recording.maskNetworkRequestFn = (data) => { + posthogConfig.session_recording.maskCapturedNetworkRequestFn = (data) => { return { ...data, requestHeaders: { @@ -65,28 +71,151 @@ describe('config', () => { const networkOptions = buildNetworkRequestOptions(posthogConfig, {}) const cleaned = networkOptions.maskRequestFn!({ - url: 'something', + name: 'something', requestHeaders: { Authorization: 'Bearer 123', 'content-type': 'application/json', }, }) - expect(cleaned?.requestHeaders).toEqual({ - 'content-type': 'edited', + expect(cleaned).toEqual({ + name: 'something', + requestHeaders: { + 'content-type': 'edited', + }, + }) + }) + + it('uses the deprecated mask fn when set', () => { + const posthogConfig = defaultConfig() + posthogConfig.session_recording.maskNetworkRequestFn = (data) => { + return { + ...data, + url: 'edited', // deprecated fn only edits the url + } + } + const networkOptions = buildNetworkRequestOptions(posthogConfig, {}) + + const cleaned = networkOptions.maskRequestFn!({ + name: 'something', + requestHeaders: { + Authorization: 'Bearer 123', + 'content-type': 'application/json', + }, + }) + expect(cleaned).toEqual({ + name: 'edited', + requestHeaders: { + 'content-type': 'application/json', + }, }) }) it('case insensitively removes headers on the deny list', () => { const networkOptions = buildNetworkRequestOptions(defaultConfig(), {}) const cleaned = networkOptions.maskRequestFn!({ - url: 'something', + name: 'something', requestHeaders: { AuThOrIzAtIoN: 'Bearer 123', 'content-type': 'application/json', }, }) - expect(cleaned?.requestHeaders).toEqual({ - 'content-type': 'application/json', + expect(cleaned).toEqual({ + name: 'something', + requestHeaders: { + 'content-type': 'application/json', + }, + }) + }) + + it.each([ + [ + { + name: 'https://app.posthog.com/api/feature_flag/', + }, + { + name: 'https://app.posthog.com/api/feature_flag/', + }, + ], + [ + { + name: 'https://app.posthog.com/s/', + }, + undefined, + ], + [ + { + name: 'https://app.posthog.com/e/', + }, + undefined, + ], + [ + { + name: 'https://app.posthog.com/i/vo/e/', + }, + undefined, + ], + ])('ignores ingestion paths', (capturedRequest, expected) => { + const networkOptions = buildNetworkRequestOptions(defaultConfig(), {}) + const x = networkOptions.maskRequestFn!(capturedRequest) + expect(x).toEqual(expected) + }) + + it('redacts large request body', () => { + const networkOptions = buildNetworkRequestOptions(defaultConfig(), {}) + const cleaned = networkOptions.maskRequestFn!({ + name: 'something', + requestHeaders: { + 'content-type': 'application/json', + 'content-length': '1000001', + }, + requestBody: 'something very large', + }) + expect(cleaned).toEqual({ + name: 'something', + requestHeaders: { + 'content-type': 'application/json', + 'content-length': '1000001', + }, + requestBody: 'Request body too large to record', + }) + }) + + it('redacts large response body', () => { + const networkOptions = buildNetworkRequestOptions(defaultConfig(), {}) + const cleaned = networkOptions.maskRequestFn!({ + name: 'something', + responseHeaders: { + 'content-type': 'application/json', + 'content-length': '1000001', + }, + responseBody: 'something very large', + }) + expect(cleaned).toEqual({ + name: 'something', + responseHeaders: { + 'content-type': 'application/json', + 'content-length': '1000001', + }, + responseBody: 'Response body too large to record', + }) + }) + + it('cannot redact when there is no content length header', () => { + const networkOptions = buildNetworkRequestOptions(defaultConfig(), {}) + const largeString = 'a'.repeat(1000001) + const cleaned = networkOptions.maskRequestFn!({ + name: 'something', + requestHeaders: { + 'content-type': 'application/json', + }, + requestBody: largeString, + }) + expect(cleaned).toEqual({ + name: 'something', + requestHeaders: { + 'content-type': 'application/json', + }, + requestBody: largeString, }) }) }) diff --git a/src/__tests__/extensions/replay/web-performance.test.ts b/src/__tests__/extensions/replay/web-performance.test.ts deleted file mode 100644 index 630e354fb..000000000 --- a/src/__tests__/extensions/replay/web-performance.test.ts +++ /dev/null @@ -1,161 +0,0 @@ -/// - -/* eslint-disable @typescript-eslint/no-non-null-assertion */ -/* eslint-disable compat/compat */ - -import { WebPerformanceObserver } from '../../../extensions/replay/web-performance' -import { PostHog } from '../../../posthog-core' -import { NetworkRequest, PostHogConfig } from '../../../types' - -const createMockPerformanceEntry = (overrides: Partial = {}): PerformanceEntry => { - const entry = { - name: 'http://example.com/api/1', - duration: 100, - entryType: 'fetch', - startTime: Date.now() - 1000, - ...overrides, - toJSON: () => { - return { - ...entry, - toJSON: undefined, - } - }, - } - - return entry -} - -describe('WebPerformance', () => { - let webPerformance: WebPerformanceObserver - let mockPostHogInstance: any - const mockConfig: Partial = { - api_host: 'https://app.posthog.com', - session_recording: { - maskNetworkRequestFn: (networkRequest: NetworkRequest) => networkRequest, - }, - } - - beforeEach(() => { - mockPostHogInstance = { - config: mockConfig, - sessionRecording: { - onRRwebEmit: jest.fn(), - }, - } - webPerformance = new WebPerformanceObserver(mockPostHogInstance as PostHog) - jest.clearAllMocks() - jest.useFakeTimers() - jest.setSystemTime(new Date('2023-01-01')) - performance.now = jest.fn(() => Date.now()) - }) - - describe('when the browser does not support performance observer', () => { - const OriginalPerformanceObserver = window.PerformanceObserver - - beforeAll(() => { - // eslint-disable-next-line @typescript-eslint/ban-ts-comment - // @ts-ignore - window.PerformanceObserver = undefined - }) - - afterAll(() => { - // eslint-disable-next-line @typescript-eslint/ban-ts-comment - // @ts-ignore - window.PerformanceObserver = OriginalPerformanceObserver - }) - - it('should not start the observer', () => { - const webPerformance = new WebPerformanceObserver(mockPostHogInstance as PostHog) - webPerformance.startObserving() - expect(webPerformance.isObserving()).toBe(false) - }) - }) - - describe('_capturePerformanceEvent', () => { - it('should capture and save a standard perf event', () => { - webPerformance._capturePerformanceEvent( - createMockPerformanceEntry({ - name: 'http://example.com/api/1', - }) - ) - - expect(mockPostHogInstance.sessionRecording.onRRwebEmit).toHaveBeenCalledTimes(1) - expect(mockPostHogInstance.sessionRecording.onRRwebEmit).toHaveBeenCalledWith({ - data: { - payload: { - '0': 'fetch', - '1': 0, - '2': 'http://example.com/api/1', - '3': 1672531199000, - '39': 100, - '40': 1672531199000, - }, - plugin: 'posthog/network@1', - }, - timestamp: 1672531199000, - type: 6, - }) - }) - - it('should ignore posthog network events', () => { - webPerformance._capturePerformanceEvent( - createMockPerformanceEntry({ - name: 'https://app.posthog.com/s/', - }) - ) - - expect(mockPostHogInstance.sessionRecording.onRRwebEmit).toHaveBeenCalledTimes(0) - }) - - it('should ignore events with maskNetworkRequestFn returning null', () => { - mockConfig.session_recording!.maskNetworkRequestFn = (event) => { - if (event.url.includes('ignore')) { - return null - } - return event - } - ;[ - 'https://example.com/ignore/', - 'https://example.com/capture/', - 'https://ignore.example.com/capture/', - ].forEach((url) => { - webPerformance._capturePerformanceEvent( - createMockPerformanceEntry({ - name: url, - }) - ) - }) - expect(mockPostHogInstance.sessionRecording.onRRwebEmit).toHaveBeenCalledTimes(1) - }) - - it('should allow modifying of the content via maskNetworkRequestFn', () => { - mockConfig.session_recording!.maskNetworkRequestFn = (event) => { - event.url = event.url.replace('example', 'replaced') - return event - } - - webPerformance._capturePerformanceEvent( - createMockPerformanceEntry({ - name: 'https://example.com/capture/', - }) - ) - - expect(mockPostHogInstance.sessionRecording.onRRwebEmit).toHaveBeenCalledTimes(1) - expect(mockPostHogInstance.sessionRecording.onRRwebEmit).toHaveBeenCalledWith({ - data: { - payload: { - '0': 'fetch', - '1': 0, - '2': 'https://replaced.com/capture/', - '3': 1672531199000, - '39': 100, - '40': 1672531199000, - }, - plugin: 'posthog/network@1', - }, - timestamp: 1672531199000, - type: 6, - }) - }) - }) -}) diff --git a/src/__tests__/posthog-core.js b/src/__tests__/posthog-core.js index 99f977506..f9262e0ab 100644 --- a/src/__tests__/posthog-core.js +++ b/src/__tests__/posthog-core.js @@ -1124,4 +1124,9 @@ describe('posthog core', () => { ) }) }) + + test('deprecated web performance observer still exposes _forceAllowLocalhost', () => { + expect(given.lib.webPerformance._forceAllowLocalhost).toBe(false) + expect(() => given.lib.webPerformance._forceAllowLocalhost).not.toThrow() + }) }) diff --git a/src/decide.ts b/src/decide.ts index e8b05ae21..f26e93712 100644 --- a/src/decide.ts +++ b/src/decide.ts @@ -62,7 +62,6 @@ export class Decide { this.instance.toolbar.afterDecideResponse(response) this.instance.sessionRecording?.afterDecideResponse(response) autocapture.afterDecideResponse(response, this.instance) - this.instance.webPerformance?.afterDecideResponse(response) this.instance._afterDecideResponse(response) if (!this.instance.config.advanced_disable_feature_flags_on_first_load) { diff --git a/src/extensions/replay/config.ts b/src/extensions/replay/config.ts index d0e573130..554a028dd 100644 --- a/src/extensions/replay/config.ts +++ b/src/extensions/replay/config.ts @@ -1,5 +1,7 @@ -import { NetworkRecordOptions, NetworkRequest, PostHogConfig } from '../../types' +import { CapturedNetworkRequest, NetworkRecordOptions, PostHogConfig, Body } from '../../types' import { _isFunction } from '../../utils/type-utils' +import { convertToURL } from '../../utils/request-utils' +import { logger } from '../../utils/logger' export const defaultNetworkOptions: NetworkRecordOptions = { initiatorTypes: [ @@ -25,10 +27,21 @@ export const defaultNetworkOptions: NetworkRecordOptions = { 'video', 'xmlhttprequest', ], - maskRequestFn: (data: NetworkRequest) => data, + maskRequestFn: (data: CapturedNetworkRequest) => data, recordHeaders: false, recordBody: false, recordInitialRequests: false, + recordPerformance: false, + performanceEntryTypeToObserve: [ + // 'event', // This is too noisy as it covers all browser events + 'first-input', + // 'mark', // Mark is used too liberally. We would need to filter for specific marks + // 'measure', // Measure is used too liberally. We would need to filter for specific measures + 'navigation', + 'paint', + 'resource', + ], + payloadSizeLimitBytes: 1000000, } const HEADER_DENYLIST = [ @@ -47,37 +60,112 @@ const HEADER_DENYLIST = [ 'x-xsrf-token', ] -const removeAuthorizationHeader = (data: NetworkRequest): NetworkRequest => { +// we always remove headers on the deny list because we never want to capture this sensitive data +const removeAuthorizationHeader = (data: CapturedNetworkRequest): CapturedNetworkRequest => { Object.keys(data.requestHeaders ?? {}).forEach((header) => { if (HEADER_DENYLIST.includes(header.toLowerCase())) delete data.requestHeaders?.[header] }) return data } +const POSTHOG_PATHS_TO_IGNORE = ['/s/', '/e/', '/i/vo/e/'] +// want to ignore posthog paths when capturing requests, or we can get trapped in a loop +// because calls to PostHog would be reported using a call to PostHog which would be reported.... +const ignorePostHogPaths = (data: CapturedNetworkRequest): CapturedNetworkRequest | undefined => { + const url = convertToURL(data.name) + if (url && url.pathname && POSTHOG_PATHS_TO_IGNORE.includes(url.pathname)) { + return undefined + } + return data +} + +function redactPayload( + payload: Body, + headers: Record | undefined, + limit: number, + description: string +): Body { + const requestContentLength = headers?.['content-length'] + // in the interests of bundle size and the complexity of estimating payload size + // we only check the content-length header if it's present + // this might mean we can't always limit the payload, but that's better than + // having lots of code shipped to every browser that will rarely run + if (requestContentLength && parseInt(requestContentLength) > limit) { + return `${description} body too large to record` + } + return payload +} + +// people can have arbitrarily large payloads on their site, but we don't want to ingest them +const limitPayloadSize = ( + options: NetworkRecordOptions +): ((data: CapturedNetworkRequest | undefined) => CapturedNetworkRequest | undefined) => { + // the smallest of 1MB or the specified limit if there is one + const limit = Math.min(1000000, options.payloadSizeLimitBytes ?? 1000000) + + return (data) => { + if (data?.requestBody) { + data.requestBody = redactPayload(data.requestBody, data.requestHeaders, limit, 'Request') + } + + if (data?.responseBody) { + data.responseBody = redactPayload(data.responseBody, data.responseHeaders, limit, 'Response') + } + + return data + } +} + /** * whether a maskRequestFn is provided or not, - * we ensure that we remove the Authorization header from requests + * we ensure that we remove the denied header from requests * we _never_ want to record that header by accident * if someone complains then we'll add an opt-in to let them override it */ export const buildNetworkRequestOptions = ( instanceConfig: PostHogConfig, - remoteNetworkOptions: Pick + remoteNetworkOptions: Pick ): NetworkRecordOptions => { const config = instanceConfig.session_recording as NetworkRecordOptions // client can always disable despite remote options const canRecordHeaders = config.recordHeaders === false ? false : remoteNetworkOptions.recordHeaders const canRecordBody = config.recordBody === false ? false : remoteNetworkOptions.recordBody + const canRecordPerformance = config.recordPerformance === false ? false : remoteNetworkOptions.recordPerformance + + const payloadLimiter = limitPayloadSize(config) + + const enforcedCleaningFn: NetworkRecordOptions['maskRequestFn'] = (d: CapturedNetworkRequest) => + payloadLimiter(ignorePostHogPaths(removeAuthorizationHeader(d))) + + const hasDeprecatedMaskFunction = _isFunction(instanceConfig.session_recording.maskNetworkRequestFn) + + if (hasDeprecatedMaskFunction && _isFunction(instanceConfig.session_recording.maskCapturedNetworkRequestFn)) { + logger.warn( + 'Both `maskNetworkRequestFn` and `maskCapturedNetworkRequestFn` are defined. `maskNetworkRequestFn` will be ignored.' + ) + } + + if (hasDeprecatedMaskFunction) { + instanceConfig.session_recording.maskCapturedNetworkRequestFn = (data: CapturedNetworkRequest) => { + const cleanedURL = instanceConfig.session_recording.maskNetworkRequestFn!({ url: data.name }) + return { + ...data, + name: cleanedURL?.url, + } as CapturedNetworkRequest + } + } - config.maskRequestFn = _isFunction(instanceConfig.session_recording.maskNetworkRequestFn) + config.maskRequestFn = _isFunction(instanceConfig.session_recording.maskCapturedNetworkRequestFn) ? (data) => { - const cleanedRequest = removeAuthorizationHeader(data) - return instanceConfig.session_recording.maskNetworkRequestFn?.(cleanedRequest) ?? undefined + const cleanedRequest = enforcedCleaningFn(data) + return cleanedRequest + ? instanceConfig.session_recording.maskCapturedNetworkRequestFn?.(cleanedRequest) ?? undefined + : undefined } : undefined if (!config.maskRequestFn) { - config.maskRequestFn = removeAuthorizationHeader + config.maskRequestFn = enforcedCleaningFn } return { @@ -85,5 +173,7 @@ export const buildNetworkRequestOptions = ( ...config, recordHeaders: canRecordHeaders, recordBody: canRecordBody, + recordPerformance: canRecordPerformance, + recordInitialRequests: canRecordPerformance, } } diff --git a/src/extensions/replay/sessionrecording.ts b/src/extensions/replay/sessionrecording.ts index 9f4fd8cfd..5718cb1fa 100644 --- a/src/extensions/replay/sessionrecording.ts +++ b/src/extensions/replay/sessionrecording.ts @@ -24,6 +24,7 @@ import { _isBoolean, _isFunction, _isNull, _isNumber, _isObject, _isString, _isU import { logger } from '../../utils/logger' import { assignableWindow, window } from '../../utils/globals' import { buildNetworkRequestOptions } from './config' +import { isLocalhost } from '../../utils/request-utils' const BASE_ENDPOINT = '/s/' @@ -102,6 +103,9 @@ export class SessionRecording { private _sampleRate: number | null = null private _minimumDuration: number | null = null + // Util to help developers working on this feature manually override + _forceAllowLocalhostNetworkCapture = false + public get started(): boolean { // TODO could we use status instead of _captureStarted? return this._captureStarted @@ -148,7 +152,11 @@ export class SessionRecording { return recordingVersion_client_side || recordingVersion_server_side || 'v1' } - private get networkPayloadCapture(): Pick | undefined { + // network payload capture config has three parts + // each can be configured server side or client side + private get networkPayloadCapture(): + | Pick + | undefined { const networkPayloadCapture_server_side = this.instance.get_property(SESSION_RECORDING_NETWORK_PAYLOAD_CAPTURE) const networkPayloadCapture_client_side = { recordHeaders: this.instance.config.session_recording?.recordHeaders, @@ -158,7 +166,12 @@ export class SessionRecording { networkPayloadCapture_client_side?.recordHeaders || networkPayloadCapture_server_side?.recordHeaders const bodyEnabled = networkPayloadCapture_client_side?.recordBody || networkPayloadCapture_server_side?.recordBody - return headersEnabled || bodyEnabled ? { recordHeaders: headersEnabled, recordBody: bodyEnabled } : undefined + const performanceEnabled = + this.instance.config.capture_performance || networkPayloadCapture_server_side?.capturePerformance + + return headersEnabled || bodyEnabled || performanceEnabled + ? { recordHeaders: headersEnabled, recordBody: bodyEnabled, recordPerformance: performanceEnabled } + : undefined } /** @@ -265,7 +278,10 @@ export class SessionRecording { [SESSION_RECORDING_ENABLED_SERVER_SIDE]: !!response['sessionRecording'], [CONSOLE_LOG_RECORDING_ENABLED_SERVER_SIDE]: response.sessionRecording?.consoleLogRecordingEnabled, [SESSION_RECORDING_RECORDER_VERSION_SERVER_SIDE]: response.sessionRecording?.recorderVersion, - [SESSION_RECORDING_NETWORK_PAYLOAD_CAPTURE]: response.sessionRecording?.networkPayloadCapture, + [SESSION_RECORDING_NETWORK_PAYLOAD_CAPTURE]: { + capturePerformance: response.capturePerformance, + ...response.sessionRecording?.networkPayloadCapture, + }, }) } @@ -485,6 +501,11 @@ export class SessionRecording { plugins.push(assignableWindow.rrwebConsoleRecord.getRecordConsolePlugin()) } if (this.networkPayloadCapture && _isFunction(assignableWindow.getRecordNetworkPlugin)) { + if (isLocalhost() && !this._forceAllowLocalhostNetworkCapture) { + logger.info('[SessionReplay-NetworkCapture] not started because we are on localhost.') + return + } + plugins.push( assignableWindow.getRecordNetworkPlugin( buildNetworkRequestOptions(this.instance.config, this.networkPayloadCapture) diff --git a/src/extensions/replay/web-performance.ts b/src/extensions/replay/web-performance.ts deleted file mode 100644 index c77a07b62..000000000 --- a/src/extensions/replay/web-performance.ts +++ /dev/null @@ -1,260 +0,0 @@ -import { PostHog } from '../../posthog-core' -import { DecideResponse, NetworkRequest } from '../../types' -import { isLocalhost } from '../../utils/request-utils' - -import { _isUndefined } from '../../utils/type-utils' -import { logger } from '../../utils/logger' -import { window } from '../../utils/globals' - -const PERFORMANCE_EVENTS_MAPPING: { [key: string]: number } = { - // BASE_PERFORMANCE_EVENT_COLUMNS - entryType: 0, - timeOrigin: 1, - name: 2, - - // RESOURCE_EVENT_COLUMNS - startTime: 3, - redirectStart: 4, - redirectEnd: 5, - workerStart: 6, - fetchStart: 7, - domainLookupStart: 8, - domainLookupEnd: 9, - connectStart: 10, - secureConnectionStart: 11, - connectEnd: 12, - requestStart: 13, - responseStart: 14, - responseEnd: 15, - decodedBodySize: 16, - encodedBodySize: 17, - initiatorType: 18, - nextHopProtocol: 19, - renderBlockingStatus: 20, - responseStatus: 21, - transferSize: 22, - - // LARGEST_CONTENTFUL_PAINT_EVENT_COLUMNS - element: 23, - renderTime: 24, - loadTime: 25, - size: 26, - id: 27, - url: 28, - - // NAVIGATION_EVENT_COLUMNS - domComplete: 29, - domContentLoadedEvent: 30, - domInteractive: 31, - loadEventEnd: 32, - loadEventStart: 33, - redirectCount: 34, - navigationType: 35, - unloadEventEnd: 36, - unloadEventStart: 37, - - // Added after v1 - duration: 39, - timestamp: 40, - - // NOTE: CURRENTLY UNSUPPORTED - // EVENT_TIMING_EVENT_COLUMNS - // processingStart: null, - // processingEnd: null, - - // MARK_AND_MEASURE_EVENT_COLUMNS - // detail: null, -} - -const ENTRY_TYPES_TO_OBSERVE = [ - // 'event', // This is too noisy as it covers all browser events - 'first-input', - // 'mark', // Mark is used too liberally. We would need to filter for specific marks - // 'measure', // Measure is used too liberally. We would need to filter for specific measures - 'navigation', - 'paint', - 'resource', -] - -const PERFORMANCE_INGESTION_ENDPOINT = '/e/' -// Don't monitor posthog paths because then events cause performance events which are events and the snake eats its tail 😱 -const POSTHOG_PATHS_TO_IGNORE = ['/s/', PERFORMANCE_INGESTION_ENDPOINT] - -export class WebPerformanceObserver { - instance: PostHog - remoteEnabled: boolean | undefined - observer: PerformanceObserver | undefined - - // Util to help developers working on this feature manually override - _forceAllowLocalhost = false - - constructor(instance: PostHog) { - this.instance = instance - } - - startObservingIfEnabled() { - if (this.isEnabled()) { - this.startObserving() - } else { - this.stopObserving() - } - } - - startObserving() { - if (this.observer) { - return - } - - if (_isUndefined(window?.PerformanceObserver?.supportedEntryTypes)) { - logger.info( - '[PerformanceObserver] not started because PerformanceObserver is not supported by this browser.' - ) - return - } - - if (isLocalhost() && !this._forceAllowLocalhost) { - logger.info('[PerformanceObserver] not started because we are on localhost.') - return - } - - try { - // compat checked above with early return - // eslint-disable-next-line compat/compat - this.observer = new PerformanceObserver((list) => { - list.getEntries().forEach((entry) => { - this._capturePerformanceEvent(entry) - }) - }) - - // compat checked above with early return - // eslint-disable-next-line compat/compat - const entryTypes = PerformanceObserver.supportedEntryTypes.filter((x) => ENTRY_TYPES_TO_OBSERVE.includes(x)) - - entryTypes.forEach((entryType) => { - this.observer?.observe({ type: entryType, buffered: true }) - }) - } catch (e) { - logger.error('PostHog failed to start performance observer', e) - this.stopObserving() - } - } - - stopObserving() { - if (this.observer) { - this.observer.disconnect() - this.observer = undefined - } - } - - isObserving() { - return !!this.observer - } - - isEnabled() { - return this.instance.config.capture_performance ?? this.remoteEnabled ?? false - } - - afterDecideResponse(response: DecideResponse) { - this.remoteEnabled = response.capturePerformance || false - if (this.isEnabled()) { - this.startObserving() - } - } - - _capturePerformanceEvent(event: PerformanceEntry) { - // NOTE: We don't want to capture our own request events. - - if (event.name.indexOf(this.instance.config.api_host) === 0) { - const path = event.name.replace(this.instance.config.api_host, '') - - if (POSTHOG_PATHS_TO_IGNORE.find((x) => path.indexOf(x) === 0)) { - return - } - } - - // NOTE: This is minimal atm but will include more options when we move to the - // built-in rrweb network recorder - let networkRequest: NetworkRequest | null | undefined = { - url: event.name, - } - - const userSessionRecordingOptions = this.instance.config.session_recording - - if (userSessionRecordingOptions.maskNetworkRequestFn) { - networkRequest = userSessionRecordingOptions.maskNetworkRequestFn(networkRequest) - } - - if (!networkRequest) { - return - } - - const eventJson = event.toJSON() - eventJson.name = networkRequest.url - const properties: { [key: number]: any } = {} - // kudos to sentry javascript sdk for excellent background on why to use Date.now() here - // https://github.com/getsentry/sentry-javascript/blob/e856e40b6e71a73252e788cd42b5260f81c9c88e/packages/utils/src/time.ts#L70 - // can't start observer if performance.now() is not available - // eslint-disable-next-line compat/compat - const timeOrigin = Math.floor(Date.now() - performance.now()) - properties[PERFORMANCE_EVENTS_MAPPING['timeOrigin']] = timeOrigin - // clickhouse can't ingest timestamps that are floats - // (in this case representing fractions of a millisecond we don't care about anyway) - properties[PERFORMANCE_EVENTS_MAPPING['timestamp']] = Math.floor(timeOrigin + event.startTime) - for (const key in PERFORMANCE_EVENTS_MAPPING) { - if (!_isUndefined(eventJson[key])) { - properties[PERFORMANCE_EVENTS_MAPPING[key]] = eventJson[key] - } - } - - this.capturePerformanceEvent(properties) - - if (exposesServerTiming(event)) { - for (const timing of event.serverTiming || []) { - this.capturePerformanceEvent({ - [PERFORMANCE_EVENTS_MAPPING['timeOrigin']]: timeOrigin, - [PERFORMANCE_EVENTS_MAPPING['timestamp']]: Math.floor(timeOrigin + event.startTime), - [PERFORMANCE_EVENTS_MAPPING['name']]: timing.name, - [PERFORMANCE_EVENTS_MAPPING['duration']]: timing.duration, - // the spec has a closed list of possible types - // https://developer.mozilla.org/en-US/docs/Web/API/PerformanceEntry/entryType - // but, we need to know this was a server timing so that we know to - // match it to the appropriate navigation or resource timing - // that matching will have to be on timestamp and $current_url - [PERFORMANCE_EVENTS_MAPPING['entryType']]: 'serverTiming', - }) - } - } - } - - /** - * :TRICKY: Make sure we batch these requests, and don't truncate the strings. - */ - private capturePerformanceEvent(properties: { [key: number]: any }) { - const timestamp = properties[PERFORMANCE_EVENTS_MAPPING['timestamp']] - - this.instance.sessionRecording?.onRRwebEmit({ - type: 6, // EventType.Plugin, - data: { - plugin: 'posthog/network@1', - payload: properties, - }, - timestamp, - }) - - // this.instance.capture('$performance_event', properties, { - // transport: 'XHR', - // method: 'POST', - // endpoint: PERFORMANCE_INGESTION_ENDPOINT, - // _noTruncate: true, - // _batchKey: 'performanceEvent', - // }) - } -} - -/** - * Check if this PerformanceEntry is either a PerformanceResourceTiming or a PerformanceNavigationTiming - * NB PerformanceNavigationTiming extends PerformanceResourceTiming - * Here we don't care which interface it implements as both expose `serverTimings` - */ -const exposesServerTiming = (event: PerformanceEntry): event is PerformanceResourceTiming => - event.entryType === 'navigation' || event.entryType === 'resource' diff --git a/src/loader-recorder-v2.ts b/src/loader-recorder-v2.ts index 4d99c3c40..9de94dcb6 100644 --- a/src/loader-recorder-v2.ts +++ b/src/loader-recorder-v2.ts @@ -11,23 +11,20 @@ import { getRecordConsolePlugin } from 'rrweb/es/rrweb/packages/rrweb/src/plugin // rrweb/network@1 code starts // most of what is below here will be removed when rrweb release their code for this // see https://github.com/rrweb-io/rrweb/pull/1105 - /// - // NB adopted from https://github.com/rrweb-io/rrweb/pull/1105 which looks like it will be accepted into rrweb // however, in the PR, it throws when the performance observer data is not available // and assumes it is running in a browser with the Request API (i.e. not IE11) // copying here so that we can use it before rrweb adopt it - import type { IWindow, listenerHandler, RecordPlugin } from '@rrweb/types' -import { InitiatorType, NetworkRecordOptions, NetworkRequest, Headers } from './types' -import { _isBoolean, _isFunction, _isArray, _isUndefined, _isNull } from './utils/type-utils' +import { CapturedNetworkRequest, Headers, InitiatorType, NetworkRecordOptions } from './types' +import { _isArray, _isBoolean, _isFunction, _isNull, _isUndefined } from './utils/type-utils' import { logger } from './utils/logger' import { window } from './utils/globals' import { defaultNetworkOptions } from './extensions/replay/config' export type NetworkData = { - requests: NetworkRequest[] + requests: CapturedNetworkRequest[] isInitial?: boolean } @@ -97,6 +94,12 @@ export function findLast(array: Array, predicate: (value: T) => boolean): } function initPerformanceObserver(cb: networkCallback, win: IWindow, options: Required) { + // if we are only observing timings then we could have a single observer for all types, with buffer true, + // but we are going to filter by initiatorType _if we are wrapping fetch and xhr as the wrapped functions + // will deal with those. + // so we have a block which captures requests from before fetch/xhr is wrapped + // these are marked `isInitial` so playback can display them differently if needed + // they will never have method/status/headers/body because they are pre-wrapping that provides that if (options.recordInitialRequests) { const initialPerformanceEntries = win.performance .getEntries() @@ -106,38 +109,41 @@ function initPerformanceObserver(cb: networkCallback, win: IWindow, options: Req (isResourceTiming(entry) && options.initiatorTypes.includes(entry.initiatorType as InitiatorType)) ) cb({ - requests: initialPerformanceEntries.map((entry) => ({ - url: entry.name, - initiatorType: entry.initiatorType as InitiatorType, - status: 'responseStatus' in entry ? entry.responseStatus : undefined, - startTime: Math.round(entry.startTime), - endTime: Math.round(entry.responseEnd), - })), + requests: initialPerformanceEntries.flatMap((entry) => + prepareRequest(entry, undefined, undefined, {}, true) + ), isInitial: true, }) } const observer = new win.PerformanceObserver((entries) => { - const performanceEntries = entries - .getEntries() - .filter( - (entry): entry is ObservedPerformanceEntry => - isNavigationTiming(entry) || - (isResourceTiming(entry) && - options.initiatorTypes.includes(entry.initiatorType as InitiatorType) && - entry.initiatorType !== 'xmlhttprequest' && - entry.initiatorType !== 'fetch') - ) + // if recordBody or recordHeaders is true then we don't want to record fetch or xhr here + // as the wrapped functions will do that. Otherwise, this filter becomes a noop + // because we do want to record them here + const wrappedInitiatorFilter = (entry: ObservedPerformanceEntry) => + options.recordBody || options.recordHeaders + ? entry.initiatorType !== 'xmlhttprequest' && entry.initiatorType !== 'fetch' + : true + + const performanceEntries = entries.getEntries().filter( + (entry): entry is ObservedPerformanceEntry => + isNavigationTiming(entry) || + (isResourceTiming(entry) && + options.initiatorTypes.includes(entry.initiatorType as InitiatorType) && + // TODO if we are _only_ capturing timing we don't want to filter initiator here + wrappedInitiatorFilter(entry)) + ) + cb({ - requests: performanceEntries.map((entry) => ({ - url: entry.name, - initiatorType: entry.initiatorType as InitiatorType, - status: 'responseStatus' in entry ? entry.responseStatus : undefined, - startTime: Math.round(entry.startTime), - endTime: Math.round(entry.responseEnd), - })), + requests: performanceEntries.flatMap((entry) => prepareRequest(entry, undefined, undefined, {})), }) }) - observer.observe({ entryTypes: ['navigation', 'resource'] }) + // compat checked earlier + // eslint-disable-next-line compat/compat + const entryTypes = PerformanceObserver.supportedEntryTypes.filter((x) => + options.performanceEntryTypeToObserve.includes(x) + ) + // initial records are gathered above, so we don't need to observe and buffer each type separately + observer.observe({ entryTypes }) return () => { observer.disconnect() } @@ -224,7 +230,7 @@ function initXhrObserver(cb: networkCallback, win: IWindow, options: Required = {} + const networkRequest: Partial = {} let after: number | undefined let before: number | undefined const requestHeaders: Headers = {} @@ -280,19 +286,8 @@ function initXhrObserver(cb: networkCallback, win: IWindow, options: Required { // @@ -307,6 +302,69 @@ function initXhrObserver(cb: networkCallback, win: IWindow, options: Required + event.entryType === 'navigation' || event.entryType === 'resource' + +function prepareRequest( + entry: PerformanceResourceTiming, + method: string | undefined, + status: number | undefined, + networkRequest: Partial, + isInitial?: boolean +): CapturedNetworkRequest[] { + // kudos to sentry javascript sdk for excellent background on why to use Date.now() here + // https://github.com/getsentry/sentry-javascript/blob/e856e40b6e71a73252e788cd42b5260f81c9c88e/packages/utils/src/time.ts#L70 + // can't start observer if performance.now() is not available + // eslint-disable-next-line compat/compat + const timeOrigin = Math.floor(Date.now() - performance.now()) + // clickhouse can't ingest timestamps that are floats + // (in this case representing fractions of a millisecond we don't care about anyway) + const timestamp = Math.floor(timeOrigin + entry.startTime) + + const requests: CapturedNetworkRequest[] = [ + { + ...entry.toJSON(), + startTime: Math.round(entry.startTime), + endTime: Math.round(entry.responseEnd), + timeOrigin, + timestamp, + method: method, + initiatorType: entry.initiatorType as InitiatorType, + status, + requestHeaders: networkRequest.requestHeaders, + requestBody: networkRequest.requestBody, + responseHeaders: networkRequest.responseHeaders, + responseBody: networkRequest.responseBody, + isInitial, + }, + ] + + if (exposesServerTiming(entry)) { + for (const timing of entry.serverTiming || []) { + requests.push({ + timeOrigin, + timestamp, + startTime: Math.round(entry.startTime), + name: timing.name, + duration: timing.duration, + // the spec has a closed list of possible types + // https://developer.mozilla.org/en-US/docs/Web/API/PerformanceEntry/entryType + // but, we need to know this was a server timing so that we know to + // match it to the appropriate navigation or resource timing + // that matching will have to be on timestamp and $current_url + entryType: 'serverTiming', + }) + } + } + + return requests +} + function initFetchObserver( cb: networkCallback, win: IWindow, @@ -328,7 +386,7 @@ function initFetchObserver( // eslint-disable-next-line compat/compat const req = new Request(url, init) let res: Response | undefined - const networkRequest: Partial = {} + const networkRequest: Partial = {} let after: number | undefined let before: number | undefined try { @@ -376,19 +434,8 @@ function initFetchObserver( if (_isNull(entry)) { return } - const request: NetworkRequest = { - url: entry.name, - method: req.method, - initiatorType: entry.initiatorType as InitiatorType, - status: res?.status, - startTime: Math.round(entry.startTime), - endTime: Math.round(entry.responseEnd), - requestHeaders: networkRequest.requestHeaders, - requestBody: networkRequest.requestBody, - responseHeaders: networkRequest.responseHeaders, - responseBody: networkRequest.responseBody, - } - cb({ requests: [request] }) + const requests = prepareRequest(entry, req.method, res?.status, networkRequest) + cb({ requests }) }) .catch(() => { // @@ -416,7 +463,7 @@ function initNetworkObserver( ) as Required const cb: networkCallback = (data) => { - const requests: NetworkRequest[] = [] + const requests: CapturedNetworkRequest[] = [] data.requests.forEach((request) => { const maskedRequest = networkOptions.maskRequestFn(request) if (maskedRequest) { @@ -429,8 +476,15 @@ function initNetworkObserver( } } const performanceObserver = initPerformanceObserver(cb, win, networkOptions) - const xhrObserver = initXhrObserver(cb, win, networkOptions) - const fetchObserver = initFetchObserver(cb, win, networkOptions) + + // only wrap fetch and xhr if headers or body are being recorded + let xhrObserver: listenerHandler = () => {} + let fetchObserver: listenerHandler = () => {} + if (networkOptions.recordHeaders || networkOptions.recordBody) { + xhrObserver = initXhrObserver(cb, win, networkOptions) + fetchObserver = initFetchObserver(cb, win, networkOptions) + } + return () => { performanceObserver() xhrObserver() diff --git a/src/posthog-core.ts b/src/posthog-core.ts index e48d9cc13..0de7d319a 100644 --- a/src/posthog-core.ts +++ b/src/posthog-core.ts @@ -14,7 +14,6 @@ import { PostHogFeatureFlags } from './posthog-featureflags' import { PostHogPersistence } from './posthog-persistence' import { ALIAS_ID_KEY, FLAG_CALL_REPORTED, PEOPLE_DISTINCT_ID_KEY } from './constants' import { SessionRecording } from './extensions/replay/sessionrecording' -import { WebPerformanceObserver } from './extensions/replay/web-performance' import { Decide } from './decide' import { Toolbar } from './extensions/toolbar' import { clearOptInOut, hasOptedIn, hasOptedOut, optIn, optOut, userOptedOut } from './gdpr-utils' @@ -212,9 +211,6 @@ const create_phlib = function ( instance.sessionRecording = new SessionRecording(instance) instance.sessionRecording.startRecordingIfEnabled() - instance.webPerformance = new WebPerformanceObserver(instance) - instance.webPerformance.startObservingIfEnabled() - if (instance.config.__preview_measure_pageview_stats) { instance.pageViewManager.startMeasuringScrollPosition() } @@ -253,6 +249,19 @@ const create_phlib = function ( return instance } +class DeprecatedWebPerformanceObserver { + get _forceAllowLocalhost(): boolean { + return this.__forceAllowLocalhost + } + set _forceAllowLocalhost(value: boolean) { + logger.error( + 'WebPerformanceObserver is deprecated and has no impact on network capture. Use `_forceAllowLocalhostNetworkCapture` on `posthog.sessionRecording`' + ) + this.__forceAllowLocalhost = value + } + private __forceAllowLocalhost: boolean = false +} + /** * PostHog Library Object * @constructor @@ -277,7 +286,7 @@ export class PostHog { _requestQueue?: RequestQueue _retryQueue?: RetryQueue sessionRecording?: SessionRecording - webPerformance?: WebPerformanceObserver + webPerformance = new DeprecatedWebPerformanceObserver() _triggered_notifs: any compression: Partial> diff --git a/src/types.ts b/src/types.ts index 85a0651cc..e682c145e 100644 --- a/src/types.ts +++ b/src/types.ts @@ -159,9 +159,10 @@ export interface SessionRecordingOptions { inlineStylesheet?: boolean recorderVersion?: 'v1' | 'v2' recordCrossOriginIframes?: boolean - /** Modify the network request before it is captured. Returning null stops it being captured */ - // TODO this has to work for both capture mechanisms? 😱 + /** @deprecated - use maskCapturedNetworkRequestFn instead */ maskNetworkRequestFn?: ((data: NetworkRequest) => NetworkRequest | null | undefined) | null + /** Modify the network request before it is captured. Returning null or undefined stops it being captured */ + maskCapturedNetworkRequestFn?: ((data: CapturedNetworkRequest) => CapturedNetworkRequest | null | undefined) | null // properties below here are ALPHA, don't rely on them, they may change without notice // TODO which of these do we actually expose? // if this isn't provided a default will be used @@ -371,6 +372,11 @@ export type Body = | ArrayBufferView | ArrayBuffer | FormData + // rrweb uses URLSearchParams and ReadableStream + // as part of the union for this type + // because they don't support IE11 + // but, we do 🫠 + // what's going to happen here in IE11? | URLSearchParams | ReadableStream | null @@ -404,26 +410,46 @@ export type InitiatorType = export type NetworkRecordOptions = { initiatorTypes?: InitiatorType[] - maskRequestFn?: (data: NetworkRequest) => NetworkRequest | undefined + maskRequestFn?: (data: CapturedNetworkRequest) => CapturedNetworkRequest | undefined recordHeaders?: boolean | { request: boolean; response: boolean } recordBody?: boolean | string[] | { request: boolean | string[]; response: boolean | string[] } recordInitialRequests?: boolean + // whether to record PerformanceEntry events for network requests + recordPerformance?: boolean + // the PerformanceObserver will only observe these entry types + performanceEntryTypeToObserve: string[] + // the maximum size of the request/response body to record + // NB this will be at most 1MB even if set larger + payloadSizeLimitBytes: number } -// extending this to match the rrweb NetworkRequest type -// it is different in that the rrweb type will have initator type, starttime, and endtime -// as required properties. but we don't want to require them here -// because we've previously exposed this type as only having `url` +/** @deprecated - use CapturedNetworkRequest instead */ export type NetworkRequest = { url: string +} + +// In rrweb this is called NetworkRequest, but we already exposed that as having only URL +// we also want to vary from the rrweb NetworkRequest because we want to include +// all PerformanceEntry properties too. +// that has 4 required properties +// readonly duration: DOMHighResTimeStamp; +// readonly entryType: string; +// readonly name: string; +// readonly startTime: DOMHighResTimeStamp; +// NB: properties below here are ALPHA, don't rely on them, they may change without notice +export type CapturedNetworkRequest = Omit & { // properties below here are ALPHA, don't rely on them, they may change without notice method?: string initiatorType?: InitiatorType status?: number + timeOrigin?: number + timestamp?: number startTime?: number endTime?: number requestHeaders?: Headers requestBody?: Body responseHeaders?: Headers responseBody?: Body + // was this captured before fetch/xhr could have been wrapped + isInitial?: boolean } diff --git a/src/utils/event-utils.ts b/src/utils/event-utils.ts index 6e202a678..f0d13e678 100644 --- a/src/utils/event-utils.ts +++ b/src/utils/event-utils.ts @@ -1,4 +1,4 @@ -import { _getQueryParam } from './request-utils' +import { _getQueryParam, convertToURL } from './request-utils' import { _isNull, _isUndefined } from './type-utils' import { Properties } from '../types' import Config from '../config' @@ -256,9 +256,7 @@ export const _info = { if (!document?.referrer) { return '$direct' } - const parser = document.createElement('a') // Unfortunately we cannot use new URL due to IE11 - parser.href = document.referrer - return parser.host + return convertToURL(document.referrer)?.host || '$direct' }, properties: function (): Properties { diff --git a/src/utils/request-utils.ts b/src/utils/request-utils.ts index d5193e14b..a097001d6 100644 --- a/src/utils/request-utils.ts +++ b/src/utils/request-utils.ts @@ -2,9 +2,26 @@ import { _each, _isValidRegex } from './' import { _isArray, _isUndefined } from './type-utils' import { logger } from './logger' +import { document } from './globals' const localDomains = ['localhost', '127.0.0.1'] +/** + * IE11 doesn't support `new URL` + * so we can create an anchor element and use that to parse the URL + * there's a lot of overlap between HTMLHyperlinkElementUtils and URL + * meaning useful properties like `pathname` are available on both + */ +export const convertToURL = (url: string): HTMLAnchorElement | null => { + const location = document?.createElement('a') + if (_isUndefined(location)) { + return null + } + + location.href = url + return location +} + export const _isUrlMatchingRegex = function (url: string, pattern: string): boolean { if (!_isValidRegex(pattern)) return false return new RegExp(pattern).test(url) diff --git a/src/utils/type-utils.ts b/src/utils/type-utils.ts index 76110a843..0a5dcbb64 100644 --- a/src/utils/type-utils.ts +++ b/src/utils/type-utils.ts @@ -16,12 +16,8 @@ export const _isUint8Array = function (x: unknown): x is Uint8Array { // fails on only one very rare and deliberate custom object: // let bomb = { toString : undefined, valueOf: function(o) { return "function BOMBA!"; }}; export const _isFunction = function (f: any): f is (...args: any[]) => any { - try { - // eslint-disable-next-line posthog-js/no-direct-function-check - return /^\s*\bfunction\b/.test(f) - } catch (x) { - return false - } + // eslint-disable-next-line posthog-js/no-direct-function-check + return typeof f === 'function' } // Underscore Addons export const _isObject = function (x: unknown): x is Record { @@ -46,6 +42,7 @@ export const _isString = function (x: unknown): x is string { // eslint-disable-next-line posthog-js/no-direct-string-check return toString.call(x) == '[object String]' } + export const _isNull = function (x: unknown): x is null { // eslint-disable-next-line posthog-js/no-direct-null-check return x === null