Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: capture network payloads (internal alpha) #886

Merged
merged 9 commits into from
Nov 13, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
245 changes: 235 additions & 10 deletions cypress/e2e/session-recording.cy.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
/// <reference types="cypress" />

import { _isNull } from '../../src/utils/type-utils'

/**
 * Initialises PostHog with the current `given.options` and then waits, in order,
 * for the intercepted `@decide` and `@recorder` requests.
 */
function onPageLoad() {
    const aliasesToAwait = ['@decide', '@recorder']

    cy.posthogInit(given.options)
    aliasesToAwait.forEach((alias) => {
        cy.wait(alias)
    })
}

describe('Session recording', () => {
given('options', () => ({}))

Expand All @@ -16,7 +24,7 @@ describe('Session recording', () => {
sessionRecording: {
endpoint: '/ses/',
},
supportedCompression: ['gzip', 'lz64'],
capture_performance: true,
},
}).as('decide')

Expand All @@ -25,15 +33,26 @@ describe('Session recording', () => {
cy.wait('@decide')
})

it('captures pageviews, autocapture, custom events', () => {
it('captures session events', () => {
cy.get('[data-cy-input]').type('hello world! ')
cy.wait(500)
cy.get('[data-cy-input]')
.type('hello posthog!')
.wait('@session-recording')
.then(() => {
const requests = cy.state('requests').filter(({ alias }) => alias === 'session-recording')
expect(requests.length).to.be.above(0).and.to.be.below(2)
cy.phCaptures({ full: true }).then((captures) => {
// should be a pageview and a $snapshot
expect(captures.map((c) => c.event)).to.deep.equal(['$pageview', '$snapshot'])
// the amount of captured data should be deterministic
// but of course that would be too easy
expect(captures[1]['properties']['$snapshot_data']).to.have.length.above(33).and.below(38)
// a meta and then a full snapshot
expect(captures[1]['properties']['$snapshot_data'][0].type).to.equal(4) // meta
expect(captures[1]['properties']['$snapshot_data'][1].type).to.equal(2) // full_snapshot
// Making a set from the rest should all be 3 - incremental snapshots
const incrementalSnapshots = captures[1]['properties']['$snapshot_data'].slice(2)
expect(new Set(incrementalSnapshots.map((s) => s.type))).to.deep.equal(new Set([3]))
})
})
})
})
Expand All @@ -52,24 +71,230 @@ describe('Session recording', () => {
endpoint: '/ses/',
},
supportedCompression: ['gzip', 'lz64'],
capture_performance: true,
},
}).as('decide')

cy.visit('./playground/cypress')
cy.posthogInit(given.options)
cy.wait('@decide')
cy.wait('@recorder')
onPageLoad()
})

it('captures session events', () => {
    const inputSelector = '[data-cy-input]'

    // type twice with a short pause in between, then wait for the recording request
    cy.get(inputSelector).type('hello world! ')
    cy.wait(500)
    cy.get(inputSelector)
        .type('hello posthog!')
        .wait('@session-recording')
        .then(() => {
            cy.phCaptures({ full: true }).then((captures) => {
                // exactly two events are expected: the pageview and then the snapshot
                const eventNames = captures.map((capture) => capture.event)
                expect(eventNames).to.deep.equal(['$pageview', '$snapshot'])

                // the number of snapshot items is not stable between runs, so only a range is asserted
                const snapshotData = captures[1]['properties']['$snapshot_data']
                expect(snapshotData).to.have.length.above(33).and.below(38)

                // the recording opens with a meta event followed by a full snapshot...
                const [metaEvent, fullSnapshot, ...remainingSnapshots] = snapshotData
                expect(metaEvent.type).to.equal(4) // meta
                expect(fullSnapshot.type).to.equal(2) // full_snapshot

                // ...and everything after that must be an incremental snapshot (type 3)
                const remainingTypes = new Set(remainingSnapshots.map((snapshot) => snapshot.type))
                expect(remainingTypes).to.deep.equal(new Set([3]))
            })
        })
})

it('captures snapshots when the mouse moves', () => {
    let sessionId = null

    // Cypress' handling of time makes it unclear when the full snapshot runs,
    // so generate some activity first to force it to happen before the mouse moves
    cy.get('[data-cy-input]').type('hello world! ')
    cy.wait('@session-recording').then(() => {
        cy.phCaptures({ full: true }).then((captures) => {
            for (const capture of captures) {
                const captureSessionId = capture.properties['$session_id']
                if (_isNull(sessionId)) {
                    // remember the first session id we see...
                    sessionId = captureSessionId
                }
                // ...every capture has to belong to that same session
                expect(captureSessionId).to.equal(sessionId)
            }
            expect(sessionId).not.to.be.null
        })
    })
    // drop what has been captured so far, so that only mouse activity is inspected below
    cy.resetPhCaptures()

    // four mouse moves along a horizontal line, chained onto the same `body` subject
    const pointerXs = [200, 210, 220, 240]
    pointerXs.reduce(
        (chain, clientX) => chain.trigger('mousemove', { clientX, clientY: 300 }),
        cy.get('body')
    )

    cy.wait('@session-recording').then(() => {
        cy.phCaptures({ full: true }).then((captures) => {
            // only a $snapshot is expected, and it must belong to the session seen above
            const eventNames = captures.map((capture) => capture.event)
            expect(eventNames).to.deep.equal(['$snapshot'])
            const [snapshotEvent] = captures
            expect(snapshotEvent.properties['$session_id']).to.equal(sessionId)

            // the exact amount of data is not deterministic, so just require "some"
            const snapshots = snapshotEvent['properties']['$snapshot_data']
            expect(snapshots).to.have.length.above(0)

            /**
             * the snapshots will look a little like:
             * [
             * {"type":3,"data":{"source":6,"positions":[{"x":58,"y":18,"id":15,"timeOffset":0}]},"timestamp":1699814887222},
             * {"type":3,"data":{"source":6,"positions":[{"x":58,"y":18,"id":15,"timeOffset":-430}]},"timestamp":1699814887722}
             * ]
             */

            const xPositions = []
            for (const snapshot of snapshots) {
                expect(snapshot.type).to.equal(3)
                // the serialised snapshot is the failure message, to make mismatches debuggable
                expect(snapshot.data.source).to.equal(6, JSON.stringify(snapshot))
                xPositions.push(snapshot.data.positions[0].x)
            }

            // four events are triggered but only two snapshots are expected;
            // presumably Cypress fires them too quickly for each to be captured separately
            expect(xPositions).to.have.length(2)
            expect(xPositions[0]).to.equal(200)
            // whether the second value is 220 or 240 varies with timing,
            // so only require that it is greater than the first
            expect(xPositions[1]).to.be.above(xPositions[0])
        })
    })
})

it('captures pageviews, autocapture, custom events', () => {
it('continues capturing to the same session when the page reloads', () => {
    let sessionId = null

    // Cypress' handling of time makes it unclear when the full snapshot runs,
    // so generate some activity first to force it to happen
    cy.get('[data-cy-input]').type('hello world! ')
    cy.wait('@session-recording').then(() => {
        cy.phCaptures({ full: true }).then((captures) => {
            for (const capture of captures) {
                const captureSessionId = capture.properties['$session_id']
                if (_isNull(sessionId)) {
                    // remember the first session id we see...
                    sessionId = captureSessionId
                }
                // ...every capture has to belong to that same session
                expect(captureSessionId).to.equal(sessionId)
            }
            expect(sessionId).not.to.be.null
        })
    })
    // drop what has been captured so far, then reload and re-initialise PostHog
    cy.resetPhCaptures()
    cy.reload()
    onPageLoad()

    // four mouse moves along a horizontal line, chained onto the same `body` subject
    const pointerXs = [200, 210, 220, 240]
    pointerXs.reduce(
        (chain, clientX) => chain.trigger('mousemove', { clientX, clientY: 300 }),
        cy.get('body')
    )

    cy.wait('@session-recording').then(() => {
        cy.phCaptures({ full: true }).then((captures) => {
            // after the reload a new $pageview precedes the $snapshot,
            // and both must still carry the session id from before the reload
            const eventNames = captures.map((capture) => capture.event)
            expect(eventNames).to.deep.equal(['$pageview', '$snapshot'])
            const [pageviewEvent, snapshotEvent] = captures
            expect(pageviewEvent.properties['$session_id']).to.equal(sessionId)
            expect(snapshotEvent.properties['$session_id']).to.equal(sessionId)

            // the exact amount of data is not deterministic, so just require "some"
            const snapshots = snapshotEvent['properties']['$snapshot_data']
            expect(snapshots).to.have.length.above(0)

            /**
             * the incremental snapshots will look a little like:
             * [
             * {"type":3,"data":{"source":6,"positions":[{"x":58,"y":18,"id":15,"timeOffset":0}]},"timestamp":1699814887222},
             * {"type":3,"data":{"source":6,"positions":[{"x":58,"y":18,"id":15,"timeOffset":-430}]},"timestamp":1699814887722}
             * ]
             */

            // the page was reloaded, so the recording starts over with a meta event and a full snapshot
            expect(snapshots[0].type).to.equal(4) // meta
            expect(snapshots[1].type).to.equal(2) // full_snapshot

            const xPositions = []
            for (const snapshot of snapshots.slice(2)) {
                expect(snapshot.type).to.equal(3)
                // the serialised snapshot is the failure message, to make mismatches debuggable
                expect(snapshot.data.source).to.equal(6, JSON.stringify(snapshot))
                xPositions.push(snapshot.data.positions[0].x)
            }

            // four events are triggered but only two snapshots are expected;
            // presumably Cypress fires them too quickly for each to be captured separately
            expect(xPositions).to.have.length(2)
            expect(xPositions[0]).to.equal(200)
            // whether the second value is 220 or 240 varies with timing,
            // so only require that it is greater than the first
            expect(xPositions[1]).to.be.above(xPositions[0])
        })
    })
})

// Checks that a session whose recorded start/activity timestamps are older than the
// session manager's timeout is replaced by a new session on the next user activity.
// NOTE(review): this block comes from a diff view; the `requests` lines below may be
// removed-side lines - confirm against the real file before relying on them.
it('rotates sessions after 24 hours', () => {
let firstSessionId = null

// first we start a session and give it some activity
cy.get('[data-cy-input]').type('hello world! ')
cy.wait(500)
cy.get('[data-cy-input]')
.type('hello posthog!')
.wait('@session-recording')
.then(() => {
// exactly one request with the `session-recording` alias is expected (above 0 and below 2)
const requests = cy.state('requests').filter(({ alias }) => alias === 'session-recording')
expect(requests.length).to.be.above(0).and.to.be.below(2)
cy.phCaptures({ full: true }).then((captures) => {
// should be a pageview and a $snapshot
expect(captures.map((c) => c.event)).to.deep.equal(['$pageview', '$snapshot'])
expect(captures[1]['properties']['$session_id']).to.be.a('string')
// remember the session id of the snapshot so it can be compared after the rotation
firstSessionId = captures[1]['properties']['$session_id']
})
})

// then we reset the captures and move the session back in time
cy.resetPhCaptures()

cy.posthog().then((ph) => {
// read the session manager's internal state via its underscore-prefixed fields
const activityTs = ph.sessionManager['_sessionActivityTimestamp']
const startTs = ph.sessionManager['_sessionStartTimestamp']
const timeout = ph.sessionManager['_sessionTimeoutMs']

// move both session timestamps back by the timeout plus an extra 1000
// (presumably milliseconds, going by `_sessionTimeoutMs`),
// so that the next event appears to arrive after the timeout has elapsed
ph.sessionManager['_sessionActivityTimestamp'] = activityTs - timeout - 1000
ph.sessionManager['_sessionStartTimestamp'] = startTs - timeout - 1000
})

// then we expect that user activity will rotate the session
cy.get('[data-cy-input]')
.type('hello posthog!')
.wait('@session-recording', { timeout: 10000 })
.then(() => {
cy.phCaptures({ full: true }).then((captures) => {
// captures were reset above, so the first capture should now be a $snapshot
expect(captures[0].event).to.equal('$snapshot')
// // the amount of captured data should be deterministic
// // but of course that would be too easy
// expect(captures[1]['properties']['$snapshot_data']).to.have.length.above(33).and.below(40)

// the snapshot must carry a session id that differs from the one seen before the rotation
expect(captures[0]['properties']['$session_id']).to.be.a('string')
expect(captures[0]['properties']['$session_id']).not.to.eq(firstSessionId)

// the snapshot data should open with a meta event followed by a full snapshot
expect(captures[0]['properties']['$snapshot_data']).to.have.length.above(0)
expect(captures[0]['properties']['$snapshot_data'][0].type).to.equal(4) // meta
expect(captures[0]['properties']['$snapshot_data'][1].type).to.equal(2) // full_snapshot
})
})
})
})
Expand Down
56 changes: 56 additions & 0 deletions src/__tests__/extensions/replay/config.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import { buildNetworkRequestOptions } from '../../../extensions/replay/network/record/default-options'
import { defaultConfig } from '../../../posthog-core'

/**
 * Tests for the network-recording options built from the PostHog config.
 *
 * The focus is `maskRequestFn`: whatever the user configures, the `Authorization`
 * request header must not be present on the request returned by the mask function.
 */
describe('config', () => {
    describe('network request options', () => {
        describe('maskRequestFn', () => {
            it('should remove the Authorization header from requests even if no other config is set', () => {
                const networkOptions = buildNetworkRequestOptions(defaultConfig())
                const cleaned = networkOptions.maskRequestFn!({
                    url: 'something',
                    requestHeaders: {
                        Authorization: 'Bearer 123',
                        'content-type': 'application/json',
                    },
                })
                // only the Authorization header is dropped, other headers are untouched
                expect(cleaned?.requestHeaders).toEqual({
                    'content-type': 'application/json',
                })
            })

            it('should cope with no headers even if no other config is set', () => {
                const networkOptions = buildNetworkRequestOptions(defaultConfig())
                // a request without any headers must pass through without throwing
                const cleaned = networkOptions.maskRequestFn!({
                    url: 'something',
                    requestHeaders: undefined,
                })
                expect(cleaned?.requestHeaders).toBeUndefined()
            })

            it('should remove the Authorization header from requests even when a mask request fn is set', () => {
                const posthogConfig = defaultConfig()
                // a user-supplied mask function that keeps every header it receives
                // and overwrites `content-type`
                posthogConfig.session_recording.maskNetworkRequestFn = (data) => {
                    return {
                        ...data,
                        requestHeaders: {
                            ...(data.requestHeaders ? data.requestHeaders : {}),
                            'content-type': 'edited',
                        },
                    }
                }
                const networkOptions = buildNetworkRequestOptions(posthogConfig)

                const cleaned = networkOptions.maskRequestFn!({
                    url: 'something',
                    requestHeaders: {
                        Authorization: 'Bearer 123',
                        'content-type': 'application/json',
                    },
                })
                // the user's edit is applied and the Authorization header is still gone
                expect(cleaned?.requestHeaders).toEqual({
                    'content-type': 'edited',
                })
            })
        })
    })
})
33 changes: 33 additions & 0 deletions src/extensions/replay/network/record/default-options.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import { NetworkRequest, PostHogConfig } from '../../../../types'
import { defaultNetworkOptions, NetworkRecordOptions } from './index'
import { _isFunction } from '../../../../utils/type-utils'

/**
 * Removes the `Authorization` header from a captured network request.
 *
 * HTTP header names are case-insensitive (and the Fetch API `Headers` object reports
 * them lower-cased), so every casing of the name is removed, not just `Authorization`.
 *
 * The request is edited in place and the very same object is returned.
 *
 * @param data - the captured request; `requestHeaders` may be missing
 * @returns the `data` object that was passed in, without any authorization header
 */
const removeAuthorizationHeader = (data: NetworkRequest): NetworkRequest => {
    const headers = data.requestHeaders
    if (!headers) {
        // nothing to scrub when no headers were captured
        return data
    }

    Object.keys(headers).forEach((headerName) => {
        if (headerName.toLowerCase() === 'authorization') {
            delete headers[headerName]
        }
    })
    return data
}

/**
 * Builds the options used for recording network requests from the PostHog config.
 *
 * whether a maskRequestFn is provided or not,
 * we ensure that we remove the Authorization header from requests
 * we _never_ want to record that header by accident
 * if someone complains then we'll add an opt-in to let them override it
 *
 * The result is `defaultNetworkOptions`, overridden by the `session_recording` config,
 * overridden by the `maskRequestFn` built here. The caller's config object is only
 * read, never written to.
 *
 * @param instanceConfig - the PostHog config; only `session_recording` is read
 * @returns the merged network recording options
 */
export const buildNetworkRequestOptions = (instanceConfig: PostHogConfig): NetworkRecordOptions => {
    const sessionRecordingConfig = instanceConfig.session_recording as NetworkRecordOptions

    const maskRequestFn: NetworkRecordOptions['maskRequestFn'] = _isFunction(
        instanceConfig.session_recording.maskNetworkRequestFn
    )
        ? (data) => {
              // scrub first, so the user's function never even sees the header
              const cleanedRequest = removeAuthorizationHeader(data)
              // the user's function is looked up on the config each time a request is masked
              return instanceConfig.session_recording.maskNetworkRequestFn?.(cleanedRequest) ?? undefined
          }
        : removeAuthorizationHeader

    return {
        ...defaultNetworkOptions,
        ...sessionRecordingConfig,
        // placed last so that it always wins over anything spread in above
        maskRequestFn,
    }
}
Loading
Loading