Skip to content

Commit

Permalink
fix(sdk): work around memory leak with recursive SDK usage
Browse files Browse the repository at this point in the history
This works around a memory issue with a specific SKD usage pattern,
namely when SDK entry spans are created while the previous async context
(AsyncLocalStorage or cls-hooked) is still active. Since new context
objects are connected to the previously active context via prototypical
inheritance (that is, they are created via Object.create(activeContext)),
this pattern would create an ever-growing chain of contexts which
reference their predecessor context via .__proto__. Since the most
recent/currently active context is referenced by us, and all other
contexts are referenced by their successor, none of the context
objects can be garbage collected.

Thus, this usage pattern effectively results in memory leak. In turn,
that memory leak caused major GCs increasingly often, which in turn take
increasingly longer, because the garbage collector needs to walk
increasingly longer chains of context objects connected by their
prototype reference.

There is a more detailed internal analysis available at
https://www.notion.so/instana/SDK-Memory-Leak-a807a1b242c84976ac79d9a1a9037494

refs 106899
  • Loading branch information
basti1302 committed Nov 8, 2022
1 parent ab7dcea commit c9b35eb
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ class Namespace {
}

/**
* Creates a new CLS context in this namespace.
* Creates a new CLS context in this namespace as a child of the currently active context (or as a root context if no
* context is currently active).
* @returns {InstanaCLSContext}
*/
createContext() {
Expand All @@ -123,6 +124,19 @@ class Namespace {
return context;
}

/**
* Creates a new root CLS context in this namespace, independent of the currently active context.
*
* @returns {InstanaCLSContext}
*/
createRootContext() {
// Prototype inherit existing context if created a new child context within existing context.
const context = Object.create(Object.prototype);
context._ns_name = this.name;
context.id = executionAsyncId();
return context;
}

/**
* Runs a function in a new CLS context. The context is left after the function terminates. Asynchronous work started
* in this function will happen in that new context. The return value from that function (if any) is discarded.
Expand Down
13 changes: 12 additions & 1 deletion packages/core/src/tracing/clsHooked/context.js
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ Namespace.prototype.get = function get(key) {
};

/**
* Creates a new CLS context in this namespace.
* Creates a new CLS context in this namespace as a child of the currently active context (or as a root context if no
* context is currently active).
*/
Namespace.prototype.createContext = function createContext() {
// Prototype inherit existing context if created a new child context within existing context.
Expand All @@ -94,6 +95,16 @@ Namespace.prototype.createContext = function createContext() {
return context;
};

/**
* Creates a new root CLS context in this namespace, independent of the currently active context.
*/
Namespace.prototype.createRootContext = function createRootContext() {
let context = Object.create(Object.prototype);
context._ns_name = this.name;
context.id = currentUid;
return context;
};

/**
* Runs a function in a new CLS context. The context is left after the function terminates. Asynchronous work
* started in this function will happen in that new context. The return value from that function (if any) is discarded.
Expand Down
2 changes: 1 addition & 1 deletion packages/core/src/tracing/clsHooked/unset.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ function storeReducedSpan(context, key, span) {
// processed. By keeping the reduced span around, we can still provide trace continuity. The reduced span will
// automatically be removed when its async context is finally destroyed.
if (key === currentSpanKey && span != null) {
const gqd = span.gqd;
context[reducedSpanKey] = {
n: span.n,
t: span.t,
Expand All @@ -49,6 +48,7 @@ function storeReducedSpan(context, key, span) {
};

// Also keep captured GraphQL destination (host & port) for subscription updates, if present.
const gqd = span.gqd;
if (gqd) {
context[reducedSpanKey].gqd = gqd;
}
Expand Down
29 changes: 24 additions & 5 deletions packages/core/src/tracing/sdk/sdk.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ module.exports = function (isCallbackApi) {
/** @type {import('../cls')} */
let cls = null;
/** @type {Function} */
let wrapper = null;
let runInContext = null;

/**
* @param {string} name
Expand Down Expand Up @@ -63,6 +63,7 @@ module.exports = function (isCallbackApi) {
name,
constants.ENTRY,
constants.SDK.ENTRY,
true,
startEntrySpan,
/** @type {Object.<string, *> | null} */ (tags),
/** @type {string} */ (traceId),
Expand Down Expand Up @@ -142,6 +143,7 @@ module.exports = function (isCallbackApi) {
name,
constants.INTERMEDIATE,
constants.SDK.INTERMEDIATE,
false,
startIntermediateSpan,
tags,
null,
Expand Down Expand Up @@ -206,6 +208,7 @@ module.exports = function (isCallbackApi) {
name,
constants.EXIT,
constants.SDK.EXIT,
false,
startExitSpan,
tags,
null,
Expand Down Expand Up @@ -249,15 +252,27 @@ module.exports = function (isCallbackApi) {
* @param {string} name
* @param {number} kind
* @param {string} sdkKind
* @param {boolean} forceRootContext
* @param {Function} stackTraceRef
* @param {Object.<string, *>} tags
* @param {string} traceId
* @param {string} parentSpanId
* @param {Function} callback
* @returns {Function | Promise<*>}
*/
function startSdkSpan(name, kind, sdkKind, stackTraceRef, tags, traceId, parentSpanId, callback) {
return wrapper(() => {
function startSdkSpan(name, kind, sdkKind, forceRootContext, stackTraceRef, tags, traceId, parentSpanId, callback) {
const context = forceRootContext
? // SDK entry spans: Force ALS/CLS to use a new, separate root context. There is nothing we ever want to fetch
// from a parent/ancestor context for SDK entry spans. This avoids a memory leak when client code is calling
// sdk.startEntrySpan recursively in the same context over and over again, which creates an evergrowing chain
// of contexts, linked by prototypical inheritance.
cls.ns.createRootContext()
: // Let ALS/CLS create a new child context of the currently active context. This is the correct behavior for
// intermediate and exit spans. We need them to happen in a child/descendant context of the currently active
// entry span.
null;

return runInContext(() => {
const span = cls.startSpan('sdk', kind, traceId, parentSpanId);
span.stack = tracingUtil.getStackTrace(stackTraceRef);
span.data.sdk = {
Expand All @@ -271,7 +286,11 @@ module.exports = function (isCallbackApi) {
span.data.sdk.custom = { tags: Object.assign({}, tags) };
}
return callNext(callback);
});

// The additional argument `context` for runInContext (cls.ns.runAndReturn or cls.ns.runPromise, depending on the
// API type) allows us to force a specific context to be used by those functions. We use a new root context for
// SDK entry spans and null otherwise (intermediate spans, exit spans).
}, context);
}

/**
Expand Down Expand Up @@ -338,7 +357,7 @@ module.exports = function (isCallbackApi) {
*/
function init(_cls) {
cls = _cls;
wrapper = isCallbackApi ? cls.ns.runAndReturn.bind(cls.ns) : cls.ns.runPromise.bind(cls.ns);
runInContext = isCallbackApi ? cls.ns.runAndReturn.bind(cls.ns) : cls.ns.runPromise.bind(cls.ns);
}

function activate() {
Expand Down

0 comments on commit c9b35eb

Please sign in to comment.