From c9b35eb37f1e41f7b11442dd408ca53f5cb2ac13 Mon Sep 17 00:00:00 2001 From: Bastian Krol Date: Thu, 3 Nov 2022 12:42:08 +0100 Subject: [PATCH] fix(sdk): work around memory leak with recursive SDK usage This works around a memory issue with a specific SKD usage pattern, namely when SDK entry spans are created while the previous async context (AsyncLocalStorage or cls-hooked) is still active. Since new context objects are connected to the previously active context via prototypical inheritance (that is, they are created via Object.create(activeContext)), this pattern would create an ever-growing chain of contexts which reference their predecessor context via .__proto__. Since the most recent/currently active context is referenced by us, and all other contexts are referenced by their successor, none of the context objects can be garbage collected. Thus, this usage pattern effectively results in memory leak. In turn, that memory leak caused major GCs increasingly often, which in turn take increasingly longer, because the garbage collector needs to walk increasingly longer chains of context objects connected by their prototype reference. There is a more detailed internal analysis available at https://www.notion.so/instana/SDK-Memory-Leak-a807a1b242c84976ac79d9a1a9037494 refs 106899 --- .../clsHooked/async_local_storage_context.js | 16 +++++++++- .../core/src/tracing/clsHooked/context.js | 13 ++++++++- packages/core/src/tracing/clsHooked/unset.js | 2 +- packages/core/src/tracing/sdk/sdk.js | 29 +++++++++++++++---- 4 files changed, 52 insertions(+), 8 deletions(-) diff --git a/packages/core/src/tracing/clsHooked/async_local_storage_context.js b/packages/core/src/tracing/clsHooked/async_local_storage_context.js index d1442267db..2b5112add1 100644 --- a/packages/core/src/tracing/clsHooked/async_local_storage_context.js +++ b/packages/core/src/tracing/clsHooked/async_local_storage_context.js @@ -111,7 +111,8 @@ class Namespace { } /** - * Creates a new CLS context in this namespace. + * Creates a new CLS context in this namespace as a child of the currently active context (or as a root context if no + * context is currently active). * @returns {InstanaCLSContext} */ createContext() { @@ -123,6 +124,19 @@ class Namespace { return context; } + /** + * Creates a new root CLS context in this namespace, independent of the currently active context. + * + * @returns {InstanaCLSContext} + */ + createRootContext() { + // Prototype inherit existing context if created a new child context within existing context. + const context = Object.create(Object.prototype); + context._ns_name = this.name; + context.id = executionAsyncId(); + return context; + } + /** * Runs a function in a new CLS context. The context is left after the function terminates. Asynchronous work started * in this function will happen in that new context. The return value from that function (if any) is discarded. diff --git a/packages/core/src/tracing/clsHooked/context.js b/packages/core/src/tracing/clsHooked/context.js index e57b7b68d4..e9458671c9 100644 --- a/packages/core/src/tracing/clsHooked/context.js +++ b/packages/core/src/tracing/clsHooked/context.js @@ -84,7 +84,8 @@ Namespace.prototype.get = function get(key) { }; /** - * Creates a new CLS context in this namespace. + * Creates a new CLS context in this namespace as a child of the currently active context (or as a root context if no + * context is currently active). */ Namespace.prototype.createContext = function createContext() { // Prototype inherit existing context if created a new child context within existing context. @@ -94,6 +95,16 @@ Namespace.prototype.createContext = function createContext() { return context; }; +/** + * Creates a new root CLS context in this namespace, independent of the currently active context. + */ +Namespace.prototype.createRootContext = function createRootContext() { + let context = Object.create(Object.prototype); + context._ns_name = this.name; + context.id = currentUid; + return context; +}; + /** * Runs a function in a new CLS context. The context is left after the function terminates. Asynchronous work * started in this function will happen in that new context. The return value from that function (if any) is discarded. diff --git a/packages/core/src/tracing/clsHooked/unset.js b/packages/core/src/tracing/clsHooked/unset.js index 736134d5ba..ad4c0af2c6 100644 --- a/packages/core/src/tracing/clsHooked/unset.js +++ b/packages/core/src/tracing/clsHooked/unset.js @@ -39,7 +39,6 @@ function storeReducedSpan(context, key, span) { // processed. By keeping the reduced span around, we can still provide trace continuity. The reduced span will // automatically be removed when its async context is finally destroyed. if (key === currentSpanKey && span != null) { - const gqd = span.gqd; context[reducedSpanKey] = { n: span.n, t: span.t, @@ -49,6 +48,7 @@ function storeReducedSpan(context, key, span) { }; // Also keep captured GraphQL destination (host & port) for subscription updates, if present. + const gqd = span.gqd; if (gqd) { context[reducedSpanKey].gqd = gqd; } diff --git a/packages/core/src/tracing/sdk/sdk.js b/packages/core/src/tracing/sdk/sdk.js index 8d486483d9..d81f293df6 100644 --- a/packages/core/src/tracing/sdk/sdk.js +++ b/packages/core/src/tracing/sdk/sdk.js @@ -26,7 +26,7 @@ module.exports = function (isCallbackApi) { /** @type {import('../cls')} */ let cls = null; /** @type {Function} */ - let wrapper = null; + let runInContext = null; /** * @param {string} name @@ -63,6 +63,7 @@ module.exports = function (isCallbackApi) { name, constants.ENTRY, constants.SDK.ENTRY, + true, startEntrySpan, /** @type {Object. | null} */ (tags), /** @type {string} */ (traceId), @@ -142,6 +143,7 @@ module.exports = function (isCallbackApi) { name, constants.INTERMEDIATE, constants.SDK.INTERMEDIATE, + false, startIntermediateSpan, tags, null, @@ -206,6 +208,7 @@ module.exports = function (isCallbackApi) { name, constants.EXIT, constants.SDK.EXIT, + false, startExitSpan, tags, null, @@ -249,6 +252,7 @@ module.exports = function (isCallbackApi) { * @param {string} name * @param {number} kind * @param {string} sdkKind + * @param {boolean} forceRootContext * @param {Function} stackTraceRef * @param {Object.} tags * @param {string} traceId @@ -256,8 +260,19 @@ module.exports = function (isCallbackApi) { * @param {Function} callback * @returns {Function | Promise<*>} */ - function startSdkSpan(name, kind, sdkKind, stackTraceRef, tags, traceId, parentSpanId, callback) { - return wrapper(() => { + function startSdkSpan(name, kind, sdkKind, forceRootContext, stackTraceRef, tags, traceId, parentSpanId, callback) { + const context = forceRootContext + ? // SDK entry spans: Force ALS/CLS to use a new, separate root context. There is nothing we ever want to fetch + // from a parent/ancestor context for SDK entry spans. This avoids a memory leak when client code is calling + // sdk.startEntrySpan recursively in the same context over and over again, which creates an evergrowing chain + // of contexts, linked by prototypical inheritance. + cls.ns.createRootContext() + : // Let ALS/CLS create a new child context of the currently active context. This is the correct behavior for + // intermediate and exit spans. We need them to happen in a child/descendant context of the currently active + // entry span. + null; + + return runInContext(() => { const span = cls.startSpan('sdk', kind, traceId, parentSpanId); span.stack = tracingUtil.getStackTrace(stackTraceRef); span.data.sdk = { @@ -271,7 +286,11 @@ module.exports = function (isCallbackApi) { span.data.sdk.custom = { tags: Object.assign({}, tags) }; } return callNext(callback); - }); + + // The additional argument `context` for runInContext (cls.ns.runAndReturn or cls.ns.runPromise, depending on the + // API type) allows us to force a specific context to be used by those functions. We use a new root context for + // SDK entry spans and null otherwise (intermediate spans, exit spans). + }, context); } /** @@ -338,7 +357,7 @@ module.exports = function (isCallbackApi) { */ function init(_cls) { cls = _cls; - wrapper = isCallbackApi ? cls.ns.runAndReturn.bind(cls.ns) : cls.ns.runPromise.bind(cls.ns); + runInContext = isCallbackApi ? cls.ns.runAndReturn.bind(cls.ns) : cls.ns.runPromise.bind(cls.ns); } function activate() {