Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Additional diagnostis for lifetimes #465

Merged
merged 3 commits into from
Jan 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 127 additions & 11 deletions rd-net/Lifetimes/Lifetimes/LifetimeDefinition.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,11 @@
using System.Threading;
using System.Threading.Tasks;
using JetBrains.Annotations;
using JetBrains.Collections.Viewable;
using JetBrains.Collections;
using JetBrains.Core;
using JetBrains.Diagnostics;
using JetBrains.Threading;
using JetBrains.Util;
using JetBrains.Util.Internal;
using JetBrains.Util.Util;

namespace JetBrains.Lifetimes
Expand All @@ -22,12 +21,11 @@ public class LifetimeDefinition : IDisposable
{
#pragma warning disable 420
#region Statics

private static readonly Signal<Lifetime> ourExecutionWasNotCancelledByTimeout = new();
/// <summary>
/// Use this signal to improve diagnostics (e.g., to collect thread dumps etc.))
/// </summary>
public static ISource<Lifetime> ExecutionWasNotCancelledByTimeout => ourExecutionWasNotCancelledByTimeout;

#if !NET35
public static AdditionalDiagnosticsInfo? AdditionalDiagnostics { get; set; }
private static readonly AdditionalDiagnosticsInfoStorage ourAdditionalDiagnosticsStorage = new();
#endif

internal static readonly ILog Log = JetBrains.Diagnostics.Log.GetLog<Lifetime>();

Expand Down Expand Up @@ -463,7 +461,15 @@ public void Terminate()
+ "This is also possible if the thread waiting for the termination wasn't able to receive execution time during the wait in SpinWait.spinUntil, so it has missed the fact that the lifetime was terminated in time.");

ourLogErrorAfterExecution.InterlockedUpdate(ref myState, true);
ourExecutionWasNotCancelledByTimeout.Fire(Lifetime);
#if !NET35
if (AdditionalDiagnostics is { } additionalDiagnostics)
{
Log.Catch(() =>
{
ourAdditionalDiagnosticsStorage.AddData(this, additionalDiagnostics.GetAdditionalDiagnosticsIfExecutionWasNotCancelledByTimeoutAsync(Lifetime));
});
}
#endif
}

if (!IncrementStatusIfEqualsTo(LifetimeStatus.Canceling))
Expand Down Expand Up @@ -791,6 +797,9 @@ public void Dispose()
{
if (!Succeed)
return;

// must be called before the execution counter becomes 0 to ensure that additional diagnostic data is not cleared from AdditionalDiagnosticsInfoStorage
LogErrorIfCookieWasNotReleasedForTooLong();

Interlocked.Decrement(ref myDef.myState);

Expand All @@ -799,12 +808,36 @@ public void Dispose()

if (myAllowTerminationUnderExecuting)
ourAllowTerminationUnderExecutionThreadStatic--;
}

private void LogErrorIfCookieWasNotReleasedForTooLong()
{
if (ourLogErrorAfterExecution[myDef.myState])
{
string? additionalInfo = null;
#if !NET35
if (AdditionalDiagnostics is { } additionalDiagnostics)
{
var diagnostics = ourAdditionalDiagnosticsStorage.TryGetDiagnostics(myDef);
if (diagnostics is not { IsCompleted: true })
{
if (additionalDiagnostics.SuppressExecuteIfAliveTookTooMuchTimeErrorIfDiagnosticIsNotReadyWhenCookieIsReleased)
return;
}
else if (diagnostics is { Status: TaskStatus.RanToCompletion })
{
additionalInfo = diagnostics.Result;
}
}
#endif

var terminationTimeoutMs = GetTerminationTimeoutMs(myDef.TerminationTimeoutKind);
Log.Error($"ExecuteIfAlive after termination of {myDef} took too much time (>{terminationTimeoutMs}ms)");
}
var message = $"ExecuteIfAlive after termination of {myDef} took too much time (>{terminationTimeoutMs}ms)";
if (additionalInfo != null)
message += $"\nAdditionalInfo:\n{additionalInfo}";

Log.Error(message);
}
}
}

Expand Down Expand Up @@ -1077,6 +1110,89 @@ public void Dispose()


#endregion


#if !NET35

public class AdditionalDiagnosticsInfo
{
/// <summary>
/// Set this property to provide additional diagnostic for exception: `ExecuteIfAlive after termination of {myDef} took too much time (>{terminationTimeoutMs}ms)`
/// </summary>
public Func<Lifetime, Task<string>> GetAdditionalDiagnosticsIfExecutionWasNotCancelledByTimeoutAsync { get; }

public bool SuppressExecuteIfAliveTookTooMuchTimeErrorIfDiagnosticIsNotReadyWhenCookieIsReleased { get; }

public AdditionalDiagnosticsInfo(
bool suppressExecuteIfAliveTookTooMuchTimeErrorIfDiagnosticIsNotReadyWhenCookieIsReleased,
Func<Lifetime, Task<string>> getAdditionalDiagnosticsIfExecutionWasNotCancelledByTimeout)
{
GetAdditionalDiagnosticsIfExecutionWasNotCancelledByTimeoutAsync = getAdditionalDiagnosticsIfExecutionWasNotCancelledByTimeout ?? throw new ArgumentNullException(nameof(getAdditionalDiagnosticsIfExecutionWasNotCancelledByTimeout));
SuppressExecuteIfAliveTookTooMuchTimeErrorIfDiagnosticIsNotReadyWhenCookieIsReleased = suppressExecuteIfAliveTookTooMuchTimeErrorIfDiagnosticIsNotReadyWhenCookieIsReleased;
}
}

// Naive key-value collection implementation with fixed capacity
private class AdditionalDiagnosticsInfoStorage
{
// we don't expect many simultaneously blocked lifetimes on termination, so 4 should be enough for many cases. If it won't be enough we introduce mo complicated collection
private const int MaxStorageSize = 4;
private int myCount;
// we don't need week reference because we have limited storage and terminated lifetime doesn't keep any references and we won't have any memory leaks
private readonly KeyValuePair<LifetimeDefinition, Task<string>>[] myList =new KeyValuePair<LifetimeDefinition, Task<string>>[MaxStorageSize];

public void AddData(LifetimeDefinition lifetime, Task<string> task)
{
lock (myList)
{
if (TryEnsureCapacity())
{
myList[myCount++] = new(lifetime, task);
}
}
}

public Task<string>? TryGetDiagnostics(LifetimeDefinition definition)
{
lock (myList)
{
foreach (var (key, value) in myList)
{
if (ReferenceEquals(key, definition))
return value;
}

return null;
}
}

private bool TryEnsureCapacity()
{
lock (myList)
{
if (myCount < myList.Length)
return true;

var index = 0;
for (var i = 0; i < myList.Length; i++)
{
var value = myList[i];
if (value.Key is { } def && def.ExecutingCount != 0)
{
myList[index++] = value;
}
else
{
myList[i] = default;
}
}

myCount = index;
return index < myList.Length;
}
}
}
#endif



Expand Down
1 change: 1 addition & 0 deletions rd-net/RdFramework/Impl/ExtCreatedUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ public static RdSignal<ExtCreationInfo> CreateExtSignal(this IRdDynamic @this)
);
var baseId = @this is IRdWireable wireable ? wireable.RdId : RdId.Nil;
signal.RdId = baseId.Mix(Protocol.ProtocolExtCreatedRdId);
signal.Async = true;
return signal;
}

Expand Down
64 changes: 58 additions & 6 deletions rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using JetBrains.Core;
Expand All @@ -9,7 +12,9 @@
using JetBrains.Lifetimes;
using NUnit.Framework;
#if !NET35
using System.Diagnostics;
using JetBrains.Threading;
using Microsoft.Diagnostics.Runtime;
#endif

// ReSharper disable MethodSupportsCancellation
Expand Down Expand Up @@ -217,8 +222,13 @@
{
const string expectedWarningText = "can't wait for `ExecuteIfAlive` completed on other thread";
const string expectedExceptionText = "ExecuteIfAlive after termination of";
bool warningReceived = false, exceptionReceived = false;
var warningReceived = false;
Exception? receivedException = null;

Check warning on line 226 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (ubuntu-22.04)

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 226 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (ubuntu-22.04)

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 226 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (ubuntu-22.04)

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 226 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (ubuntu-22.04)

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 226 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (windows-2022)

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 226 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (windows-2022)

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 226 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (windows-2022)

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 226 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (windows-2022)

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 226 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (macos-12)

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 226 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (macos-12)

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 226 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (macos-12)

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

Check warning on line 226 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (macos-12)

The annotation for nullable reference types should only be used in code within a '#nullable' annotations context.

#if !NET35
const string stackTraceHeader = "CurrentProcessThreadDumps:";
var executionWasNotCancelledByTimeoutReceived = false;
#endif

Lifetime.Using(lifetime =>
{
Expand All @@ -237,22 +247,27 @@
var lifetimeDefinition = lifetime.CreateNested();

var def2 = lifetime.CreateNested();

LifetimeDefinition.ExecutionWasNotCancelledByTimeout.Advise(lifetime, lf =>
#if !NET35
LifetimeDefinition.AdditionalDiagnostics = new LifetimeDefinition.AdditionalDiagnosticsInfo(false, async (lf) =>

Check warning on line 251 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (ubuntu-22.04)

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.

Check warning on line 251 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (ubuntu-22.04)

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.

Check warning on line 251 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (ubuntu-22.04)

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.

Check warning on line 251 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (ubuntu-22.04)

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.

Check warning on line 251 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (windows-2022)

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.

Check warning on line 251 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (windows-2022)

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.

Check warning on line 251 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (windows-2022)

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.

Check warning on line 251 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (windows-2022)

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.

Check warning on line 251 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (macos-12)

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.

Check warning on line 251 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (macos-12)

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.

Check warning on line 251 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (macos-12)

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.

Check warning on line 251 in rd-net/Test.Lifetimes/Lifetimes/LifetimeTest.cs

View workflow job for this annotation

GitHub Actions / build (macos-12)

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.
{
var stacks = GetCurrentProcessThreadDumps();
Assert.AreEqual(lifetimeDefinition.Lifetime, lf);
executionWasNotCancelledByTimeoutReceived = true;
return $"{stackTraceHeader}\n{stacks}";
});
#endif

def2.Terminate();
#if !NET35
Assert.IsFalse(executionWasNotCancelledByTimeoutReceived);
#endif

var lifetimeTerminatedEvent = new ManualResetEvent(false);
var backgroundThreadIsInTryExecuteEvent = new ManualResetEvent(false);
var thread = new Thread(() => lifetimeDefinition.Lifetime.TryExecute(() =>
{
backgroundThreadIsInTryExecuteEvent.Set();
lifetimeTerminatedEvent.WaitOne();
WaitForLifetimeTerminatedEvent(lifetimeTerminatedEvent);
}));
thread.Start();
backgroundThreadIsInTryExecuteEvent.WaitOne();
Expand All @@ -268,13 +283,50 @@
if (!e.Message.Contains(expectedExceptionText))
throw;

exceptionReceived = true;
receivedException = e;
}
});

Assert.IsTrue(warningReceived, "Warning `{0}` must have been logged", expectedWarningText);
Assert.IsTrue(exceptionReceived, "Exception `{0}` must have been logged", expectedExceptionText);
Assert.IsNotNull(receivedException, "Exception `{0}` must have been logged", expectedExceptionText);

#if !NET35
Assert.IsTrue(executionWasNotCancelledByTimeoutReceived);
Assert.IsTrue(receivedException.Message.Contains(stackTraceHeader), $"Exception `{expectedExceptionText}` doesn't contain {stackTraceHeader}");
Assert.IsTrue(receivedException.Message.Contains(nameof(WaitForLifetimeTerminatedEvent)), $"Exception `{expectedExceptionText}` doesn't contain {nameof(WaitForLifetimeTerminatedEvent)} method");

static string GetCurrentProcessThreadDumps()
{
if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
// clrmd crashes the process if os is not Windows, so just return the name of the method
return nameof(WaitForLifetimeTerminatedEvent);
}

using var dataTarget = DataTarget.AttachToProcess(Process.GetCurrentProcess().Id, suspend: false);
var clrVersion = dataTarget.ClrVersions.SingleOrDefault() ?? throw new Exception("Failed to get single clr from current process");

using var runtime = clrVersion.CreateRuntime();
var output = new StringBuilder();
foreach (var clrThread in runtime.Threads)
{
if (!clrThread.IsAlive)
continue;
output.AppendLine($"Thread #{clrThread.ManagedThreadId}:");

foreach (var frame in clrThread.EnumerateStackTrace())
output.AppendLine($"\tat {frame}");
}

return output.ToString();
}
#endif
}

[MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.NoOptimization)]
private static void WaitForLifetimeTerminatedEvent(ManualResetEvent lifetimeTerminatedEvent)
{
lifetimeTerminatedEvent.WaitOne();
}

[Test]
Expand Down
3 changes: 3 additions & 0 deletions rd-net/Test.Lifetimes/Test.Lifetimes.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,8 @@
<Version>0.12.1</Version>
</PackageReference>
</ItemGroup>
<ItemGroup Condition="'$(TargetFramework)' != 'net35'">
<PackageReference Include="Microsoft.Diagnostics.Runtime" Version="3.1.456101" />
</ItemGroup>

</Project>
Loading