Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure to use float reserved register for TYP_SIMD12 #108629

Merged
merged 5 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17424,7 +17424,7 @@ void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNu
else
{
// Extract upper 4-bytes from data
regNumber tmpReg = codeGen->internalRegisters.GetSingle(tmpRegProvider);
regNumber tmpReg = codeGen->internalRegisters.Extract(tmpRegProvider);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting: this function checks whether we have any spare temps at all. If there are none, it does the store using two shuffles without a temp; otherwise it uses a simpler routine with a temp. It looks like in some cases we over-allocate temps for it, though? Given that the diffs are fine, I guess it's OK.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, we may have more than one temp, and Extract makes sure to get the first available one and remove it from the pool.

emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, dataReg, 2);

// 4-byte write
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ int LinearScan::BuildNode(GenTree* tree)
{
// We need an internal register different from targetReg in which 'tree' produces its result
// because both targetReg and internal reg will be in use at the same time.
buildInternalFloatRegisterDefForNode(tree, allSIMDRegs());
buildInternalIntRegisterDefForNode(tree);
setInternalRegsDelayFree = true;
buildInternalRegisterUses();
}
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4137,9 +4137,15 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
#ifdef FEATURE_SIMD
if (varTypeIsSIMD(storeLoc) && !op1->IsVectorZero() && (storeLoc->TypeGet() == TYP_SIMD12))
{
#ifdef TARGET_ARM64
// Need an additional register to extract upper 4 bytes of Vector3,
// it has to be an integer register for arm64.
buildInternalIntRegisterDefForNode(storeLoc);
#else
// Need an additional register to extract upper 4 bytes of Vector3,
// it has to be float for x86.
buildInternalFloatRegisterDefForNode(storeLoc, allSIMDRegs());
#endif // TARGET_ARM64
}
#endif // FEATURE_SIMD

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lsraloongarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ int LinearScan::BuildNode(GenTree* tree)
{
// We need an internal register different from targetReg in which 'tree' produces its result
// because both targetReg and internal reg will be in use at the same time.
buildInternalFloatRegisterDefForNode(tree, allSIMDRegs());
buildInternalIntRegisterDefForNode(tree);
setInternalRegsDelayFree = true;
buildInternalRegisterUses();
}
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lsrariscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ int LinearScan::BuildNode(GenTree* tree)
{
// We need an internal register different from targetReg in which 'tree' produces its result
// because both targetReg and internal reg will be in use at the same time.
buildInternalFloatRegisterDefForNode(tree, allSIMDRegs());
buildInternalIntRegisterDefForNode(tree);
setInternalRegsDelayFree = true;
buildInternalRegisterUses();
}
Expand Down
101 changes: 101 additions & 0 deletions src/tests/JIT/Regression/JitBlue/Runtime_108609/Runtime_108609.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

// Found by Antigen
// Reduced from 50.05 KB to 4.88 KB.
// Assertion failed 'genExactlyOneBit(availableSet)' in 'TestClass:Method0():this' during 'Generate code' (IL size 782; hash 0x46e9aa75; FullOpts)
//
// File: /Users/runner/work/1/s/src/coreclr/jit/codegencommon.cpp Line: 133


using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Numerics;
using Xunit;

// Regression test for issue 108609. The code is a fuzzer-reduced repro (see the header at the
// top of this file); the test passes when Method0 can be compiled and run without the JIT
// assertion quoted in that header. There are no value assertions.
// NOTE(review): the expressions are intentionally convoluted; do not simplify them without
// confirming the repro still fails on an unfixed JIT.
public class Runtime_108609
{
// Struct holding a Vector3 followed by a short. Method0 reads and writes v3_0 on its local S1
// and reads v3_0 / writes short_1 on the static s_s1_52.
public struct S1
{
public Vector3 v3_0;
public short short_1;
}
// Static state used by the methods below.
// s_bool_4 is initialized to true and nothing in this class writes it, so the
// `if (!s_bool_4)` block in Method0 is not entered at run time.
static bool s_bool_4 = true;
static int s_int_10 = 5;
static sbyte s_sbyte_12 = -5;
static Vector64<int> s_v64_int_23 = Vector64<int>.Zero;
static Vector64<double> s_v64_double_28 = Vector64.Create(-4.956521739130435);
static Vector128<sbyte> s_v128_sbyte_30 = Vector128<sbyte>.Zero;
static Vector128<short> s_v128_short_31 = Vector128.Create(0, 2, -5, 0, 1, 0, 2, 2);
static Vector128<ulong> s_v128_ulong_36 = Vector128<ulong>.Zero;
static Vector128<double> s_v128_double_38 = Vector128.Create(1.0769230769230769, 72.01666666666667);
static Vector<ulong> s_v_ulong_46 = Vector.Create((ulong)1);
static Vector<float> s_v_float_47 = Vector<float>.Zero;
static S1 s_s1_52 = new S1();
// Instance state used by Method0.
char char_56 = 'K';
int int_60 = -1;
Vector64<int> v64_int_73 = Vector64.Create(2147483646, -1);
Vector64<double> v64_double_78 = Vector64.Create(-0.9420289855072463);
Vector128<sbyte> v128_sbyte_80 = Vector128.Create(-2, -2, -1, 0, 5, -1, -1, -1, 1, 2, -1, 0, -1, 72, -5, 0);
Vector128<short> v128_short_81 = Vector128.CreateScalar((short)-1);
Vector128<ulong> v128_ulong_86 = Vector128<ulong>.AllBitsSet;
Vector<ulong> v_ulong_96 = Vector.Create((ulong)1);
Vector<float> v_float_97 = Vector.Create(2f);
Vector3 v3_100 = Vector3.Create(0.09090909f, 2.4f, -0.94f);
S1 s1_102 = new S1();
// Initial value of the loop counter __loopvar0 in Method0.
static int s_loopInvariant = 4;
// Nothing in this class appends to this list; Antigen() only joins and hashes it.
private static List<string> toPrint = new List<string>();
// Non-inlined helper returning a constant decimal; used as a call argument in Method0.
[MethodImpl(MethodImplOptions.NoInlining)]
private decimal LeafMethod3()
{
return 19;
}

// Copies the static s_v128_ulong_36 into the out parameter and returns a zero vector.
// The remaining parameters are not used by the body.
private Vector<float> Method2(decimal p_decimal_130, decimal p_decimal_131, sbyte p_sbyte_132, out Vector128<ulong> p_v128_ulong_133, S1 p_s1_134)
{
p_v128_ulong_133 = s_v128_ulong_36;
return Vector<float>.Zero;
}

// Self-assigns the static s_v128_sbyte_30 and returns it. None of the parameters are used by
// the body; they only shape the call site in Method0.
private Vector128<sbyte> Method3(sbyte p_sbyte_152, Vector<ulong> p_v_ulong_153, S1 p_s1_154, uint p_uint_155, S1 p_s1_156, decimal p_decimal_157, Vector64<double> p_v64_double_158)
{
unchecked
{
return s_v128_sbyte_30 = s_v128_sbyte_30;
}
}

// The method named in the assertion message quoted in the file header ('Method0').
// It declares a local S1, contains a guarded loop that performs Vector3 arithmetic on the
// local's v3_0 field, and then calls Method2 and Method3 passing the local S1 by value.
private void Method0()
{
unchecked
{
S1 s1_218 = new S1();
// Guarded by s_bool_4 (true), so this block is compiled but not executed.
if (!s_bool_4)
{
int __loopvar0 = s_loopInvariant;
for (int __loopSecondaryVar0_0 = 15 - 4; s_int_10 < Vector64.GetElement(s_v64_int_23 - Vector64<int>.Zero * (15 + 4) | (s_v64_int_23 += AdvSimd.Max(Vector64<int>.Zero, v64_int_73)), 15 & 4); __loopvar0--, __loopSecondaryVar0_0++, s_s1_52.short_1 = Vector128.Sum(AdvSimd.ShiftLogicalRoundedSaturate(Vector128<short>.AllBitsSet, v128_short_81) & Vector128.AsInt16(s_v128_double_38) - (15 | 4) * (v128_short_81 *= s_v128_short_31)))
{
// Repeated reads of and compound assignments to the Vector3 field s1_218.v3_0.
// NOTE(review): presumably these Vector3 stores are what exercised the failing
// code path - confirm before changing.
v3_100 *= s1_218.v3_0 - (s1_218.v3_0 = s1_218.v3_0 = s1_218.v3_0 *= s1_218.v3_0) * Vector3.RadiansToDegrees(Vector3.Zero + s_s1_52.v3_0 + v3_100 * v3_100) - Vector3.Sin(s1_218.v3_0 *= v3_100 * (s1_218.v3_0 *= s1_218.v3_0) - v3_100 * s_s1_52.v3_0);
}
}
v_float_97 = Method2(15 % 4, LeafMethod3(), s_sbyte_12, out v128_ulong_86, s1_218);
v128_sbyte_80 = Method3(15 % 4, s_v_ulong_46 *= s_v_ulong_46 += v_ulong_96 = s_v_ulong_46 ^ v_ulong_96 * (15 - 4), s1_218, 15 & 4, s1_102, LeafMethod3(), v64_double_78 * s_v64_double_28 - (s_v64_double_28 = v64_double_78) | Vector64.GreaterThan(v64_double_78, v64_double_78) | (s_v64_double_28 = Vector64<double>.Zero + v64_double_78) | (s_v64_double_28 = v64_double_78) | Vector64.GreaterThan(v64_double_78, v64_double_78) | (s_v64_double_28 = Vector64<double>.Zero + v64_double_78) | s_v64_double_28);
return;
}
}

// xUnit entry point. Runs the repro and discards its result, so the test succeeds as long as
// the call completes without throwing.
[Fact]
public static void TestEntryPoint()
{
Antigen();
}

// Creates an instance, runs Method0, and returns the hash code of the joined toPrint list.
private static int Antigen()
{
new Runtime_108609().Method0();
return string.Join(Environment.NewLine, toPrint).GetHashCode();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<!-- Build the test with optimizations enabled. NOTE(review): presumably required because the failure quoted in the test source header was reported under FullOpts; confirm before changing. -->
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<!-- Compile the single source file that shares this project's name. -->
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>
Loading