Skip to content

Commit

Permalink
Ensure to use float reserved register for TYP_SIMD12 (#108629)
Browse files Browse the repository at this point in the history
* Use Float reserved registers

* add test case

* Use consistent internal register requirement for TYP_SIMD12

* Also do in loongarch and risc

* Use Extract to retrieve internal register for SIMD12
  • Loading branch information
kunalspathak authored Oct 10, 2024
1 parent f41e95c commit cb256e2
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17424,7 +17424,7 @@ void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNu
else
{
// Extract upper 4-bytes from data
regNumber tmpReg = codeGen->internalRegisters.GetSingle(tmpRegProvider);
regNumber tmpReg = codeGen->internalRegisters.Extract(tmpRegProvider);
emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, dataReg, 2);

// 4-byte write
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ int LinearScan::BuildNode(GenTree* tree)
{
// We need an internal register different from targetReg in which 'tree' produces its result
// because both targetReg and internal reg will be in use at the same time.
buildInternalFloatRegisterDefForNode(tree, allSIMDRegs());
buildInternalIntRegisterDefForNode(tree);
setInternalRegsDelayFree = true;
buildInternalRegisterUses();
}
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4137,9 +4137,15 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
#ifdef FEATURE_SIMD
if (varTypeIsSIMD(storeLoc) && !op1->IsVectorZero() && (storeLoc->TypeGet() == TYP_SIMD12))
{
#ifdef TARGET_ARM64
// Need an additional register to extract upper 4 bytes of Vector3,
// it has to be an integer register on Arm64.
buildInternalIntRegisterDefForNode(storeLoc);
#else
// Need an additional register to extract upper 4 bytes of Vector3,
// it has to be float for x86.
buildInternalFloatRegisterDefForNode(storeLoc, allSIMDRegs());
#endif // TARGET_ARM64
}
#endif // FEATURE_SIMD

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lsraloongarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ int LinearScan::BuildNode(GenTree* tree)
{
// We need an internal register different from targetReg in which 'tree' produces its result
// because both targetReg and internal reg will be in use at the same time.
buildInternalFloatRegisterDefForNode(tree, allSIMDRegs());
buildInternalIntRegisterDefForNode(tree);
setInternalRegsDelayFree = true;
buildInternalRegisterUses();
}
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lsrariscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ int LinearScan::BuildNode(GenTree* tree)
{
// We need an internal register different from targetReg in which 'tree' produces its result
// because both targetReg and internal reg will be in use at the same time.
buildInternalFloatRegisterDefForNode(tree, allSIMDRegs());
buildInternalIntRegisterDefForNode(tree);
setInternalRegsDelayFree = true;
buildInternalRegisterUses();
}
Expand Down
101 changes: 101 additions & 0 deletions src/tests/JIT/Regression/JitBlue/Runtime_108609/Runtime_108609.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

// Found by Antigen
// Reduced from 50.05 KB to 4.88 KB.
// Assertion failed 'genExactlyOneBit(availableSet)' in 'TestClass:Method0():this' during 'Generate code' (IL size 782; hash 0x46e9aa75; FullOpts)
//
// File: /Users/runner/work/1/s/src/coreclr/jit/codegencommon.cpp Line: 133


using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Numerics;
using Xunit;

// Fuzzer-reduced regression test. The expressions below are intentionally left in
// their machine-generated form; the test only needs Method0 to be compiled and run
// without the JIT failing. Do not "clean up" the expressions: their exact shape is
// the repro.
public class Runtime_108609
{
// Struct that pairs a Vector3 field with a short field; used as a local, a field
// and a by-value argument below.
public struct S1
{
public Vector3 v3_0;
public short short_1;
}
// Static inputs referenced by the generated expressions.
// s_bool_4 is initialized to true and is never written in this class.
static bool s_bool_4 = true;
static int s_int_10 = 5;
static sbyte s_sbyte_12 = -5;
static Vector64<int> s_v64_int_23 = Vector64<int>.Zero;
static Vector64<double> s_v64_double_28 = Vector64.Create(-4.956521739130435);
static Vector128<sbyte> s_v128_sbyte_30 = Vector128<sbyte>.Zero;
static Vector128<short> s_v128_short_31 = Vector128.Create(0, 2, -5, 0, 1, 0, 2, 2);
static Vector128<ulong> s_v128_ulong_36 = Vector128<ulong>.Zero;
static Vector128<double> s_v128_double_38 = Vector128.Create(1.0769230769230769, 72.01666666666667);
static Vector<ulong> s_v_ulong_46 = Vector.Create((ulong)1);
static Vector<float> s_v_float_47 = Vector<float>.Zero;
static S1 s_s1_52 = new S1();
// Instance inputs referenced by the generated expressions.
char char_56 = 'K';
int int_60 = -1;
Vector64<int> v64_int_73 = Vector64.Create(2147483646, -1);
Vector64<double> v64_double_78 = Vector64.Create(-0.9420289855072463);
Vector128<sbyte> v128_sbyte_80 = Vector128.Create(-2, -2, -1, 0, 5, -1, -1, -1, 1, 2, -1, 0, -1, 72, -5, 0);
Vector128<short> v128_short_81 = Vector128.CreateScalar((short)-1);
Vector128<ulong> v128_ulong_86 = Vector128<ulong>.AllBitsSet;
Vector<ulong> v_ulong_96 = Vector.Create((ulong)1);
Vector<float> v_float_97 = Vector.Create(2f);
Vector3 v3_100 = Vector3.Create(0.09090909f, 2.4f, -0.94f);
S1 s1_102 = new S1();
// Initial value of the primary loop counter in Method0.
static int s_loopInvariant = 4;
// Output buffer whose joined contents are hashed by Antigen().
// Nothing in this class adds to it.
private static List<string> toPrint = new List<string>();
// Non-inlined helper that always returns 19; used as a decimal argument in Method0.
[MethodImpl(MethodImplOptions.NoInlining)]
private decimal LeafMethod3()
{
return 19;
}

// Sets the out parameter from s_v128_ulong_36 and returns Vector<float>.Zero.
// The remaining parameters (including the by-value S1) are not read.
private Vector<float> Method2(decimal p_decimal_130, decimal p_decimal_131, sbyte p_sbyte_132, out Vector128<ulong> p_v128_ulong_133, S1 p_s1_134)
{
p_v128_ulong_133 = s_v128_ulong_36;
return Vector<float>.Zero;
}

// Self-assigns s_v128_sbyte_30 and returns it. None of the parameters are read.
private Vector128<sbyte> Method3(sbyte p_sbyte_152, Vector<ulong> p_v_ulong_153, S1 p_s1_154, uint p_uint_155, S1 p_s1_156, decimal p_decimal_157, Vector64<double> p_v64_double_158)
{
unchecked
{
return s_v128_sbyte_30 = s_v128_sbyte_30;
}
}

// The method named in the assertion recorded in the file header ('Method0').
private void Method0()
{
unchecked
{
S1 s1_218 = new S1();
// s_bool_4 is true and never modified here, so this branch is not expected to
// execute; it still has to be compiled as part of the method.
if (!s_bool_4)
{
int __loopvar0 = s_loopInvariant;
// Loop header mixes Vector64/Vector128 and AdvSimd operations with compound
// assignments to static and instance fields.
for (int __loopSecondaryVar0_0 = 15 - 4; s_int_10 < Vector64.GetElement(s_v64_int_23 - Vector64<int>.Zero * (15 + 4) | (s_v64_int_23 += AdvSimd.Max(Vector64<int>.Zero, v64_int_73)), 15 & 4); __loopvar0--, __loopSecondaryVar0_0++, s_s1_52.short_1 = Vector128.Sum(AdvSimd.ShiftLogicalRoundedSaturate(Vector128<short>.AllBitsSet, v128_short_81) & Vector128.AsInt16(s_v128_double_38) - (15 | 4) * (v128_short_81 *= s_v128_short_31)))
{
// Chained reads and writes of the Vector3 field of the local struct.
// NOTE(review): presumably these Vector3 stores are what exercised the failing
// JIT path -- confirm against the tracking issue.
v3_100 *= s1_218.v3_0 - (s1_218.v3_0 = s1_218.v3_0 = s1_218.v3_0 *= s1_218.v3_0) * Vector3.RadiansToDegrees(Vector3.Zero + s_s1_52.v3_0 + v3_100 * v3_100) - Vector3.Sin(s1_218.v3_0 *= v3_100 * (s1_218.v3_0 *= s1_218.v3_0) - v3_100 * s_s1_52.v3_0);
}
}
// These two calls run unconditionally and pass S1 values by value.
v_float_97 = Method2(15 % 4, LeafMethod3(), s_sbyte_12, out v128_ulong_86, s1_218);
v128_sbyte_80 = Method3(15 % 4, s_v_ulong_46 *= s_v_ulong_46 += v_ulong_96 = s_v_ulong_46 ^ v_ulong_96 * (15 - 4), s1_218, 15 & 4, s1_102, LeafMethod3(), v64_double_78 * s_v64_double_28 - (s_v64_double_28 = v64_double_78) | Vector64.GreaterThan(v64_double_78, v64_double_78) | (s_v64_double_28 = Vector64<double>.Zero + v64_double_78) | (s_v64_double_28 = v64_double_78) | Vector64.GreaterThan(v64_double_78, v64_double_78) | (s_v64_double_28 = Vector64<double>.Zero + v64_double_78) | s_v64_double_28);
return;
}
}

// xUnit entry point. The value returned by Antigen() is discarded, so the test
// passes as long as the call completes without throwing.
[Fact]
public static void TestEntryPoint()
{
Antigen();
}

// Creates an instance, runs Method0, and returns the hash code of toPrint's
// entries joined with Environment.NewLine.
private static int Antigen()
{
new Runtime_108609().Method0();
return string.Join(Environment.NewLine, toPrint).GetHashCode();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<!-- Build the test with optimizations enabled. NOTE(review): presumably needed because the recorded failure was in a FullOpts compile; confirm. -->
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>

0 comments on commit cb256e2

Please sign in to comment.