Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[StackColoring] WRONG code #126252

Open
JonPsson1 opened this issue Feb 7, 2025 · 1 comment
Open

[StackColoring] WRONG code #126252

JonPsson1 opened this issue Feb 7, 2025 · 1 comment

Comments

@JonPsson1
Copy link
Contributor

JonPsson1 commented Feb 7, 2025

It seems that StackColoring marks a stack slot as dead even though it has its address taken and used.

The test case has enclosing braces and fails at -Os, but if I remove the braces the problem disapears:

wrong0.i: <> with braces removed

int printf(const char *, ...);                  int printf(const char *, ...);
char GlobCh0 = 0, Res = 0, GlobCh1 = 0;         char GlobCh0 = 0, Res = 0, GlobCh1 = 0;
int b = 0, g = 0;                               int b = 0, g = 0;
int *GlobIntPtr = 0;                            int *GlobIntPtr = 0;
long e = 0;                                     long e = 0;

void fun(int FunArg) {                          void fun(int FunArg) {
  char *FunPtr = &Res;                            char *FunPtr = &Res;
  *FunPtr = FunArg;                               *FunPtr = FunArg;
}                                               }

int main() {                                    int main() {
  {                                           |
    int MainInt0 = 0, MainInt1 = 0;                 int MainInt0 = 0, MainInt1 = 0;
    GlobCh1 = GlobCh0 + 1;                          GlobCh1 = GlobCh0 + 1;
    for (; b <= 0; b = GlobCh1) {                   for (; b <= 0; b = GlobCh1) {
      int *m = 0;                                     int *m = 0;
      int **o = &m;                                   int **o = &m;
      int ***n[7];                                    int ***n[7];
      n[g] = &o;                                      n[g] = &o;
      fun(((void*) (GlobIntPtr = &MainInt0))          fun(((void*) (GlobIntPtr = &MainInt0)) 
    }                                               }
    int p[7];                                       int p[7];
    for (; MainInt1 < 7; MainInt1++)                for (; MainInt1 < 7; MainInt1++)
      p[MainInt1] = 9;                                p[MainInt1] = 9;
    e = p[4];                                       e = p[4];
  }                                           |
  printf("%d\n", Res);                            printf("%d\n", Res);
}                                               }

clang -march=z15 -Os wrong0.i -o a.out

The generated IRs are identical, except for the lifetime.end intrinsics at the end:

for.end14:                                        ; preds = %   for.end14:                                        ; preds = %
  %arrayidx15 = getelementptr inbounds nuw i8, ptr %p, i64 16     %arrayidx15 = getelementptr inbounds nuw i8, ptr %p, i64 16
  %3 = load i32, ptr %arrayidx15, align 4, !tbaa !7               %3 = load i32, ptr %arrayidx15, align 4, !tbaa !7
  %conv16 = sext i32 %3 to i64                                    %conv16 = sext i32 %3 to i64
  store i64 %conv16, ptr @e, align 8, !tbaa !19                   store i64 %conv16, ptr @e, align 8, !tbaa !19
  call void @llvm.lifetime.end.p0(i64 28, ptr nonnull %p) #4  <
  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %MainInt0 <
  %4 = load i8, ptr @Res, align 2, !tbaa !4                       %4 = load i8, ptr @Res, align 2, !tbaa !4
  %conv17 = zext i8 %4 to i32                                     %conv17 = zext i8 %4 to i32
  %call = call signext i32 (ptr, ...) @printf(ptr noundef non     %call = call signext i32 (ptr, ...) @printf(ptr noundef non
                                                              >   call void @llvm.lifetime.end.p0(i64 28, ptr nonnull %p) #4
                                                              >   call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %MainInt0
  ret i32 0                                                       ret i32 0

This difference causes DSEPass to remove the initialization of MainInt0 with the reduced lifetime:

; *** IR Dump After DSEPass on main ***                                 ; *** IR Dump After DSEPass on main ***
; Function Attrs: nounwind optsize                                      ; Function Attrs: nounwind optsize
define dso_local signext i32 @main() local_unnamed_addr #2 {            define dso_local signext i32 @main() local_unnamed_addr #2 {
entry:                                                                  entry:
  %MainInt0 = alloca i32, align 4                                         %MainInt0 = alloca i32, align 4
  %m = alloca ptr, align 8                                                %m = alloca ptr, align 8
  %o = alloca ptr, align 8                                                %o = alloca ptr, align 8
  %n = alloca [7 x ptr], align 8                                          %n = alloca [7 x ptr], align 8
  %p = alloca [7 x i32], align 4                                          %p = alloca [7 x i32], align 4
  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %MainInt0) #4      call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %MainInt0) #4
                                                                     >    store i32 0, ptr %MainInt0, align 4, !tbaa !7
  %0 = load i8, ptr @GlobCh0, align 2, !tbaa !4                           %0 = load i8, ptr @GlobCh0, align 2, !tbaa !4
  %add = add i8 %0, 1                                                     %add = add i8 %0, 1

This is ok, as the only use of MainInt0 is to compare its address to another:

for.cond.for.end_crit_edge:                       ; preds = %for.bod    for.cond.for.end_crit_edge:                       ; preds = %for.bod
  %conv6 = zext i8 %add to i32                                            %conv6 = zext i8 %add to i32
  %cmp4 = icmp ne ptr %MainInt0, %2                                       %cmp4 = icmp ne ptr %MainInt0, %2
  %conv.i = zext i1 %cmp4 to i8                                           %conv.i = zext i1 %cmp4 to i8
  store ptr %MainInt0, ptr @GlobIntPtr, align 8, !tbaa !14                store ptr %MainInt0, ptr @GlobIntPtr, align 8, !tbaa !14
  store i8 %conv.i, ptr @Res, align 1, !tbaa !4                           store i8 %conv.i, ptr @Res, align 1, !tbaa !4
  store i32 %conv6, ptr @b, align 4, !tbaa !7                             store i32 %conv6, ptr @b, align 4, !tbaa !7
  br label %for.end                                                       br label %for.end

It then seems to me that this leads to problems with StackColoring, which marks the stack slot as dead and seemingly just replaces the use of it with another one, which can't be good:

# *** IR Dump After Merge disjoint stack slots (stack-coloring) ***:    # *** IR Dump After Merge disjoint stack slots (stack-coloring) ***:
# Machine code for function main: IsSSA, TracksLiveness                 # Machine code for function main: IsSSA, TracksLiveness
Frame Objects:                                                          Frame Objects:
  fi#0: dead                                                         |    fi#0: size=4, align=4, at location [SP]
  fi#1: size=8, align=8, at location [SP]                                 fi#1: size=8, align=8, at location [SP]
  fi#2: size=56, align=8, at location [SP]                                fi#2: size=56, align=8, at location [SP]

 ...

bb.2.for.body:                                                          bb.2.for.body:
; predecessors: %bb.1, %bb.2                                            ; predecessors: %bb.1, %bb.2
  successors: %bb.2(0x7c000000), %bb.3(0x04000000); %bb.2(96.88%), %      successors: %bb.2(0x7c000000), %bb.3(0x04000000); %bb.2(96.88%), %

  %15:gr64bit = LA %stack.1.o, 0, $noreg                                  %15:gr64bit = LA %stack.1.o, 0, $noreg
  STG killed %15:gr64bit, %1:addr64bit, 0, $noreg :: (store (s64) in      STG killed %15:gr64bit, %1:addr64bit, 0, $noreg :: (store (s64) in
  %2:gr64bit = LG %stack.2.n, 0, $noreg :: (dereferenceable load (s6      %2:gr64bit = LG %stack.2.n, 0, $noreg :: (dereferenceable load (s6
  TMLMux %0:grx32bit, 255, implicit-def $cc                               TMLMux %0:grx32bit, 255, implicit-def $cc
  BRC 15, 8, %bb.2, implicit $cc                                          BRC 15, 8, %bb.2, implicit $cc
  J %bb.3                                                                 J %bb.3

bb.3.for.cond.for.end_crit_edge:                                        bb.3.for.cond.for.end_crit_edge:
; predecessors: %bb.2                                                   ; predecessors: %bb.2
  successors: %bb.4(0x80000000); %bb.4(100.00%)                           successors: %bb.4(0x80000000); %bb.4(100.00%)

  %16:gr32bit = LLCRMux %0:grx32bit                                       %16:gr32bit = LLCRMux %0:grx32bit
  %17:gr64bit = LA %stack.1.o, 0, $noreg                             |    %17:gr64bit = LA %stack.0.MainInt0, 0, $noreg
  CGR %17:gr64bit, %2:gr64bit, implicit-def $cc                           CGR %17:gr64bit, %2:gr64bit, implicit-def $cc

It seems FI#0 should not be marked dead as there is a use of it, even just for comparing the final address with another.

@mohammed-nurulhoque @kazutakahirata @nikic @uweigand @fhahn

@efriedma-quic
Copy link
Collaborator

Your description sounds very similar to #45725.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants