llvm.org GIT mirror llvm / f11cae6
Extend trip count instead of truncating IV in LFTR, when legal. When legal, extending the trip count in the loop control logic generates better code than truncating the IV. This is because (1) extending the trip count is a loop-invariant operation (see genLoopLimit, where we prove the trip count is loop invariant), and (2) Scalar Evolution seems to have problems understanding trunc when computing the loop trip count, so removing the truncs allows better analysis to be performed in Scalar Evolution. (In particular, this fixes PR 28363, which is the motivation for this change.) I am not going to perform any performance test. Any degradation caused by this should be an indication of a bug elsewhere. To prove legality, we rely on SCEV to prove zext(trunc(IV)) == IV (or similarly for sext). If this holds, we can prove the equivalence of trunc(IV) == ExitCnt (1) and IV == zext(ExitCnt): simply take zext of both sides of (1) and apply the proven equivalence. This commit contains changes in a newly added testcase which was not included in the previous commit (which was reverted later on). https://reviews.llvm.org/D23075 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@278421 91177308-0d34-0410-b5e6-96231b3b80d8 Ehsan Amiri 3 years ago
5 changed file(s) with 196 addition(s) and 8 deletion(s). Raw diff Collapse all Expand all
19881988
19891989 DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n");
19901990 } else {
1991
1991 // We try to extend trip count first. If that doesn't work we truncate IV.
1992 // Zext(trunc(IV)) == IV implies equivalence of the following two:
1993 // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If
1994 // one of the two holds, extend the trip count, otherwise we truncate IV.
1995 bool Extended = false;
1996 const SCEV *IV = SE->getSCEV(CmpIndVar);
1997 const SCEV *ZExtTrunc =
1998 SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
1999 ExitCnt->getType()),
2000 CmpIndVar->getType());
2001
2002 if (ZExtTrunc == IV) {
2003 Extended = true;
2004 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2005 "wide.trip.count");
2006 } else {
2007 const SCEV *SExtTrunc =
2008 SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2009 ExitCnt->getType()),
2010 CmpIndVar->getType());
2011 if (SExtTrunc == IV) {
2012 Extended = true;
2013 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2014 "wide.trip.count");
2015 }
2016 }
2017
2018 if (!Extended)
19922019 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
19932020 "lftr.wideiv");
19942021 }
3939 %precond = icmp sgt i32 %limit, %init
4040 br i1 %precond, label %loop, label %return
4141 ; CHECK: loop:
42 ; CHECK-NOT: sext
43 ; CHECK: wide.trip.count = sext
4244 ; CHECK-NOT: sext
4345 ; CHECK: exit:
4446 loop:
None ; RUN: opt < %s -indvars -S | FileCheck %s
0 ; RUN: opt < %s -indvars -S | FileCheck %s --implicit-check-not sext --implicit-check-not zext
11
22 target datalayout = "p:64:64:64-n32:64"
33
66 ; the IV is considered signed or unsigned.
77 define void @foo(i32* %A, i32* %B, i32* %C, i32 %N) {
88 ; CHECK-LABEL: @foo(
9 ; CHECK-NOT: zext
10 ; CHECK-NOT: sext
9 ; CHECK: wide.trip.count = zext
10 ; CHECK: ret void
1111 entry:
1212 %cmp1 = icmp slt i32 0, %N
1313 br i1 %cmp1, label %for.body.lr.ph, label %for.end
4444
4545 define void @foo1(i32* %A, i32* %B, i32* %C, i32 %N) {
4646 ; CHECK-LABEL: @foo1(
47 ; CHECK-NOT: zext
48 ; CHECK-NOT: sext
47 ; CHECK: wide.trip.count = zext
48 ; CHECK: ret void
4949 entry:
5050 %cmp1 = icmp slt i32 0, %N
5151 br i1 %cmp1, label %for.body.lr.ph, label %for.end
0 ; RUN: opt -S -indvars < %s | FileCheck %s
1
2 ; Provide legal integer types.
3 target datalayout = "n8:16:32:64"
4
5
6 define void @test1(float* %autoc,
7 float* %data,
8 float %d, i32 %data_len, i32 %sample) nounwind {
9 entry:
10 %sub = sub i32 %data_len, %sample
11 %cmp4 = icmp eq i32 %data_len, %sample
12 br i1 %cmp4, label %for.end, label %for.body
13
14 for.body: ; preds = %entry, %for.body
15 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 68719476736, %entry ]
16 %temp = trunc i64 %indvars.iv to i32
17 %add = add i32 %temp, %sample
18 %idxprom = zext i32 %add to i64
19 %arrayidx = getelementptr inbounds float, float* %data, i64 %idxprom
20 %temp1 = load float, float* %arrayidx, align 4
21 %mul = fmul float %temp1, %d
22 %arrayidx2 = getelementptr inbounds float, float* %autoc, i64 %indvars.iv
23 %temp2 = load float, float* %arrayidx2, align 4
24 %add3 = fadd float %temp2, %mul
25 store float %add3, float* %arrayidx2, align 4
26 %indvars.iv.next = add i64 %indvars.iv, 1
27 %temp3 = trunc i64 %indvars.iv.next to i32
28 %cmp = icmp ult i32 %temp3, %sub
29 br i1 %cmp, label %for.body, label %for.end
30
31 for.end: ; preds = %for.body, %entry
32 ret void
33
34 ; CHECK-LABEL: @test1(
35
36 ; With the given initial value for IV, it is not legal to widen
37 ; trip count to IV size
38 ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
39 ; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
40 ; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %sub
41 ; CHECK: br i1 %exitcond, label %for.body, label %for.end.loopexit
42 }
43
44 define float @test2(float* %a,
45 float* %b,
46 i32 zeroext %m) local_unnamed_addr #0 {
47 entry:
48 %cmp5 = icmp ugt i32 %m, 500
49 br i1 %cmp5, label %for.body.preheader, label %for.end
50
51 for.body.preheader: ; preds = %entry
52 br label %for.body
53
54 for.body: ; preds = %for.body.preheader, %for.body
55 %sum.07 = phi float [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
56 %i.06 = phi i32 [ %inc, %for.body ], [ 500, %for.body.preheader ]
57 %idxprom = zext i32 %i.06 to i64
58 %arrayidx = getelementptr inbounds float, float* %b, i64 %idxprom
59 %temp = load float, float* %arrayidx, align 4
60 %arrayidx2 = getelementptr inbounds float, float* %a, i64 %idxprom
61 %temp1 = load float, float* %arrayidx2, align 4
62 %mul = fmul float %temp, %temp1
63 %add = fadd float %sum.07, %mul
64 %inc = add i32 %i.06, 1
65 %cmp = icmp ult i32 %inc, %m
66 br i1 %cmp, label %for.body, label %for.end.loopexit
67
68 for.end.loopexit: ; preds = %for.body
69 br label %for.end
70
71 for.end: ; preds = %for.end.loopexit, %entry
72 %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.end.loopexit ]
73 ret float %sum.0.lcssa
74
75 ; CHECK-LABEL: @test2(
76 ; Trip count should be widened and LFTR should canonicalize the condition
77 ; CHECK: %wide.trip.count = zext
78 ; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
79 ; CHECK: br i1 %exitcond
80 }
81
82 define float @test3(float* %b,
83 i32 signext %m) local_unnamed_addr #0 {
84 entry:
85 %cmp5 = icmp sgt i32 %m, -10
86 br i1 %cmp5, label %for.body.preheader, label %for.end
87
88 for.body.preheader: ; preds = %entry
89 br label %for.body
90
91 for.body: ; preds = %for.body.preheader, %for.body
92 %sum.07 = phi float [ %add1, %for.body ], [ 0.000000e+00, %for.body.preheader ]
93 %i.06 = phi i32 [ %inc, %for.body ], [ -10, %for.body.preheader ]
94 %add = add nsw i32 %i.06, 20
95 %idxprom = sext i32 %add to i64
96 %arrayidx = getelementptr inbounds float, float* %b, i64 %idxprom
97 %temp = load float, float* %arrayidx, align 4
98 %conv = sitofp i32 %i.06 to float
99 %mul = fmul float %conv, %temp
100 %add1 = fadd float %sum.07, %mul
101 %inc = add nsw i32 %i.06, 1
102 %cmp = icmp slt i32 %inc, %m
103 br i1 %cmp, label %for.body, label %for.end.loopexit
104
105 for.end.loopexit: ; preds = %for.body
106 br label %for.end
107
108 for.end: ; preds = %for.end.loopexit, %entry
109 %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add1, %for.end.loopexit ]
110 ret float %sum.0.lcssa
111
112 ; CHECK-LABEL: @test3(
113 ; Trip count should be widened and LFTR should canonicalize the condition
114 ; CHECK: %wide.trip.count = sext
115 ; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
116 ; CHECK: br i1 %exitcond
117 }
118
119 define float @test4(float* %b,
120 i32 signext %m) local_unnamed_addr #0 {
121 entry:
122 %cmp5 = icmp sgt i32 %m, 10
123 br i1 %cmp5, label %for.body.preheader, label %for.end
124
125 for.body.preheader: ; preds = %entry
126 br label %for.body
127
128 for.body: ; preds = %for.body.preheader, %for.body
129 %sum.07 = phi float [ %add1, %for.body ], [ 0.000000e+00, %for.body.preheader ]
130 %i.06 = phi i32 [ %inc, %for.body ], [ 10, %for.body.preheader ]
131 %add = add nsw i32 %i.06, 20
132 %idxprom = sext i32 %add to i64
133 %arrayidx = getelementptr inbounds float, float* %b, i64 %idxprom
134 %temp = load float, float* %arrayidx, align 4
135 %conv = sitofp i32 %i.06 to float
136 %mul = fmul float %conv, %temp
137 %add1 = fadd float %sum.07, %mul
138 %inc = add nsw i32 %i.06, 1
139 %cmp = icmp slt i32 %inc, %m
140 br i1 %cmp, label %for.body, label %for.end.loopexit
141
142 for.end.loopexit: ; preds = %for.body
143 %add1.lcssa = phi float [ %add1, %for.body ]
144 br label %for.end
145
146 for.end: ; preds = %for.end.loopexit, %entry
147 %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add1.lcssa, %for.end.loopexit ]
148 ret float %sum.0.lcssa
149
150 ; CHECK-LABEL: @test4(
151 ; Trip count should be widened and LFTR should canonicalize the condition
152 ; CHECK: %wide.trip.count = zext
153 ; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
154 ; CHECK: br i1 %exitcond
155 }
156
157
3232 ; CHECK-LABEL: @test1(
3333
3434 ; check that we turn the IV test into an eq.
35 ; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
36 ; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %su
35 ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
36 ; CHECK: %wide.trip.count = zext i32 %sub to i64
37 ; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
3738 ; CHECK: br i1 %exitcond, label %for.body, label %for.end.loopexit
3839 }
3940