llvm.org GIT mirror llvm / 45788be
Implement the sext(C1 + C2*X) --> sext(C1) + sext(C2*X) and sext{C1,+,C2} --> sext(C1) + sext{0,+,C2} transformations in Scalar Evolution. This helps the SLP-vectorizer recognize consecutive loads/stores. <rdar://problem/14860614> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209568 91177308-0d34-0410-b5e6-96231b3b80d8 Michael Zolotukhin 5 years ago
2 changed file(s) with 210 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
12001200 return getTruncateOrSignExtend(X, Ty);
12011201 }
12021202
1203 // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2
1204 if (auto SA = dyn_cast(Op)) {
1205 if (SA->getNumOperands() == 2) {
1206 auto SC1 = dyn_cast(SA->getOperand(0));
1207 auto SMul = dyn_cast(SA->getOperand(1));
1208 if (SMul && SC1) {
1209 if (auto SC2 = dyn_cast(SMul->getOperand(0))) {
1210 APInt C1 = SC1->getValue()->getValue();
1211 APInt C2 = SC2->getValue()->getValue();
1212 APInt CDiff = C2 - C1;
1213 if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
1214 CDiff.isStrictlyPositive() && C2.isPowerOf2())
1215 return getAddExpr(getSignExtendExpr(SC1, Ty),
1216 getSignExtendExpr(SMul, Ty));
1217 }
1218 }
1219 }
1220 }
12031221 // If the input value is a chrec scev, and we can prove that the value
12041222 // did not overflow the old, smaller, value, we can sign extend all of the
12051223 // operands (often constants). This allows analysis of something like
12891307 return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
12901308 getSignExtendExpr(Step, Ty),
12911309 L, AR->getNoWrapFlags());
1310 }
1311 }
1312 // If Start and Step are constants, check if we can apply this
1313 // transformation:
1314 // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2
1315 auto SC1 = dyn_cast(Start);
1316 auto SC2 = dyn_cast(Step);
1317 if (SC1 && SC2) {
1318 APInt C1 = SC1->getValue()->getValue();
1319 APInt C2 = SC2->getValue()->getValue();
1320 APInt CDiff = C2 - C1;
1321 if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
1322 CDiff.isStrictlyPositive() && C2.isPowerOf2()) {
1323 Start = getSignExtendExpr(Start, Ty);
1324 const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step,
1325 L, AR->getNoWrapFlags());
1326 return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
12921327 }
12931328 }
12941329 }
0 ; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
1 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
2 target triple = "x86_64-apple-macosx10.9.0"
3
4 @A = common global [2000 x double] zeroinitializer, align 16
5 @B = common global [2000 x double] zeroinitializer, align 16
6 @C = common global [2000 x float] zeroinitializer, align 16
7 @D = common global [2000 x float] zeroinitializer, align 16
8
9 ; Currently SCEV isn't smart enough to figure out that accesses
10 ; A[3*i], A[3*i+1] and A[3*i+2] are consecutive, but in future
11 ; that would hopefully be fixed. For now, check that this isn't
12 ; vectorized.
13 ; CHECK-LABEL: foo_3double
14 ; CHECK-NOT: x double>
15 ; Function Attrs: nounwind ssp uwtable
16 define void @foo_3double(i32 %u) #0 {
17 entry:
18 %u.addr = alloca i32, align 4
19 store i32 %u, i32* %u.addr, align 4
20 %mul = mul nsw i32 %u, 3
21 %idxprom = sext i32 %mul to i64
22 %arrayidx = getelementptr inbounds [2000 x double]* @A, i32 0, i64 %idxprom
23 %0 = load double* %arrayidx, align 8
24 %arrayidx4 = getelementptr inbounds [2000 x double]* @B, i32 0, i64 %idxprom
25 %1 = load double* %arrayidx4, align 8
26 %add5 = fadd double %0, %1
27 store double %add5, double* %arrayidx, align 8
28 %add11 = add nsw i32 %mul, 1
29 %idxprom12 = sext i32 %add11 to i64
30 %arrayidx13 = getelementptr inbounds [2000 x double]* @A, i32 0, i64 %idxprom12
31 %2 = load double* %arrayidx13, align 8
32 %arrayidx17 = getelementptr inbounds [2000 x double]* @B, i32 0, i64 %idxprom12
33 %3 = load double* %arrayidx17, align 8
34 %add18 = fadd double %2, %3
35 store double %add18, double* %arrayidx13, align 8
36 %add24 = add nsw i32 %mul, 2
37 %idxprom25 = sext i32 %add24 to i64
38 %arrayidx26 = getelementptr inbounds [2000 x double]* @A, i32 0, i64 %idxprom25
39 %4 = load double* %arrayidx26, align 8
40 %arrayidx30 = getelementptr inbounds [2000 x double]* @B, i32 0, i64 %idxprom25
41 %5 = load double* %arrayidx30, align 8
42 %add31 = fadd double %4, %5
43 store double %add31, double* %arrayidx26, align 8
44 ret void
45 }
46
47 ; SCEV should be able to tell that accesses A[C2*i], A[C2*i + 1], ...,
48 ; A[C2*i + C1] are consecutive, if C2 is a power of 2, and C2 > C1 > 0.
49 ; Thus, the following code should be vectorized.
50 ; CHECK-LABEL: foo_2double
51 ; CHECK: x double>
52 ; Function Attrs: nounwind ssp uwtable
53 define void @foo_2double(i32 %u) #0 {
54 entry:
55 %u.addr = alloca i32, align 4
56 store i32 %u, i32* %u.addr, align 4
57 %mul = mul nsw i32 %u, 2
58 %idxprom = sext i32 %mul to i64
59 %arrayidx = getelementptr inbounds [2000 x double]* @A, i32 0, i64 %idxprom
60 %0 = load double* %arrayidx, align 8
61 %arrayidx4 = getelementptr inbounds [2000 x double]* @B, i32 0, i64 %idxprom
62 %1 = load double* %arrayidx4, align 8
63 %add5 = fadd double %0, %1
64 store double %add5, double* %arrayidx, align 8
65 %add11 = add nsw i32 %mul, 1
66 %idxprom12 = sext i32 %add11 to i64
67 %arrayidx13 = getelementptr inbounds [2000 x double]* @A, i32 0, i64 %idxprom12
68 %2 = load double* %arrayidx13, align 8
69 %arrayidx17 = getelementptr inbounds [2000 x double]* @B, i32 0, i64 %idxprom12
70 %3 = load double* %arrayidx17, align 8
71 %add18 = fadd double %2, %3
72 store double %add18, double* %arrayidx13, align 8
73 ret void
74 }
75
76 ; Similar to the previous test, but with different datatype.
77 ; CHECK-LABEL: foo_4float
78 ; CHECK: x float>
79 ; Function Attrs: nounwind ssp uwtable
80 define void @foo_4float(i32 %u) #0 {
81 entry:
82 %u.addr = alloca i32, align 4
83 store i32 %u, i32* %u.addr, align 4
84 %mul = mul nsw i32 %u, 4
85 %idxprom = sext i32 %mul to i64
86 %arrayidx = getelementptr inbounds [2000 x float]* @C, i32 0, i64 %idxprom
87 %0 = load float* %arrayidx, align 4
88 %arrayidx4 = getelementptr inbounds [2000 x float]* @D, i32 0, i64 %idxprom
89 %1 = load float* %arrayidx4, align 4
90 %add5 = fadd float %0, %1
91 store float %add5, float* %arrayidx, align 4
92 %add11 = add nsw i32 %mul, 1
93 %idxprom12 = sext i32 %add11 to i64
94 %arrayidx13 = getelementptr inbounds [2000 x float]* @C, i32 0, i64 %idxprom12
95 %2 = load float* %arrayidx13, align 4
96 %arrayidx17 = getelementptr inbounds [2000 x float]* @D, i32 0, i64 %idxprom12
97 %3 = load float* %arrayidx17, align 4
98 %add18 = fadd float %2, %3
99 store float %add18, float* %arrayidx13, align 4
100 %add24 = add nsw i32 %mul, 2
101 %idxprom25 = sext i32 %add24 to i64
102 %arrayidx26 = getelementptr inbounds [2000 x float]* @C, i32 0, i64 %idxprom25
103 %4 = load float* %arrayidx26, align 4
104 %arrayidx30 = getelementptr inbounds [2000 x float]* @D, i32 0, i64 %idxprom25
105 %5 = load float* %arrayidx30, align 4
106 %add31 = fadd float %4, %5
107 store float %add31, float* %arrayidx26, align 4
108 %add37 = add nsw i32 %mul, 3
109 %idxprom38 = sext i32 %add37 to i64
110 %arrayidx39 = getelementptr inbounds [2000 x float]* @C, i32 0, i64 %idxprom38
111 %6 = load float* %arrayidx39, align 4
112 %arrayidx43 = getelementptr inbounds [2000 x float]* @D, i32 0, i64 %idxprom38
113 %7 = load float* %arrayidx43, align 4
114 %add44 = fadd float %6, %7
115 store float %add44, float* %arrayidx39, align 4
116 ret void
117 }
118
119 ; Similar to the previous tests, but now we are dealing with AddRec SCEV.
120 ; CHECK-LABEL: foo_loop
121 ; CHECK: x double>
122 ; Function Attrs: nounwind ssp uwtable
123 define i32 @foo_loop(double* %A, i32 %n) #0 {
124 entry:
125 %A.addr = alloca double*, align 8
126 %n.addr = alloca i32, align 4
127 %sum = alloca double, align 8
128 %i = alloca i32, align 4
129 store double* %A, double** %A.addr, align 8
130 store i32 %n, i32* %n.addr, align 4
131 store double 0.000000e+00, double* %sum, align 8
132 store i32 0, i32* %i, align 4
133 %cmp1 = icmp slt i32 0, %n
134 br i1 %cmp1, label %for.body.lr.ph, label %for.end
135
136 for.body.lr.ph: ; preds = %entry
137 br label %for.body
138
139 for.body: ; preds = %for.body.lr.ph, %for.body
140 %0 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
141 %1 = phi double [ 0.000000e+00, %for.body.lr.ph ], [ %add7, %for.body ]
142 %mul = mul nsw i32 %0, 2
143 %idxprom = sext i32 %mul to i64
144 %arrayidx = getelementptr inbounds double* %A, i64 %idxprom
145 %2 = load double* %arrayidx, align 8
146 %mul1 = fmul double 7.000000e+00, %2
147 %add = add nsw i32 %mul, 1
148 %idxprom3 = sext i32 %add to i64
149 %arrayidx4 = getelementptr inbounds double* %A, i64 %idxprom3
150 %3 = load double* %arrayidx4, align 8
151 %mul5 = fmul double 7.000000e+00, %3
152 %add6 = fadd double %mul1, %mul5
153 %add7 = fadd double %1, %add6
154 store double %add7, double* %sum, align 8
155 %inc = add nsw i32 %0, 1
156 store i32 %inc, i32* %i, align 4
157 %cmp = icmp slt i32 %inc, %n
158 br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
159
160 for.cond.for.end_crit_edge: ; preds = %for.body
161 %split = phi double [ %add7, %for.body ]
162 br label %for.end
163
164 for.end: ; preds = %for.cond.for.end_crit_edge, %entry
165 %.lcssa = phi double [ %split, %for.cond.for.end_crit_edge ], [ 0.000000e+00, %entry ]
166 %conv = fptosi double %.lcssa to i32
167 ret i32 %conv
168 }
169
170 attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
171
172 !llvm.ident = !{!0}
173
174 !0 = metadata !{metadata !"clang version 3.5.0 "}