[AggressiveInstCombine] convert rotate with guard branch into funnel shift (PR34924)

Sanjay Patel, 9 months ago

Now that we have funnel shift intrinsics, it should be safe to convert this form of rotate to them. In the worst case (a target that doesn't have rotate instructions), we will expand the funnel shift into a branch-less sequence of ALU ops (neg/and/and/lshr/shl/or) in the backend, so it's still very likely to be a perf improvement over the original code.

The motivating source code pattern for this is shown in:
https://bugs.llvm.org/show_bug.cgi?id=34924

Background: I looked at several different options before deciding where to try this - instcombine, simplifycfg, CGP - because it doesn't fit cleanly anywhere AFAIK. The backend (CGP, SDAG, GlobalISel?) is too late for what we're trying to accomplish: we want the IR converted before we reach things like vectorization, because the reduced code can make a loop much simpler to transform.

Technically, this could be included in instcombine, but it's a large pattern match that includes control-flow, so it just felt wrong to stuff it in there (although I have a draft of that patch). Similarly, this could be part of simplifycfg, but all of this pattern matching is a stretch. So we're left with our relatively new dumping ground for homeless transforms: aggressive-instcombine. This only runs at -O3, but that seems like a reasonable limitation given that source code has many options to avoid this pattern (including the recently added clang intrinsics for rotates).

I'm including a PhaseOrdering test because we require the teamwork of three passes (aggressive-instcombine, instcombine, simplifycfg) to get this into the minimal IR form that we want. That test shows a bug with the new pass manager that's independent of this change (but it will be masked if we canonicalize harder to funnel shift intrinsics in instcombine).

Differential Revision: https://reviews.llvm.org/D55604

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@349396 91177308-0d34-0410-b5e6-96231b3b80d8
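For reference, the motivating guarded-rotate idiom looks roughly like the sketch below. This is a hedged reconstruction for illustration, not the exact source from PR34924; the branch exists only because x >> (32 - n) is undefined behavior in C/C++ when n == 0:

#include <cstdint>

// Rotate-left with a guard branch: shifting a 32-bit value by 32 is UB,
// so the shift-by-zero case is branched around. This control-flow pattern
// is what the new fold collapses into a single llvm.fshl call.
uint32_t guarded_rotl32(uint32_t x, uint32_t n) {
  if (n == 0)
    return x;
  return (x << n) | (x >> (32 - n));
}

// A source-level way to avoid the pattern entirely is the clang rotate
// builtin mentioned in the commit message:
//   uint32_t r = __builtin_rotateleft32(x, n);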
3 changed file(s) with 410 addition(s) and 76 deletion(s).
lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp:

   bool runOnFunction(Function &F) override;
 };
 } // namespace
+
+/// Match a pattern for a bitwise rotate operation that partially guards
+/// against undefined behavior by branching around the rotation when the shift
+/// amount is 0.
+static bool foldGuardedRotateToFunnelShift(Instruction &I) {
+  if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
+    return false;
+
+  // As with the one-use checks below, this is not strictly necessary, but we
+  // are being cautious to avoid potential perf regressions on targets that
+  // do not actually have a rotate instruction (where the funnel shift would be
+  // expanded back into math/shift/logic ops).
+  if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
+    return false;
+
+  // Match V to funnel shift left/right and capture the source operand and
+  // shift amount in X and Y.
+  auto matchRotate = [](Value *V, Value *&X, Value *&Y) {
+    Value *L0, *L1, *R0, *R1;
+    unsigned Width = V->getType()->getScalarSizeInBits();
+    auto Sub = m_Sub(m_SpecificInt(Width), m_Value(R1));
+
+    // rotate_left(X, Y) == (X << Y) | (X >> (Width - Y))
+    auto RotL = m_OneUse(m_c_Or(m_Shl(m_Value(L0), m_Value(L1)),
+                                m_LShr(m_Value(R0), Sub)));
+    if (RotL.match(V) && L0 == R0 && L1 == R1) {
+      X = L0;
+      Y = L1;
+      return Intrinsic::fshl;
+    }
+
+    // rotate_right(X, Y) == (X >> Y) | (X << (Width - Y))
+    auto RotR = m_OneUse(m_c_Or(m_LShr(m_Value(L0), m_Value(L1)),
+                                m_Shl(m_Value(R0), Sub)));
+    if (RotR.match(V) && L0 == R0 && L1 == R1) {
+      X = L0;
+      Y = L1;
+      return Intrinsic::fshr;
+    }
+
+    return Intrinsic::not_intrinsic;
+  };
+
+  // One phi operand must be a rotate operation, and the other phi operand must
+  // be the source value of that rotate operation:
+  //   phi [ rotate(RotSrc, RotAmt), RotBB ], [ RotSrc, GuardBB ]
+  PHINode &Phi = cast<PHINode>(I);
+  Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
+  Value *RotSrc, *RotAmt;
+  Intrinsic::ID IID = matchRotate(P0, RotSrc, RotAmt);
+  if (IID == Intrinsic::not_intrinsic || RotSrc != P1) {
+    IID = matchRotate(P1, RotSrc, RotAmt);
+    if (IID == Intrinsic::not_intrinsic || RotSrc != P0)
+      return false;
+    assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
+           "Pattern must match funnel shift left or right");
+  }
+
+  // The incoming block with our source operand must be the "guard" block.
+  // That must contain a cmp+branch to avoid the rotate when the shift amount
+  // is equal to 0. The other incoming block is the block with the rotate.
+  BasicBlock *GuardBB = Phi.getIncomingBlock(RotSrc == P1);
+  BasicBlock *RotBB = Phi.getIncomingBlock(RotSrc != P1);
+  Instruction *TermI = GuardBB->getTerminator();
+  BasicBlock *TrueBB, *FalseBB;
+  ICmpInst::Predicate Pred;
+  if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()),
+                         TrueBB, FalseBB)))
+    return false;
+
+  BasicBlock *PhiBB = Phi.getParent();
+  if (Pred != CmpInst::ICMP_EQ || TrueBB != PhiBB || FalseBB != RotBB)
+    return false;
+
+  // We matched a variation of this IR pattern:
+  // GuardBB:
+  //   %cmp = icmp eq i32 %RotAmt, 0
+  //   br i1 %cmp, label %PhiBB, label %RotBB
+  // RotBB:
+  //   %sub = sub i32 32, %RotAmt
+  //   %shr = lshr i32 %X, %sub
+  //   %shl = shl i32 %X, %RotAmt
+  //   %rot = or i32 %shr, %shl
+  //   br label %PhiBB
+  // PhiBB:
+  //   %cond = phi i32 [ %rot, %RotBB ], [ %X, %GuardBB ]
+  // -->
+  //   llvm.fshl.i32(i32 %X, i32 %X, i32 %RotAmt)
+  IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
+  Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType());
+  Phi.replaceAllUsesWith(Builder.CreateCall(F, {RotSrc, RotSrc, RotAmt}));
+  return true;
+}

 /// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and
 /// the bit indexes (Mask) needed by a masked compare. If we're matching a chain
...
     // Also, we want to avoid matching partial patterns.
     // TODO: It would be more efficient if we removed dead instructions
     // iteratively in this loop rather than waiting until the end.
-    for (Instruction &I : make_range(BB.rbegin(), BB.rend()))
+    for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
       MadeChange |= foldAnyOrAllBitsSet(I);
+      MadeChange |= foldGuardedRotateToFunnelShift(I);
+    }
   }

   // We're done with transforms, so remove dead instructions.
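A note on why dropping the guard is sound: the funnel shift intrinsics define their shift amount modulo the bit width, so fshl(X, X, 0) is just X, which is exactly the value the phi selected from the guard block. The sketch below is illustrative C++ under that definition, not LLVM's actual lowering code:

#include <cstdint>

// Reference semantics of llvm.fshl.i32: the shift amount is taken modulo
// the (power-of-2) bit width, so no UB case is left for a guard to cover.
uint32_t fshl32(uint32_t A, uint32_t B, uint32_t S) {
  S &= 31;                             // shift amount modulo the width
  if (S == 0)
    return A;                          // fshl(X, X, 0) == X
  return (A << S) | (B >> (32 - S));   // with A == B, a rotate-left
}

// A branch-less expansion that a target without rotate instructions could
// use, mirroring the neg/and/and/lshr/shl/or sequence from the commit
// message; it also returns X unchanged when Y == 0.
uint32_t rotl32_expanded(uint32_t X, uint32_t Y) {
  uint32_t ShAmt = Y & 31;             // and
  uint32_t InvAmt = (0u - Y) & 31;     // neg + and
  return (X << ShAmt) | (X >> InvAmt); // shl + lshr + or
}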
test/Transforms/AggressiveInstCombine/rotate.ll:

 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
 ; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
-; CHECK-NEXT: br label [[END]]
-; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[ROTBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
 ;
 entry:
   %cmp = icmp eq i32 %b, 0
...
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
 ; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
-; CHECK-NEXT: br label [[END]]
-; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
 ;
 entry:
   %cmp = icmp eq i32 %b, 0
...
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
 ; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
-; CHECK-NEXT: br label [[END]]
-; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
 ;
 entry:
   %cmp = icmp eq i32 %b, 0
...
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
 ; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
-; CHECK-NEXT: br label [[END]]
-; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[ROTBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[OTHER:%.*]] = phi i32 [ 1, [[ROTBB]] ], [ 2, [[ENTRY]] ]
-; CHECK-NEXT: [[RES:%.*]] = or i32 [[COND]], [[OTHER]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[OTHER:%.*]] = phi i32 [ 1, [[ROTBB]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: [[RES:%.*]] = or i32 [[TMP0]], [[OTHER]]
 ; CHECK-NEXT: ret i32 [[RES]]
 ;
 entry:
...
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
 ; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
-; CHECK-NEXT: br label [[END]]
-; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[ROTBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
 ;
 entry:
   %cmp = icmp eq i32 %b, 0
...
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
 ; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
-; CHECK-NEXT: br label [[END]]
-; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
 ;
 entry:
   %cmp = icmp eq i32 %b, 0
...
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
 ; CHECK: rotbb:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+  %sub = sub i32 32, %b
+  %shl = shl i32 %a, %sub
+  %shr = lshr i32 %a, %b
+  %or = or i32 %shl, %shr
+  br label %end
+
+end:
+  %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+  ret i32 %cond
+}
+
+; Negative test - non-power-of-2 might require urem expansion in the backend.
+
+define i12 @could_be_rotr_weird_type(i12 %a, i12 %b) {
+; CHECK-LABEL: @could_be_rotr_weird_type(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i12 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i12 12, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i12 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i12 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i12 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i12 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i12 [[COND]]
+;
+entry:
+  %cmp = icmp eq i12 %b, 0
+  br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+  %sub = sub i12 12, %b
+  %shl = shl i12 %a, %sub
+  %shr = lshr i12 %a, %b
+  %or = or i12 %shl, %shr
+  br label %end
+
+end:
+  %cond = phi i12 [ %a, %entry ], [ %or, %rotbb ]
+  ret i12 %cond
+}
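The weird-type negative test above is a direct consequence of the power-of-2 check in the new fold: expanding a funnel shift requires reducing the shift amount modulo the bit width, which is a single mask for i32 but an integer remainder (urem) for i12. A small C++ illustration with a hypothetical helper, not LLVM code:

// Reducing a rotate amount modulo the bit width: one AND when the width is
// a power of 2, a urem otherwise.
unsigned reduceShiftAmount(unsigned Amt, unsigned Width) {
  bool IsPow2 = Width != 0 && (Width & (Width - 1)) == 0;
  return IsPow2 ? (Amt & (Width - 1))  // i32: Amt & 31
                : (Amt % Width);       // i12: Amt % 12 (urem)
}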
+
+; Negative test - wrong phi ops.
+
+define i32 @not_rotr_1(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
 ; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
 ; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
 ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
 ; CHECK-NEXT: br label [[END]]
 ; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+  %sub = sub i32 32, %b
+  %shl = shl i32 %a, %sub
+  %shr = lshr i32 %a, %b
+  %or = or i32 %shl, %shr
+  br label %end
+
+end:
+  %cond = phi i32 [ %b, %entry ], [ %or, %rotbb ]
+  ret i32 %cond
+}
+
+; Negative test - too many phi ops.
+
+define i32 @not_rotr_2(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @not_rotr_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: [[CMP42:%.*]] = icmp ugt i32 [[OR]], 42
+; CHECK-NEXT: br i1 [[CMP42]], label [[END]], label [[BOGUS:%.*]]
+; CHECK: bogus:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ], [ [[C:%.*]], [[BOGUS]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+  %sub = sub i32 32, %b
+  %shl = shl i32 %a, %sub
+  %shr = lshr i32 %a, %b
+  %or = or i32 %shl, %shr
+  %cmp42 = icmp ugt i32 %or, 42
+  br i1 %cmp42, label %end, label %bogus
+
+bogus:
+  br label %end
+
+end:
+  %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ], [ %c, %bogus ]
+  ret i32 %cond
+}
+
+; Negative test - wrong cmp (but this should match?).
+
+define i32 @not_rotr_3(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
 ; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
 ; CHECK-NEXT: ret i32 [[COND]]
 ;
 entry:
-  %cmp = icmp eq i32 %b, 0
-  br i1 %cmp, label %end, label %rotbb
-
-rotbb:
-  %sub = sub i32 32, %b
-  %shl = shl i32 %a, %sub
-  %shr = lshr i32 %a, %b
-  %or = or i32 %shl, %shr
-  br label %end
-
-end:
-  %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
-  ret i32 %cond
-}
-
+  %cmp = icmp sle i32 %b, 0
+  br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+  %sub = sub i32 32, %b
+  %shl = shl i32 %a, %sub
+  %shr = lshr i32 %a, %b
+  %or = or i32 %shl, %shr
+  br label %end
+
+end:
+  %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+  ret i32 %cond
+}
+
+; Negative test - wrong shift.
+
+define i32 @not_rotr_4(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+  %sub = sub i32 32, %b
+  %shl = shl i32 %a, %sub
+  %shr = ashr i32 %a, %b
+  %or = or i32 %shl, %shr
+  br label %end
+
+end:
+  %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+  ret i32 %cond
+}
+
+; Negative test - wrong shift.
+
+define i32 @not_rotr_5(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_5(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[B]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+  %sub = sub i32 32, %b
+  %shl = shl i32 %b, %sub
+  %shr = lshr i32 %a, %b
+  %or = or i32 %shl, %shr
+  br label %end
+
+end:
+  %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+  ret i32 %cond
+}
+
+; Negative test - wrong sub.
+
+define i32 @not_rotr_6(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_6(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 8, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+  %sub = sub i32 8, %b
+  %shl = shl i32 %a, %sub
+  %shr = lshr i32 %a, %b
+  %or = or i32 %shl, %shr
+  br label %end
+
+end:
+  %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+  ret i32 %cond
+}
+
+; Negative test - extra use. Technically, we could transform this
+; because it doesn't increase the instruction count, but we're
+; being cautious not to cause a potential perf pessimization for
+; targets that do not have a rotate instruction.
+
+define i32 @could_be_rotr(i32 %a, i32 %b, i32* %p) {
+; CHECK-LABEL: @could_be_rotr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: store i32 [[OR]], i32* [[P:%.*]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+  %sub = sub i32 32, %b
+  %shl = shl i32 %a, %sub
+  %shr = lshr i32 %a, %b
+  %or = or i32 %shl, %shr
+  store i32 %or, i32* %p
+  br label %end
+
+end:
+  %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+  ret i32 %cond
+}
+
test/Transforms/PhaseOrdering/rotate.ll:

 define i32 @rotl(i32 %a, i32 %b) {
 ; OLDPM-LABEL: @rotl(
 ; OLDPM-NEXT: entry:
-; OLDPM-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
-; OLDPM-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
-; OLDPM: rotbb:
-; OLDPM-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; OLDPM-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[SUB]]
-; OLDPM-NEXT: [[SHL:%.*]] = shl i32 [[A]], [[B]]
-; OLDPM-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
-; OLDPM-NEXT: br label [[END]]
-; OLDPM: end:
-; OLDPM-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[ROTBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; OLDPM-NEXT: ret i32 [[COND]]
+; OLDPM-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B:%.*]])
+; OLDPM-NEXT: ret i32 [[TMP0]]
 ;
 ; NEWPM-LABEL: @rotl(
 ; NEWPM-NEXT: entry: