llvm.org GIT mirror llvm / 8b48b04
The patch adds CTLZ idiom recognition. Summary: Loops of the following form should be recognized: `i = 0; while (n) { n = n >> 1; i++; body(); } use(i);` If body() is empty, the loop is replaced with builtin_ctlz(n); otherwise, for CPUs that have a CTLZ instruction, the loop is converted to a countable one: `for (j = 0; j < builtin_ctlz(n); j++) { n = n >> 1; i++; body(); } use(builtin_ctlz(n));` Reviewers: rengolin, joerg Differential Revision: http://reviews.llvm.org/D32605 From: Evgeny Stupachenko <evstupac@gmail.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@303102 91177308-0d34-0410-b5e6-96231b3b80d8 Evgeny Stupachenko 3 years ago
3 changed file(s) with 664 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
143143 bool recognizePopcount();
144144 void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
145145 PHINode *CntPhi, Value *Var);
146 bool recognizeAndInsertCTLZ();
147 void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst,
148 PHINode *CntPhi, Value *Var, const DebugLoc DL,
149 bool ZeroCheck, bool IsCntPhiUsedOutsideLoop);
146150
147151 /// @}
148152 };
993997 }
994998
995999 bool LoopIdiomRecognize::runOnNoncountableLoop() {
996 return recognizePopcount();
1000 return recognizePopcount() || recognizeAndInsertCTLZ();
9971001 }
9981002
9991003 /// Check if the given conditional branch is based on the comparison between
11581162 return true;
11591163 }
11601164
1165 /// Return true if the idiom is detected in the loop.
1166 ///
1167 /// Additionally:
1168 /// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
1169 /// or nullptr if there is no such.
1170 /// 2) \p CntPhi is set to the corresponding phi node
1171 /// or nullptr if there is no such.
1172 /// 3) \p Var is set to the value whose CTLZ could be used.
1173 /// 4) \p DefX is set to the instruction calculating Loop exit condition.
1174 ///
1175 /// The core idiom we are trying to detect is:
1176 /// \code
1177 /// if (x0 == 0)
1178 /// goto loop-exit // the precondition of the loop
1179 /// cnt0 = init-val;
1180 /// do {
1181 /// x = phi (x0, x.next); //PhiX
1182 /// cnt = phi(cnt0, cnt.next);
1183 ///
1184 /// cnt.next = cnt + 1;
1185 /// ...
1186 /// x.next = x >> 1; // DefX
1187 /// ...
1188 /// } while(x.next != 0);
1189 ///
1190 /// loop-exit:
1191 /// \endcode
1192 static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,
1193 Instruction *&CntInst, PHINode *&CntPhi,
1194 Instruction *&DefX) {
1195 BasicBlock *LoopEntry;
1196 Value *VarX = nullptr;
1197
1198 DefX = nullptr;
1199 PhiX = nullptr;
1200 CntInst = nullptr;
1201 CntPhi = nullptr;
1202 LoopEntry = *(CurLoop->block_begin());
1203
1204 // step 1: Check if the loop-back branch is in desirable form.
1205 if (Value *T = matchCondition(
1206 dyn_cast(LoopEntry->getTerminator()), LoopEntry))
1207 DefX = dyn_cast(T);
1208 else
1209 return false;
1210
1211 // step 2: detect instructions corresponding to "x.next = x >> 1"
1212 if (!DefX || DefX->getOpcode() != Instruction::AShr)
1213 return false;
1214 if (ConstantInt *Shft = dyn_cast(DefX->getOperand(1)))
1215 if (!Shft || !Shft->isOne())
1216 return false;
1217 VarX = DefX->getOperand(0);
1218
1219 // step 3: Check the recurrence of variable X
1220 PhiX = dyn_cast(VarX);
1221 if (!PhiX || (PhiX->getOperand(0) != DefX && PhiX->getOperand(1) != DefX))
1222 return false;
1223
1224 // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
1225 // TODO: We can skip the step. If loop trip count is known (CTLZ),
1226 // then all uses of "cnt.next" could be optimized to the trip count
1227 // plus "cnt0". Currently it is not optimized.
1228 // This step could be used to detect POPCNT instruction:
1229 // cnt.next = cnt + (x.next & 1)
1230 for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
1231 IterE = LoopEntry->end();
1232 Iter != IterE; Iter++) {
1233 Instruction *Inst = &*Iter;
1234 if (Inst->getOpcode() != Instruction::Add)
1235 continue;
1236
1237 ConstantInt *Inc = dyn_cast(Inst->getOperand(1));
1238 if (!Inc || !Inc->isOne())
1239 continue;
1240
1241 PHINode *Phi = dyn_cast(Inst->getOperand(0));
1242 if (!Phi || Phi->getParent() != LoopEntry)
1243 continue;
1244
1245 CntInst = Inst;
1246 CntPhi = Phi;
1247 break;
1248 }
1249 if (!CntInst)
1250 return false;
1251
1252 return true;
1253 }
1254
1255 /// Recognize CTLZ idiom in a non-countable loop and convert the loop
1256 /// to countable (with CTLZ trip count).
1257 /// If CTLZ inserted as a new trip count returns true; otherwise, returns false.
1258 bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {
1259 // Give up if the loop has multiple blocks or multiple backedges.
1260 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1261 return false;
1262
1263 Instruction *CntInst, *DefX;
1264 PHINode *CntPhi, *PhiX;
1265 if (!detectCTLZIdiom(CurLoop, PhiX, CntInst, CntPhi, DefX))
1266 return false;
1267
1268 bool IsCntPhiUsedOutsideLoop = false;
1269 for (User *U : CntPhi->users())
1270 if (!CurLoop->contains(dyn_cast(U))) {
1271 IsCntPhiUsedOutsideLoop = true;
1272 break;
1273 }
1274 bool IsCntInstUsedOutsideLoop = false;
1275 for (User *U : CntInst->users())
1276 if (!CurLoop->contains(dyn_cast(U))) {
1277 IsCntInstUsedOutsideLoop = true;
1278 break;
1279 }
1280 // If both CntInst and CntPhi are used outside the loop the profitability
1281 // is questionable.
1282 if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)
1283 return false;
1284
1285 // For some CPUs result of CTLZ(X) intrinsic is undefined
1286 // when X is 0. If we can not guarantee X != 0, we need to check this
1287 // when expand.
1288 bool ZeroCheck = false;
1289 // It is safe to assume Preheader exist as it was checked in
1290 // parent function RunOnLoop.
1291 BasicBlock *PH = CurLoop->getLoopPreheader();
1292 Value *InitX = PhiX->getIncomingValueForBlock(PH);
1293 // If we check X != 0 before entering the loop we don't need a zero
1294 // check in CTLZ intrinsic.
1295 if (BasicBlock *PreCondBB = PH->getSinglePredecessor())
1296 if (BranchInst *PreCondBr =
1297 dyn_cast(PreCondBB->getTerminator())) {
1298 if (matchCondition(PreCondBr, PH) == InitX)
1299 ZeroCheck = true;
1300 }
1301
1302 // Check if CTLZ intrinsic is profitable. Assume it is always profitable
1303 // if we delete the loop (the loop has only 6 instructions):
1304 // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
1305 // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
1306 // %shr = ashr %n.addr.0, 1
1307 // %tobool = icmp eq %shr, 0
1308 // %inc = add nsw %i.0, 1
1309 // br i1 %tobool
1310
1311 IRBuilder<> Builder(PH->getTerminator());
1312 SmallVector Ops =
1313 {InitX, ZeroCheck ? Builder.getTrue() : Builder.getFalse()};
1314 ArrayRef Args(Ops);
1315 if (CurLoop->getHeader()->size() != 6 &&
1316 TTI->getIntrinsicCost(Intrinsic::ctlz, InitX->getType(), Args) >
1317 TargetTransformInfo::TCC_Basic)
1318 return false;
1319
1320 const DebugLoc DL = DefX->getDebugLoc();
1321 transformLoopToCountable(PH, CntInst, CntPhi, InitX, DL, ZeroCheck,
1322 IsCntPhiUsedOutsideLoop);
1323 return true;
1324 }
1325
11611326 /// Recognizes a population count idiom in a non-countable loop.
11621327 ///
11631328 /// If detected, transforms the relevant code to issue the popcount intrinsic
12191384 CI->setDebugLoc(DL);
12201385
12211386 return CI;
1387 }
1388
1389 static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
1390 const DebugLoc &DL, bool ZeroCheck) {
1391 Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()};
1392 Type *Tys[] = {Val->getType()};
1393
1394 Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
1395 Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctlz, Tys);
1396 CallInst *CI = IRBuilder.CreateCall(Func, Ops);
1397 CI->setDebugLoc(DL);
1398
1399 return CI;
1400 }
1401
1402 /// Transform the following loop:
1403 /// loop:
1404 /// CntPhi = PHI [Cnt0, CntInst]
1405 /// PhiX = PHI [InitX, DefX]
1406 /// CntInst = CntPhi + 1
1407 /// DefX = PhiX >> 1
1408 // LOOP_BODY
1409 /// Br: loop if (DefX != 0)
1410 /// Use(CntPhi) or Use(CntInst)
1411 ///
1412 /// Into:
1413 /// If CntPhi used outside the loop:
1414 /// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1)
1415 /// Count = CountPrev + 1
1416 /// else
1417 /// Count = BitWidth(InitX) - CTLZ(InitX)
1418 /// loop:
1419 /// CntPhi = PHI [Cnt0, CntInst]
1420 /// PhiX = PHI [InitX, DefX]
1421 /// PhiCount = PHI [Count, Dec]
1422 /// CntInst = CntPhi + 1
1423 /// DefX = PhiX >> 1
1424 /// Dec = PhiCount - 1
1425 /// LOOP_BODY
1426 /// Br: loop if (Dec != 0)
1427 /// Use(CountPrev + Cnt0) // Use(CntPhi)
1428 /// or
1429 /// Use(Count + Cnt0) // Use(CntInst)
1430 ///
1431 /// If LOOP_BODY is empty the loop will be deleted.
1432 /// If CntInst and DefX are not used in LOOP_BODY they will be removed.
1433 void LoopIdiomRecognize::transformLoopToCountable(
1434 BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value *InitX,
1435 const DebugLoc DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
1436 BranchInst *PreheaderBr = dyn_cast(Preheader->getTerminator());
1437
1438 // Step 1: Insert the CTLZ instruction at the end of the preheader block
1439 // Count = BitWidth - CTLZ(InitX);
1440 // If there are uses of CntPhi create:
1441 // CountPrev = BitWidth - CTLZ(InitX >> 1);
1442 IRBuilder<> Builder(PreheaderBr);
1443 Builder.SetCurrentDebugLocation(DL);
1444 Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext;
1445
1446 if (IsCntPhiUsedOutsideLoop)
1447 InitXNext = Builder.CreateAShr(InitX,
1448 ConstantInt::get(InitX->getType(), 1));
1449 else
1450 InitXNext = InitX;
1451 CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck);
1452 Count = Builder.CreateSub(
1453 ConstantInt::get(CTLZ->getType(),
1454 CTLZ->getType()->getIntegerBitWidth()),
1455 CTLZ);
1456 if (IsCntPhiUsedOutsideLoop) {
1457 CountPrev = Count;
1458 Count = Builder.CreateAdd(
1459 CountPrev,
1460 ConstantInt::get(CountPrev->getType(), 1));
1461 }
1462 if (IsCntPhiUsedOutsideLoop)
1463 NewCount = Builder.CreateZExtOrTrunc(CountPrev,
1464 cast(CntInst->getType()));
1465 else
1466 NewCount = Builder.CreateZExtOrTrunc(Count,
1467 cast(CntInst->getType()));
1468
1469 // If the CTLZ counter's initial value is not zero, insert Add Inst.
1470 Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader);
1471 ConstantInt *InitConst = dyn_cast(CntInitVal);
1472 if (!InitConst || !InitConst->isZero())
1473 NewCount = Builder.CreateAdd(NewCount, CntInitVal);
1474
1475 // Step 2: Insert new IV and loop condition:
1476 // loop:
1477 // ...
1478 // PhiCount = PHI [Count, Dec]
1479 // ...
1480 // Dec = PhiCount - 1
1481 // ...
1482 // Br: loop if (Dec != 0)
1483 BasicBlock *Body = *(CurLoop->block_begin());
1484 auto *LbBr = dyn_cast(Body->getTerminator());
1485 ICmpInst *LbCond = cast(LbBr->getCondition());
1486 Type *Ty = Count->getType();
1487
1488 PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front());
1489
1490 Builder.SetInsertPoint(LbCond);
1491 Instruction *TcDec = cast(
1492 Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),
1493 "tcdec", false, true));
1494
1495 TcPhi->addIncoming(Count, Preheader);
1496 TcPhi->addIncoming(TcDec, Body);
1497
1498 CmpInst::Predicate Pred =
1499 (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
1500 LbCond->setPredicate(Pred);
1501 LbCond->setOperand(0, TcDec);
1502 LbCond->setOperand(1, ConstantInt::get(Ty, 0));
1503
1504 // Step 3: All the references to the original counter outside
1505 // the loop are replaced with the NewCount -- the value returned from
1506 // __builtin_ctlz(x).
1507 if (IsCntPhiUsedOutsideLoop)
1508 CntPhi->replaceUsesOutsideBlock(NewCount, Body);
1509 else
1510 CntInst->replaceUsesOutsideBlock(NewCount, Body);
1511
1512 // step 4: Forget the "non-computable" trip-count SCEV associated with the
1513 // loop. The loop would otherwise not be deleted even if it becomes empty.
1514 SE->forgetLoop(CurLoop);
12221515 }
12231516
12241517 void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
0 ; RUN: opt -loop-idiom -mtriple=armv7a < %s -S | FileCheck -check-prefix=LZCNT --check-prefix=ALL %s
1 ; RUN: opt -loop-idiom -mtriple=armv4t < %s -S | FileCheck -check-prefix=NOLZCNT --check-prefix=ALL %s
2
3 ; Recognize CTLZ builtin pattern.
4 ; Here we'll just convert loop to countable,
5 ; so do not insert builtin if CPU does not support CTLZ
6 ;
7 ; int ctlz_and_other(int n, char *a)
8 ; {
9 ; int i = 0, n0 = n;
10 ; while(n >>= 1) {
11 ; a[i] = (n0 & (1 << i)) ? 1 : 0;
12 ; i++;
13 ; }
14 ; return i;
15 ; }
16 ;
17 ; LZCNT: entry
18 ; LZCNT: %0 = call i32 @llvm.ctlz.i32(i32 %shr8, i1 true)
19 ; LZCNT-NEXT: %1 = sub i32 32, %0
20 ; LZCNT-NEXT: %2 = zext i32 %1 to i64
21 ; LZCNT: %indvars.iv.next.lcssa = phi i64 [ %2, %while.body ]
22 ; LZCNT: %4 = trunc i64 %indvars.iv.next.lcssa to i32
23 ; LZCNT: %i.0.lcssa = phi i32 [ 0, %entry ], [ %4, %while.end.loopexit ]
24 ; LZCNT: ret i32 %i.0.lcssa
25
26 ; NOLZCNT: entry
27 ; NOLZCNT-NOT: @llvm.ctlz
28
29 ; Function Attrs: norecurse nounwind uwtable
; Test input: a shift-and-count loop that also does other work. Each iteration
; stores one byte derived from %n through %a, increments the i64 counter and
; shifts %shr11 right by one; the loop exits once the shifted value is zero.
; The entry block bypasses the loop when %shr8 (%n >> 1) is already zero, and
; the incremented counter is the value that reaches the return.
30 define i32 @ctlz_and_other(i32 %n, i8* nocapture %a) {
31 entry:
32 %shr8 = ashr i32 %n, 1
33 %tobool9 = icmp eq i32 %shr8, 0
34 br i1 %tobool9, label %while.end, label %while.body.preheader
35
36 while.body.preheader: ; preds = %entry
37 br label %while.body
38
39 while.body: ; preds = %while.body.preheader, %while.body
40 %indvars.iv = phi i64 [ %indvars.iv.next, %while.body ], [ 0, %while.body.preheader ]
41 %shr11 = phi i32 [ %shr, %while.body ], [ %shr8, %while.body.preheader ]
42 %0 = trunc i64 %indvars.iv to i32
43 %shl = shl i32 1, %0
44 %and = and i32 %shl, %n
45 %tobool1 = icmp ne i32 %and, 0
46 %conv = zext i1 %tobool1 to i8
47 %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
48 store i8 %conv, i8* %arrayidx, align 1
49 %indvars.iv.next = add nuw i64 %indvars.iv, 1
50 %shr = ashr i32 %shr11, 1
51 %tobool = icmp eq i32 %shr, 0
52 br i1 %tobool, label %while.end.loopexit, label %while.body
53
54 while.end.loopexit: ; preds = %while.body
55 %1 = trunc i64 %indvars.iv.next to i32
56 br label %while.end
57
58 while.end: ; preds = %while.end.loopexit, %entry
59 %i.0.lcssa = phi i32 [ 0, %entry ], [ %1, %while.end.loopexit ]
60 ret i32 %i.0.lcssa
61 }
62
63 ; Recognize CTLZ builtin pattern.
64 ; Here it will replace the loop -
65 ; assume builtin is always profitable.
66 ;
67 ; int ctlz_zero_check(int n)
68 ; {
69 ; int i = 0;
70 ; while(n) {
71 ; n >>= 1;
72 ; i++;
73 ; }
74 ; return i;
75 ; }
76 ;
77 ; ALL: entry
78 ; ALL: %0 = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
79 ; ALL-NEXT: %1 = sub i32 32, %0
80 ; ALL: %inc.lcssa = phi i32 [ %1, %while.body ]
81 ; ALL: %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
82 ; ALL: ret i32 %i.0.lcssa
83
84 ; Function Attrs: norecurse nounwind readnone uwtable
; Test input: a pure shift-and-count loop guarded by an entry check that %n is
; non-zero. The loop body only shifts and increments; the incremented counter
; %inc is the value that reaches the return through %i.0.lcssa.
85 define i32 @ctlz_zero_check(i32 %n) {
86 entry:
87 %tobool4 = icmp eq i32 %n, 0
88 br i1 %tobool4, label %while.end, label %while.body.preheader
89
90 while.body.preheader: ; preds = %entry
91 br label %while.body
92
93 while.body: ; preds = %while.body.preheader, %while.body
94 %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
95 %n.addr.05 = phi i32 [ %shr, %while.body ], [ %n, %while.body.preheader ]
96 %shr = ashr i32 %n.addr.05, 1
97 %inc = add nsw i32 %i.06, 1
98 %tobool = icmp eq i32 %shr, 0
99 br i1 %tobool, label %while.end.loopexit, label %while.body
100
101 while.end.loopexit: ; preds = %while.body
102 br label %while.end
103
104 while.end: ; preds = %while.end.loopexit, %entry
105 %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ]
106 ret i32 %i.0.lcssa
107 }
108
109 ; Recognize CTLZ builtin pattern.
110 ; Here it will replace the loop -
111 ; assume builtin is always profitable.
112 ;
113 ; int ctlz(int n)
114 ; {
115 ; int i = 0;
116 ; while(n >>= 1) {
117 ; i++;
118 ; }
119 ; return i;
120 ; }
121 ;
122 ; ALL: entry
123 ; ALL: %0 = ashr i32 %n, 1
124 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
125 ; ALL-NEXT: %2 = sub i32 32, %1
126 ; ALL-NEXT: %3 = add i32 %2, 1
127 ; ALL: %i.0.lcssa = phi i32 [ %2, %while.cond ]
128 ; ALL: ret i32 %i.0.lcssa
129
130 ; Function Attrs: norecurse nounwind readnone uwtable
; Test input: a single-block shift-and-count loop entered unconditionally from
; the entry block (no guard on %n). The value returned is the counter phi %i.0,
; not the incremented value %inc.
131 define i32 @ctlz(i32 %n) {
132 entry:
133 br label %while.cond
134
135 while.cond: ; preds = %while.cond, %entry
136 %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
137 %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
138 %shr = ashr i32 %n.addr.0, 1
139 %tobool = icmp eq i32 %shr, 0
140 %inc = add nsw i32 %i.0, 1
141 br i1 %tobool, label %while.end, label %while.cond
142
143 while.end: ; preds = %while.cond
144 ret i32 %i.0
145 }
146
147 ; Recognize CTLZ builtin pattern.
148 ; Here it will replace the loop -
149 ; assume builtin is always profitable.
150 ;
151 ; int ctlz_add(int n, int i0)
152 ; {
153 ; int i = i0;
154 ; while(n >>= 1) {
155 ; i++;
156 ; }
157 ; return i;
158 ; }
159 ;
160 ; ALL: entry
161 ; ALL: %0 = ashr i32 %n, 1
162 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
163 ; ALL-NEXT: %2 = sub i32 32, %1
164 ; ALL-NEXT: %3 = add i32 %2, 1
165 ; ALL-NEXT: %4 = add i32 %2, %i0
166 ; ALL: %i.0.lcssa = phi i32 [ %4, %while.cond ]
167 ; ALL: ret i32 %i.0.lcssa
168 ;
169 ; Function Attrs: norecurse nounwind readnone uwtable
; Test input: same loop shape as @ctlz, except that the counter phi %i.0 starts
; from the argument %i0 instead of the constant 0. The counter phi is the value
; returned.
170 define i32 @ctlz_add(i32 %n, i32 %i0) {
171 entry:
172 br label %while.cond
173
174 while.cond: ; preds = %while.cond, %entry
175 %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
176 %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
177 %shr = ashr i32 %n.addr.0, 1
178 %tobool = icmp eq i32 %shr, 0
179 %inc = add nsw i32 %i.0, 1
180 br i1 %tobool, label %while.end, label %while.cond
181
182 while.end: ; preds = %while.cond
183 ret i32 %i.0
184 }
0 ; RUN: opt -loop-idiom -mtriple=x86_64 -mcpu=core-avx2 < %s -S | FileCheck -check-prefix=LZCNT --check-prefix=ALL %s
1 ; RUN: opt -loop-idiom -mtriple=x86_64 -mcpu=corei7 < %s -S | FileCheck -check-prefix=NOLZCNT --check-prefix=ALL %s
2
3 ; Recognize CTLZ builtin pattern.
4 ; Here we'll just convert loop to countable,
5 ; so do not insert builtin if CPU does not support CTLZ
6 ;
7 ; int ctlz_and_other(int n, char *a)
8 ; {
9 ; int i = 0, n0 = n;
10 ; while(n >>= 1) {
11 ; a[i] = (n0 & (1 << i)) ? 1 : 0;
12 ; i++;
13 ; }
14 ; return i;
15 ; }
16 ;
17 ; LZCNT: entry
18 ; LZCNT: %0 = call i32 @llvm.ctlz.i32(i32 %shr8, i1 true)
19 ; LZCNT-NEXT: %1 = sub i32 32, %0
20 ; LZCNT-NEXT: %2 = zext i32 %1 to i64
21 ; LZCNT: %indvars.iv.next.lcssa = phi i64 [ %2, %while.body ]
22 ; LZCNT: %4 = trunc i64 %indvars.iv.next.lcssa to i32
23 ; LZCNT: %i.0.lcssa = phi i32 [ 0, %entry ], [ %4, %while.end.loopexit ]
24 ; LZCNT: ret i32 %i.0.lcssa
25
26 ; NOLZCNT: entry
27 ; NOLZCNT-NOT: @llvm.ctlz
28
29 ; Function Attrs: norecurse nounwind uwtable
; Test input: a shift-and-count loop that also does other work. Each iteration
; stores one byte derived from %n through %a, increments the i64 counter and
; shifts %shr11 right by one; the loop exits once the shifted value is zero.
; The entry block bypasses the loop when %shr8 (%n >> 1) is already zero, and
; the incremented counter is the value that reaches the return.
30 define i32 @ctlz_and_other(i32 %n, i8* nocapture %a) {
31 entry:
32 %shr8 = ashr i32 %n, 1
33 %tobool9 = icmp eq i32 %shr8, 0
34 br i1 %tobool9, label %while.end, label %while.body.preheader
35
36 while.body.preheader: ; preds = %entry
37 br label %while.body
38
39 while.body: ; preds = %while.body.preheader, %while.body
40 %indvars.iv = phi i64 [ %indvars.iv.next, %while.body ], [ 0, %while.body.preheader ]
41 %shr11 = phi i32 [ %shr, %while.body ], [ %shr8, %while.body.preheader ]
42 %0 = trunc i64 %indvars.iv to i32
43 %shl = shl i32 1, %0
44 %and = and i32 %shl, %n
45 %tobool1 = icmp ne i32 %and, 0
46 %conv = zext i1 %tobool1 to i8
47 %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
48 store i8 %conv, i8* %arrayidx, align 1
49 %indvars.iv.next = add nuw i64 %indvars.iv, 1
50 %shr = ashr i32 %shr11, 1
51 %tobool = icmp eq i32 %shr, 0
52 br i1 %tobool, label %while.end.loopexit, label %while.body
53
54 while.end.loopexit: ; preds = %while.body
55 %1 = trunc i64 %indvars.iv.next to i32
56 br label %while.end
57
58 while.end: ; preds = %while.end.loopexit, %entry
59 %i.0.lcssa = phi i32 [ 0, %entry ], [ %1, %while.end.loopexit ]
60 ret i32 %i.0.lcssa
61 }
62
63 ; Recognize CTLZ builtin pattern.
64 ; Here it will replace the loop -
65 ; assume builtin is always profitable.
66 ;
67 ; int ctlz_zero_check(int n)
68 ; {
69 ; int i = 0;
70 ; while(n) {
71 ; n >>= 1;
72 ; i++;
73 ; }
74 ; return i;
75 ; }
76 ;
77 ; ALL: entry
78 ; ALL: %0 = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
79 ; ALL-NEXT: %1 = sub i32 32, %0
80 ; ALL: %inc.lcssa = phi i32 [ %1, %while.body ]
81 ; ALL: %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ]
82 ; ALL: ret i32 %i.0.lcssa
83
84 ; Function Attrs: norecurse nounwind readnone uwtable
; Test input: a pure shift-and-count loop guarded by an entry check that %n is
; non-zero. The loop body only shifts and increments; the incremented counter
; %inc is the value that reaches the return through %i.0.lcssa.
85 define i32 @ctlz_zero_check(i32 %n) {
86 entry:
87 %tobool4 = icmp eq i32 %n, 0
88 br i1 %tobool4, label %while.end, label %while.body.preheader
89
90 while.body.preheader: ; preds = %entry
91 br label %while.body
92
93 while.body: ; preds = %while.body.preheader, %while.body
94 %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ]
95 %n.addr.05 = phi i32 [ %shr, %while.body ], [ %n, %while.body.preheader ]
96 %shr = ashr i32 %n.addr.05, 1
97 %inc = add nsw i32 %i.06, 1
98 %tobool = icmp eq i32 %shr, 0
99 br i1 %tobool, label %while.end.loopexit, label %while.body
100
101 while.end.loopexit: ; preds = %while.body
102 br label %while.end
103
104 while.end: ; preds = %while.end.loopexit, %entry
105 %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ]
106 ret i32 %i.0.lcssa
107 }
108
109 ; Recognize CTLZ builtin pattern.
110 ; Here it will replace the loop -
111 ; assume builtin is always profitable.
112 ;
113 ; int ctlz(int n)
114 ; {
115 ; int i = 0;
116 ; while(n >>= 1) {
117 ; i++;
118 ; }
119 ; return i;
120 ; }
121 ;
122 ; ALL: entry
123 ; ALL: %0 = ashr i32 %n, 1
124 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
125 ; ALL-NEXT: %2 = sub i32 32, %1
126 ; ALL-NEXT: %3 = add i32 %2, 1
127 ; ALL: %i.0.lcssa = phi i32 [ %2, %while.cond ]
128 ; ALL: ret i32 %i.0.lcssa
129
130 ; Function Attrs: norecurse nounwind readnone uwtable
; Test input: a single-block shift-and-count loop entered unconditionally from
; the entry block (no guard on %n). The value returned is the counter phi %i.0,
; not the incremented value %inc.
131 define i32 @ctlz(i32 %n) {
132 entry:
133 br label %while.cond
134
135 while.cond: ; preds = %while.cond, %entry
136 %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
137 %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ]
138 %shr = ashr i32 %n.addr.0, 1
139 %tobool = icmp eq i32 %shr, 0
140 %inc = add nsw i32 %i.0, 1
141 br i1 %tobool, label %while.end, label %while.cond
142
143 while.end: ; preds = %while.cond
144 ret i32 %i.0
145 }
146
147 ; Recognize CTLZ builtin pattern.
148 ; Here it will replace the loop -
149 ; assume builtin is always profitable.
150 ;
151 ; int ctlz_add(int n, int i0)
152 ; {
153 ; int i = i0;
154 ; while(n >>= 1) {
155 ; i++;
156 ; }
157 ; return i;
158 ; }
159 ;
160 ; ALL: entry
161 ; ALL: %0 = ashr i32 %n, 1
162 ; ALL-NEXT: %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false)
163 ; ALL-NEXT: %2 = sub i32 32, %1
164 ; ALL-NEXT: %3 = add i32 %2, 1
165 ; ALL-NEXT: %4 = add i32 %2, %i0
166 ; ALL: %i.0.lcssa = phi i32 [ %4, %while.cond ]
167 ; ALL: ret i32 %i.0.lcssa
168 ;
169 ; Function Attrs: norecurse nounwind readnone uwtable
; Test input: same loop shape as @ctlz, except that the counter phi %i.0 starts
; from the argument %i0 instead of the constant 0. The counter phi is the value
; returned.
170 define i32 @ctlz_add(i32 %n, i32 %i0) {
171 entry:
172 br label %while.cond
173
174 while.cond: ; preds = %while.cond, %entry
175 %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
176 %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ]
177 %shr = ashr i32 %n.addr.0, 1
178 %tobool = icmp eq i32 %shr, 0
179 %inc = add nsw i32 %i.0, 1
180 br i1 %tobool, label %while.end, label %while.cond
181
182 while.end: ; preds = %while.cond
183 ret i32 %i.0
184 }