llvm.org GIT mirror llvm / 84fca61
rdar://12100355 (part 1) This revision attempts to recognize the following population-count pattern: while(a) { c++; ... ; a &= a - 1; ... }, where <c> and <a> could be used multiple times in the loop body. TODO: On X86-64 and ARM, __builtin_ctpop() is not expanded to an efficient instruction sequence, which needs to be improved in the following commits. Reviewed by Nadav, really appreciated! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168931 91177308-0d34-0410-b5e6-96231b3b80d8 Shuxin Yang 6 years ago
7 changed file(s) with 634 addition(s) and 28 deletion(s). Raw diff Collapse all Expand all
2525 /// ScalarTargetTransformInfo interface. Different targets can implement
2626 /// this interface differently.
2727 class ScalarTargetTransformImpl : public ScalarTargetTransformInfo {
28 private:
28 protected:
2929 const TargetLowering *TLI;
3030
3131 public:
7474 /// LSR, and LowerInvoke use this interface.
7575 class ScalarTargetTransformInfo {
7676 public:
77 /// PopcntHwSupport - Hardware support for population count. Compared to the
78 /// SW implementation, HW support is supposed to significantly boost the
79 /// performance when the population is dense, and it may or may not degrade
80 /// performance if the population is sparse. A HW support is considered as
81 /// "Fast" if it can outperform, or is on a par with, SW implementation when
82 /// the population is sparse; otherwise, it is considered as "Slow".
83 enum PopcntHwSupport {
84 None,
85 Fast,
86 Slow
87 };
88
7789 virtual ~ScalarTargetTransformInfo() {}
7890
7991 /// isLegalAddImmediate - Return true if the specified immediate is legal
120132 /// lookup tables for the target.
121133 virtual bool shouldBuildLookupTables() const {
122134 return true;
135 }
136
137 /// getPopcntHwSupport - Return hardware support for population count.
138 virtual PopcntHwSupport getPopcntHwSupport(unsigned IntTyWidthInBit) const {
139 return None;
123140 }
124141 };
125142
1766917669 return -1;
1767017670 }
1767117671
17672 ScalarTargetTransformInfo::PopcntHwSupport
17673 X86ScalarTargetTransformImpl::getPopcntHwSupport(unsigned TyWidth) const {
17674 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
17675 const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget();
17676
17677 // TODO: Currently the __builtin_popcount() implementation using SSE3
17678 // instructions is inefficient. Once the problem is fixed, we should
17679 // call ST.hasSSE3() instead of ST.hasSSE4().
17680 return ST.hasSSE41() ? Fast : None;
17681 }
17682
1767217683 unsigned
1767317684 X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
1767417685 Type *Ty) const {
932932 const TargetLibraryInfo *libInfo);
933933 }
934934
935 class X86ScalarTargetTransformImpl : public ScalarTargetTransformImpl {
936 public:
937 explicit X86ScalarTargetTransformImpl(const TargetLowering *TL) :
938 ScalarTargetTransformImpl(TL) {};
939
940 virtual PopcntHwSupport getPopcntHwSupport(unsigned TyWidth) const;
941 };
942
935943 class X86VectorTargetTransformInfo : public VectorTargetTransformImpl {
936944 public:
937945 explicit X86VectorTargetTransformInfo(const TargetLowering *TL) :
117117 X86SelectionDAGInfo TSInfo;
118118 X86TargetLowering TLInfo;
119119 X86JITInfo JITInfo;
120 ScalarTargetTransformImpl STTI;
120 X86ScalarTargetTransformImpl STTI;
121121 X86VectorTargetTransformInfo VTTI;
122122 public:
123123 X86_64TargetMachine(const Target &T, StringRef TT,
5555 #include "llvm/Support/raw_ostream.h"
5656 #include "llvm/DataLayout.h"
5757 #include "llvm/Target/TargetLibraryInfo.h"
58 #include "llvm/TargetTransformInfo.h"
5859 #include "llvm/Transforms/Utils/Local.h"
5960 using namespace llvm;
6061
6263 STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
6364
6465 namespace {
66
67 class LoopIdiomRecognize;
68
69 /// This class defines some utility functions for loop idiom recognization.
70 class LIRUtil {
71 public:
72 /// Return true iff the block contains nothing but an uncondition branch
73 /// (aka goto instruction).
74 static bool isAlmostEmpty(BasicBlock *);
75
76 static BranchInst *getBranch(BasicBlock *BB) {
77 return dyn_cast(BB->getTerminator());
78 }
79
80 /// Return the condition of the branch terminating the given basic block.
81 static Value *getBrCondtion(BasicBlock *);
82
83 /// Derive the precondition block (i.e the block that guards the loop
84 /// preheader) from the given preheader.
85 static BasicBlock *getPrecondBb(BasicBlock *PreHead);
86 };
87
88 /// This class is to recoginize idioms of population-count conducted in
89 /// a noncountable loop. Currently it only recognizes this pattern:
90 /// \code
91 /// while(x) {cnt++; ...; x &= x - 1; ...}
92 /// \endcode
93 class NclPopcountRecognize {
94 LoopIdiomRecognize &LIR;
95 Loop *CurLoop;
96 BasicBlock *PreCondBB;
97
98 typedef IRBuilder<> IRBuilderTy;
99
100 public:
101 explicit NclPopcountRecognize(LoopIdiomRecognize &TheLIR);
102 bool recognize();
103
104 private:
105 /// Take a glimpse of the loop to see if we need to go ahead recoginizing
106 /// the idiom.
107 bool preliminaryScreen();
108
109 /// Check if the given conditional branch is based on the comparison
110 /// beween a variable and zero, and if the variable is non-zero, the
111 /// control yeilds to the loop entry. If the branch matches the behavior,
112 /// the variable involved in the comparion is returned. This function will
113 /// be called to see if the precondition and postcondition of the loop
114 /// are in desirable form.
115 Value *matchCondition (BranchInst *Br, BasicBlock *NonZeroTarget) const;
116
117 /// Return true iff the idiom is detected in the loop. and 1) \p CntInst
118 /// is set to the instruction counting the pupulation bit. 2) \p CntPhi
119 /// is set to the corresponding phi node. 3) \p Var is set to the value
120 /// whose population bits are being counted.
121 bool detectIdiom
122 (Instruction *&CntInst, PHINode *&CntPhi, Value *&Var) const;
123
124 /// Insert ctpop intrinsic function and some obviously dead instructions.
125 void transform (Instruction *CntInst, PHINode *CntPhi, Value *Var);
126
127 /// Create llvm.ctpop.* intrinsic function.
128 CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL);
129 };
130
65131 class LoopIdiomRecognize : public LoopPass {
66132 Loop *CurLoop;
67133 const DataLayout *TD;
68134 DominatorTree *DT;
69135 ScalarEvolution *SE;
70136 TargetLibraryInfo *TLI;
137 const ScalarTargetTransformInfo *STTI;
71138 public:
72139 static char ID;
73140 explicit LoopIdiomRecognize() : LoopPass(ID) {
74141 initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
142 TD = 0; DT = 0; SE = 0; TLI = 0; STTI = 0;
75143 }
76144
77145 bool runOnLoop(Loop *L, LPPassManager &LPM);
109177 AU.addRequired();
110178 AU.addRequired();
111179 }
180
181 const DataLayout *getDataLayout() {
182 return TD ? TD : TD=getAnalysisIfAvailable();
183 }
184
185 DominatorTree *getDominatorTree() {
186 return DT ? DT : (DT=&getAnalysis());
187 }
188
189 ScalarEvolution *getScalarEvolution() {
190 return SE ? SE : (SE = &getAnalysis());
191 }
192
193 TargetLibraryInfo *getTargetLibraryInfo() {
194 return TLI ? TLI : (TLI = &getAnalysis());
195 }
196
197 const ScalarTargetTransformInfo *getScalarTargetTransformInfo() {
198 if (!STTI) {
199 TargetTransformInfo *TTI = getAnalysisIfAvailable();
200 if (TTI) STTI = TTI->getScalarTargetTransformInfo();
201 }
202 return STTI;
203 }
204
205 Loop *getLoop() const { return CurLoop; }
206
207 private:
208 bool runOnNoncountableLoop();
209 bool runOnCountableLoop();
112210 };
113211 }
114212
171269 deleteDeadInstruction(I, SE, TLI);
172270 }
173271
174 bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
175 CurLoop = L;
176
177 // If the loop could not be converted to canonical form, it must have an
178 // indirectbr in it, just give up.
179 if (!L->getLoopPreheader())
180 return false;
181
182 // Disable loop idiom recognition if the function's name is a common idiom.
183 StringRef Name = L->getHeader()->getParent()->getName();
184 if (Name == "memset" || Name == "memcpy")
185 return false;
186
187 // The trip count of the loop must be analyzable.
188 SE = &getAnalysis();
189 if (!SE->hasLoopInvariantBackedgeTakenCount(L))
190 return false;
191 const SCEV *BECount = SE->getBackedgeTakenCount(L);
272 //===----------------------------------------------------------------------===//
273 //
274 // Implementation of LIRUtil
275 //
276 //===----------------------------------------------------------------------===//
277
278 // This fucntion will return true iff the given block contains nothing but goto.
279 // A typical usage of this function is to check if the preheader fucntion is
280 // "almost" empty such that generated intrinsic function can be moved across
281 // preheader and to be placed at the end of the preconditiona block without
282 // concerning of breaking data dependence.
283 bool LIRUtil::isAlmostEmpty(BasicBlock *BB) {
284 if (BranchInst *Br = getBranch(BB)) {
285 return Br->isUnconditional() && BB->size() == 1;
286 }
287 return false;
288 }
289
290 Value *LIRUtil::getBrCondtion(BasicBlock *BB) {
291 BranchInst *Br = getBranch(BB);
292 return Br ? Br->getCondition() : 0;
293 }
294
295 BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
296 if (BasicBlock *BB = PreHead->getSinglePredecessor()) {
297 BranchInst *Br = getBranch(BB);
298 return Br && Br->isConditional() ? BB : 0;
299 }
300 return 0;
301 }
302
303 //===----------------------------------------------------------------------===//
304 //
305 // Implementation of NclPopcountRecognize
306 //
307 //===----------------------------------------------------------------------===//
308
309 NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR):
310 LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(0) {
311 }
312
313 bool NclPopcountRecognize::preliminaryScreen() {
314 const ScalarTargetTransformInfo *STTI = LIR.getScalarTargetTransformInfo();
315 if (STTI->getPopcntHwSupport(32) != ScalarTargetTransformInfo::Fast)
316 return false;
317
318 // Counting population are usually conducted by few arithmetic instrutions.
319 // Such instructions can be easilly "absorbed" by vacant slots in a
320 // non-compact loop. Therefore, recognizing popcount idiom only makes sense
321 // in a compact loop.
322
323 // Give up if the loop has multiple blocks or multiple backedges.
324 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
325 return false;
326
327 BasicBlock *LoopBody = *(CurLoop->block_begin());
328 if (LoopBody->size() >= 20) {
329 // The loop is too big, bail out.
330 return false;
331 }
332
333 // It should have a preheader containing nothing but a goto instruction.
334 BasicBlock *PreHead = CurLoop->getLoopPreheader();
335 if (!PreHead || !LIRUtil::isAlmostEmpty(PreHead))
336 return false;
337
338 // It should have a precondition block where the generated popcount instrinsic
339 // function will be inserted.
340 PreCondBB = LIRUtil::getPrecondBb(PreHead);
341 if (!PreCondBB)
342 return false;
343
344 return true;
345 }
346
347 Value *NclPopcountRecognize::matchCondition (BranchInst *Br,
348 BasicBlock *LoopEntry) const {
349 if (!Br || !Br->isConditional())
350 return 0;
351
352 ICmpInst *Cond = dyn_cast(Br->getCondition());
353 if (!Cond)
354 return 0;
355
356 ConstantInt *CmpZero = dyn_cast(Cond->getOperand(1));
357 if (!CmpZero || !CmpZero->isZero())
358 return 0;
359
360 ICmpInst::Predicate Pred = Cond->getPredicate();
361 if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) ||
362 (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry))
363 return Cond->getOperand(0);
364
365 return 0;
366 }
367
368 bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
369 PHINode *&CntPhi,
370 Value *&Var) const {
371 // Following code tries to detect this idiom:
372 //
373 // if (x0 != 0)
374 // goto loop-exit // the precondition of the loop
375 // cnt0 = init-val;
376 // do {
377 // x1 = phi (x0, x2);
378 // cnt1 = phi(cnt0, cnt2);
379 //
380 // cnt2 = cnt1 + 1;
381 // ...
382 // x2 = x1 & (x1 - 1);
383 // ...
384 // } while(x != 0);
385 //
386 // loop-exit:
387 //
388
389 // step 1: Check to see if the look-back branch match this pattern:
390 // "if (a!=0) goto loop-entry".
391 BasicBlock *LoopEntry;
392 Instruction *DefX2, *CountInst;
393 Value *VarX1, *VarX0;
394 PHINode *PhiX, *CountPhi;
395
396 DefX2 = CountInst = 0;
397 VarX1 = VarX0 = 0;
398 PhiX = CountPhi = 0;
399 LoopEntry = *(CurLoop->block_begin());
400
401 // step 1: Check if the loop-back branch is in desirable form.
402 {
403 if (Value *T = matchCondition (LIRUtil::getBranch(LoopEntry), LoopEntry))
404 DefX2 = dyn_cast(T);
405 else
406 return false;
407 }
408
409 // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
410 {
411 if (DefX2->getOpcode() != Instruction::And)
412 return false;
413
414 BinaryOperator *SubOneOp;
415
416 if ((SubOneOp = dyn_cast(DefX2->getOperand(0))))
417 VarX1 = DefX2->getOperand(1);
418 else {
419 VarX1 = DefX2->getOperand(0);
420 SubOneOp = dyn_cast(DefX2->getOperand(1));
421 }
422 if (!SubOneOp)
423 return false;
424
425 Instruction *SubInst = cast(SubOneOp);
426 ConstantInt *Dec = dyn_cast(SubInst->getOperand(1));
427 if (!Dec ||
428 !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) ||
429 (SubInst->getOpcode() == Instruction::Add && Dec->isAllOnesValue()))) {
430 return false;
431 }
432 }
433
434 // step 3: Check the recurrence of variable X
435 {
436 PhiX = dyn_cast(VarX1);
437 if (!PhiX ||
438 (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) {
439 return false;
440 }
441 }
442
443 // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
444 {
445 CountInst = NULL;
446 for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(),
447 IterE = LoopEntry->end(); Iter != IterE; Iter++) {
448 Instruction *Inst = Iter;
449 if (Inst->getOpcode() != Instruction::Add)
450 continue;
451
452 ConstantInt *Inc = dyn_cast(Inst->getOperand(1));
453 if (!Inc || !Inc->isOne())
454 continue;
455
456 PHINode *Phi = dyn_cast(Inst->getOperand(0));
457 if (!Phi && Phi->getParent() != LoopEntry)
458 continue;
459
460 // Check if the result of the instruction is live of the loop.
461 bool LiveOutLoop = false;
462 for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end();
463 I != E; I++) {
464 if ((cast(*I))->getParent() != LoopEntry) {
465 LiveOutLoop = true; break;
466 }
467 }
468
469 if (LiveOutLoop) {
470 CountInst = Inst;
471 CountPhi = Phi;
472 break;
473 }
474 }
475
476 if (!CountInst)
477 return false;
478 }
479
480 // step 5: check if the precondition is in this form:
481 // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
482 {
483 BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
484 Value *T = matchCondition (PreCondBr, CurLoop->getLoopPreheader());
485 if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
486 return false;
487
488 CntInst = CountInst;
489 CntPhi = CountPhi;
490 Var = T;
491 }
492
493 return true;
494 }
495
496 void NclPopcountRecognize::transform(Instruction *CntInst,
497 PHINode *CntPhi, Value *Var) {
498
499 ScalarEvolution *SE = LIR.getScalarEvolution();
500 TargetLibraryInfo *TLI = LIR.getTargetLibraryInfo();
501 BasicBlock *PreHead = CurLoop->getLoopPreheader();
502 BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
503 const DebugLoc DL = CntInst->getDebugLoc();
504
505 // Assuming before transformation, the loop is following:
506 // if (x) // the precondition
507 // do { cnt++; x &= x - 1; } while(x);
508
509 // Step 1: Insert the ctpop instruction at the end of the precondition block
510 IRBuilderTy Builder(PreCondBr);
511 Value *PopCnt, *PopCntZext, *NewCount;
512 {
513 PopCnt = createPopcntIntrinsic(Builder, Var, DL);
514 NewCount = PopCntZext =
515 Builder.CreateZExtOrTrunc(PopCnt, cast(CntPhi->getType()));
516
517 if (NewCount != PopCnt)
518 (cast(NewCount))->setDebugLoc(DL);
519
520 // If the popoulation counter's initial value is not zero, insert Add Inst.
521 Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
522 ConstantInt *InitConst = dyn_cast(CntInitVal);
523 if (!InitConst || !InitConst->isZero()) {
524 NewCount = Builder.CreateAdd(PopCnt, InitConst);
525 (cast(NewCount))->setDebugLoc(DL);
526 }
527 }
528
529 // Step 2: Replace the precondition from "if(x == 0) goto loop-exit" to
530 // "if(NewCount == 0) loop-exit". Withtout this change, the intrinsic
531 // function would be partial dead code, and downstream passes will drag
532 // it back from the precondition block to the preheader.
533 {
534 ICmpInst *PreCond = cast(PreCondBr->getCondition());
535
536 Value *Opnd0 = PopCntZext;
537 Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
538 if (PreCond->getOperand(0) != Var)
539 std::swap(Opnd0, Opnd1);
540
541 ICmpInst *NewPreCond =
542 cast(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
543 PreCond->replaceAllUsesWith(NewPreCond);
544
545 deleteDeadInstruction(PreCond, *SE, TLI);
546 }
547
548 // Step 3: Note that the population count is exactly the trip count of the
549 // loop in question, which enble us to to convert the loop from noncountable
550 // loop into a countable one. The benefit is twofold:
551 //
552 // - If the loop only counts population, the entire loop become dead after
553 // the transformation. It is lots easier to prove a countable loop dead
554 // than to prove a noncountable one. (In some C dialects, a infite loop
555 // isn't dead even if it computes nothing useful. In general, DCE needs
556 // to prove a noncountable loop finite before safely delete it.)
557 //
558 // - If the loop also performs something else, it remains alive.
559 // Since it is transformed to countable form, it can be aggressively
560 // optimized by some optimizations which are in general not applicable
561 // to a noncountable loop.
562 //
563 // After this step, this loop (conceptually) would look like following:
564 // newcnt = __builtin_ctpop(x);
565 // t = newcnt;
566 // if (x)
567 // do { cnt++; x &= x-1; t--) } while (t > 0);
568 BasicBlock *Body = *(CurLoop->block_begin());
569 {
570 BranchInst *LbBr = LIRUtil::getBranch(Body);
571 ICmpInst *LbCond = cast(LbBr->getCondition());
572 Type *Ty = NewCount->getType();
573
574 PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", Body->begin());
575
576 Builder.SetInsertPoint(LbCond);
577 Value *Opnd1 = cast(TcPhi);
578 Value *Opnd2 = cast(ConstantInt::get(Ty, 1));
579 Instruction *TcDec =
580 cast(Builder.CreateSub(Opnd1, Opnd2, "tcdec", false, true));
581
582 TcPhi->addIncoming(NewCount, PreHead);
583 TcPhi->addIncoming(TcDec, Body);
584
585 CmpInst::Predicate Pred = (LbBr->getSuccessor(0) == Body) ?
586 CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
587 LbCond->setPredicate(Pred);
588 LbCond->setOperand(0, TcDec);
589 LbCond->setOperand(1, cast(ConstantInt::get(Ty, 0)));
590 }
591
592 // Step 4: All the references to the original population counter outside
593 // the loop are replaced with the NewCount -- the value returned from
594 // __builtin_ctpop().
595 {
596 SmallVector CntUses;
597 for (Value::use_iterator I = CntInst->use_begin(), E = CntInst->use_end();
598 I != E; I++) {
599 if (cast(*I)->getParent() != Body)
600 CntUses.push_back(*I);
601 }
602 for (unsigned Idx = 0; Idx < CntUses.size(); Idx++) {
603 (cast(CntUses[Idx]))->replaceUsesOfWith(CntInst, NewCount);
604 }
605 }
606
607 // step 5: Forget the "non-computable" trip-count SCEV associated with the
608 // loop. The loop would otherwise not be deleted even if it becomes empty.
609 SE->forgetLoop(CurLoop);
610 }
611
612 CallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder,
613 Value *Val, DebugLoc DL) {
614 Value *Ops[] = { Val };
615 Type *Tys[] = { Val->getType() };
616
617 Module *M = (*(CurLoop->block_begin()))->getParent()->getParent();
618 Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
619 CallInst *CI = IRBuilder.CreateCall(Func, Ops);
620 CI->setDebugLoc(DL);
621
622 return CI;
623 }
624
625 /// recognize - detect population count idiom in a non-countable loop. If
626 /// detected, transform the relevant code to popcount intrinsic function
627 /// call, and return true; otherwise, return false.
628 bool NclPopcountRecognize::recognize() {
629
630 if (!LIR.getScalarTargetTransformInfo())
631 return false;
632
633 LIR.getScalarEvolution();
634
635 if (!preliminaryScreen())
636 return false;
637
638 Instruction *CntInst;
639 PHINode *CntPhi;
640 Value *Val;
641 if (!detectIdiom(CntInst, CntPhi, Val))
642 return false;
643
644 transform(CntInst, CntPhi, Val);
645 return true;
646 }
647
648 //===----------------------------------------------------------------------===//
649 //
650 // Implementation of LoopIdiomRecognize
651 //
652 //===----------------------------------------------------------------------===//
653
654 bool LoopIdiomRecognize::runOnCountableLoop() {
655 const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
192656 if (isa(BECount)) return false;
193657
194658 // If this loop executes exactly one time, then it should be peeled, not
198662 return false;
199663
200664 // We require target data for now.
201 TD = getAnalysisIfAvailable();
202 if (TD == 0) return false;
203
204 DT = &getAnalysis();
665 if (!getDataLayout())
666 return false;
667
668 getDominatorTree();
669
205670 LoopInfo &LI = getAnalysis();
206671 TLI = &getAnalysis();
207672
673 getTargetLibraryInfo();
674
208675 SmallVector ExitBlocks;
209676 CurLoop->getUniqueExitBlocks(ExitBlocks);
210677
211678 DEBUG(dbgs() << "loop-idiom Scanning: F["
212 << L->getHeader()->getParent()->getName()
213 << "] Loop %" << L->getHeader()->getName() << "\n");
679 << CurLoop->getHeader()->getParent()->getName()
680 << "] Loop %" << CurLoop->getHeader()->getName() << "\n");
214681
215682 bool MadeChange = false;
216683 // Scan all the blocks in the loop that are not in subloops.
217 for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
218 ++BI) {
684 for (Loop::block_iterator BI = CurLoop->block_begin(),
685 E = CurLoop->block_end(); BI != E; ++BI) {
219686 // Ignore blocks in subloops.
220687 if (LI.getLoopFor(*BI) != CurLoop)
221688 continue;
223690 MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
224691 }
225692 return MadeChange;
693 }
694
695 bool LoopIdiomRecognize::runOnNoncountableLoop() {
696 NclPopcountRecognize Popcount(*this);
697 if (Popcount.recognize())
698 return true;
699
700 return false;
701 }
702
703 bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
704 CurLoop = L;
705
706 // If the loop could not be converted to canonical form, it must have an
707 // indirectbr in it, just give up.
708 if (!L->getLoopPreheader())
709 return false;
710
711 // Disable loop idiom recognition if the function's name is a common idiom.
712 StringRef Name = L->getHeader()->getParent()->getName();
713 if (Name == "memset" || Name == "memcpy")
714 return false;
715
716 SE = &getAnalysis();
717 if (SE->hasLoopInvariantBackedgeTakenCount(L))
718 return runOnCountableLoop();
719 return runOnNoncountableLoop();
226720 }
227721
228722 /// runOnLoopBlock - Process the specified block, which lives in a counted loop
0 ; RUN: opt -loop-idiom < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -S | FileCheck %s
1
2 ;To recognize this pattern:
3 ;int popcount(unsigned long long a) {
4 ; int c = 0;
5 ; while (a) {
6 ; c++;
7 ; a &= a - 1;
8 ; }
9 ; return c;
10 ;}
11 ;
12 ; CHECK: entry
13 ; CHECK: llvm.ctpop.i64
14 ; CHECK: ret
15 define i32 @popcount(i64 %a) nounwind uwtable readnone ssp {
16 entry:
17 %tobool3 = icmp eq i64 %a, 0
18 br i1 %tobool3, label %while.end, label %while.body
19
20 while.body: ; preds = %entry, %while.body
21 %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
22 %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
23 %inc = add nsw i32 %c.05, 1
24 %sub = add i64 %a.addr.04, -1
25 %and = and i64 %sub, %a.addr.04
26 %tobool = icmp eq i64 %and, 0
27 br i1 %tobool, label %while.end, label %while.body
28
29 while.end: ; preds = %while.body, %entry
30 %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
31 ret i32 %c.0.lcssa
32 }
33
34 ; To recognize this pattern:
35 ;int popcount(unsigned long long a, int mydata1, int mydata2) {
36 ; int c = 0;
37 ; while (a) {
38 ; c++;
39 ; a &= a - 1;
40 ; mydata1 *= c;
41 ; mydata2 *= (int)a;
42 ; }
43 ; return c + mydata1 + mydata2;
44 ;}
45 ; CHECK: entry
46 ; CHECK: llvm.ctpop.i64
47 ; CHECK: ret
48 define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp {
49 entry:
50 %tobool9 = icmp eq i64 %a, 0
51 br i1 %tobool9, label %while.end, label %while.body
52
53 while.body: ; preds = %entry, %while.body
54 %c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
55 %mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ]
56 %mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ]
57 %a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ]
58 %inc = add nsw i32 %c.013, 1
59 %sub = add i64 %a.addr.010, -1
60 %and = and i64 %sub, %a.addr.010
61 %mul = mul nsw i32 %inc, %mydata1.addr.011
62 %conv = trunc i64 %and to i32
63 %mul1 = mul nsw i32 %conv, %mydata2.addr.012
64 %tobool = icmp eq i64 %and, 0
65 br i1 %tobool, label %while.end, label %while.body
66
67 while.end: ; preds = %while.body, %entry
68 %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
69 %mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ]
70 %mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ]
71 %add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa
72 %add2 = add i32 %add, %c.0.lcssa
73 ret i32 %add2
74 }
75