llvm.org GIT mirror llvm / b1fd3cd
Implement PPC counter loops as a late IR-level pass The old PPCCTRLoops pass, like the Hexagon pass version from which it was derived, could only handle some simple loops in canonical form. We cannot directly adapt the new Hexagon hardware loops pass, however, because the Hexagon pass contains a fundamental assumption that non-constant-trip-count loops will contain a guard, and this is not always true (the result being that incorrect negative counts can be generated). With this commit, we replace the pass with a late IR-level pass which makes use of SE to calculate the backedge-taken counts and safely generate the loop-count expressions (including any necessary max() parts). This IR level pass inserts custom intrinsics that are lowered into the desired decrement-and-branch instructions. The most fragile part of this new implementation is that interfering uses of the counter register must be detected on the IR level (and, on PPC, this also includes any indirect branches in addition to function calls). Also, to make all of this work, we need a variant of the mtctr instruction that is marked as having side effects. Without this, machine-code level CSE, DCE, etc. illegally transform the resulting code. Hopefully, this can be improved in the future. This new pass is smaller than the original (and much smaller than the new Hexagon hardware loops pass), and can handle many additional cases correctly. In addition, the preheader-creation code has been copied from LoopSimplify, and after we decide on where it belongs, this code will be refactored so that it can be explicitly shared (making this implementation even smaller). The new test-case files ctrloop-{le,lt,ne}.ll have been adapted from tests for the new Hexagon pass. There are a few classes of loops that this pass does not transform (noted by FIXMEs in the files), but these deficiencies can be addressed within the SE infrastructure (thus helping many other passes as well). 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181927 91177308-0d34-0410-b5e6-96231b3b80d8 Hal Finkel 7 years ago
15 changed file(s) with 1869 addition(s) and 709 deletion(s). Raw diff Collapse all Expand all
2929
3030 // sync instruction
3131 def int_ppc_sync : Intrinsic<[], [], []>;
32
33 // Intrinsics used to generate ctr-based loops. These should only be
34 // generated by the PowerPC backend!
35 def int_ppc_mtctr : Intrinsic<[], [llvm_anyint_ty], []>;
36 def int_ppc_is_decremented_ctr_nonzero : Intrinsic<[llvm_i1_ty], [], []>;
3237 }
3338
3439
2929 class AsmPrinter;
3030 class MCInst;
3131
32 FunctionPass *createPPCCTRLoops();
32 FunctionPass *createPPCCTRLoops(PPCTargetMachine &TM);
3333 FunctionPass *createPPCEarlyReturnPass();
3434 FunctionPass *createPPCBranchSelectionPass();
3535 FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
88 //
99 // This pass identifies loops where we can generate the PPC branch instructions
1010 // that decrement and test the count register (CTR) (bdnz and friends).
11 // This pass is based on the HexagonHardwareLoops pass.
1211 //
1312 // The pattern that defines the induction variable can change depending on
1413 // prior optimizations. For example, the IndVarSimplify phase run by 'opt'
1514 // normalizes induction variables, and the Loop Strength Reduction pass
1615 // run by 'llc' may also make changes to the induction variable.
17 // The pattern detected by this phase is due to running Strength Reduction.
1816 //
1917 // Criteria for CTR loops:
2018 // - Countable loops (w/ ind. var for a trip count)
21 // - Assumes loops are normalized by IndVarSimplify
2219 // - Try inner-most loops first
2320 // - No nested CTR loops.
2421 // - No function calls in loops.
2522 //
26 // Note: As with unconverted loops, PPCBranchSelector must be run after this
27 // pass in order to convert long-displacement jumps into jump pairs.
28 //
2923 //===----------------------------------------------------------------------===//
3024
3125 #define DEBUG_TYPE "ctrloops"
26
27 #include "llvm/Transforms/Scalar.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/Analysis/Dominators.h"
31 #include "llvm/Analysis/LoopInfo.h"
32 #include "llvm/Analysis/ScalarEvolutionExpander.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/PassSupport.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/ValueHandle.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
44 #include "llvm/Transforms/Utils/Local.h"
45 #include "llvm/Target/TargetLibraryInfo.h"
46 #include "PPCTargetMachine.h"
3247 #include "PPC.h"
33 #include "MCTargetDesc/PPCPredicates.h"
34 #include "PPCTargetMachine.h"
35 #include "llvm/ADT/DenseMap.h"
36 #include "llvm/ADT/Statistic.h"
37 #include "llvm/CodeGen/MachineDominators.h"
38 #include "llvm/CodeGen/MachineFunction.h"
39 #include "llvm/CodeGen/MachineFunctionPass.h"
40 #include "llvm/CodeGen/MachineInstrBuilder.h"
41 #include "llvm/CodeGen/MachineLoopInfo.h"
42 #include "llvm/CodeGen/MachineRegisterInfo.h"
43 #include "llvm/CodeGen/Passes.h"
44 #include "llvm/CodeGen/RegisterScavenging.h"
45 #include "llvm/IR/Constants.h"
46 #include "llvm/PassSupport.h"
47 #include "llvm/Support/Debug.h"
48 #include "llvm/Support/raw_ostream.h"
49 #include "llvm/Target/TargetInstrInfo.h"
48
5049 #include
50 #include
5151
5252 using namespace llvm;
53
54 #ifndef NDEBUG
55 static cl::opt CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
56 #endif
5357
5458 STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
5559
5862 }
5963
6064 namespace {
61 class CountValue;
62 struct PPCCTRLoops : public MachineFunctionPass {
63 MachineLoopInfo *MLI;
64 MachineRegisterInfo *MRI;
65 const TargetInstrInfo *TII;
65 struct PPCCTRLoops : public FunctionPass {
66
67 #ifndef NDEBUG
68 static int Counter;
69 #endif
6670
6771 public:
68 static char ID; // Pass identification, replacement for typeid
69
70 PPCCTRLoops() : MachineFunctionPass(ID) {
72 static char ID;
73
74 PPCCTRLoops() : FunctionPass(ID), TM(0) {
7175 initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
7276 }
73
74 virtual bool runOnMachineFunction(MachineFunction &MF);
75
76 const char *getPassName() const { return "PPC CTR Loops"; }
77 PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
78 initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
79 }
80
81 virtual bool runOnFunction(Function &F);
7782
7883 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
79 AU.setPreservesCFG();
80 AU.addRequired();
81 AU.addPreserved();
82 AU.addRequired();
83 AU.addPreserved();
84 MachineFunctionPass::getAnalysisUsage(AU);
84 AU.addRequired();
85 AU.addPreserved();
86 AU.addRequired();
87 AU.addPreserved();
88 AU.addRequired();
8589 }
8690
8791 private:
88 /// getCanonicalInductionVariable - Check to see if the loop has a canonical
89 /// induction variable.
90 /// Should be defined in MachineLoop. Based upon version in class Loop.
91 void getCanonicalInductionVariable(MachineLoop *L,
92 SmallVector &IVars,
93 SmallVector &IOps) const;
94
95 /// getTripCount - Return a loop-invariant LLVM register indicating the
96 /// number of times the loop will be executed. If the trip-count cannot
97 /// be determined, this return null.
98 CountValue *getTripCount(MachineLoop *L,
99 SmallVector &OldInsts) const;
100
101 /// isInductionOperation - Return true if the instruction matches the
102 /// pattern for an operation that defines an induction variable.
103 bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const;
104
105 /// isInvalidOperation - Return true if the instruction is not valid within
106 /// a CTR loop.
107 bool isInvalidLoopOperation(const MachineInstr *MI) const;
108
109 /// containsInvalidInstruction - Return true if the loop contains an
110 /// instruction that inhibits using the CTR loop.
111 bool containsInvalidInstruction(MachineLoop *L) const;
112
113 /// convertToCTRLoop - Given a loop, check if we can convert it to a
114 /// CTR loop. If so, then perform the conversion and return true.
115 bool convertToCTRLoop(MachineLoop *L);
116
117 /// isDead - Return true if the instruction is now dead.
118 bool isDead(const MachineInstr *MI,
119 SmallVector &DeadPhis) const;
120
121 /// removeIfDead - Remove the instruction if it is now dead.
122 void removeIfDead(MachineInstr *MI);
92 // FIXME: Copied from LoopSimplify.
93 BasicBlock *InsertPreheaderForLoop(Loop *L);
94 void PlaceSplitBlockCarefully(BasicBlock *NewBB,
95 SmallVectorImpl &SplitPreds,
96 Loop *L);
97
98 bool convertToCTRLoop(Loop *L);
99 private:
100 PPCTargetMachine *TM;
101 LoopInfo *LI;
102 ScalarEvolution *SE;
103 DataLayout *TD;
104 DominatorTree *DT;
105 const TargetLibraryInfo *LibInfo;
123106 };
124107
125108 char PPCCTRLoops::ID = 0;
126
127
128 // CountValue class - Abstraction for a trip count of a loop. A
129 // smaller version of the MachineOperand class without the concerns
130 // of changing the operand representation.
131 class CountValue {
132 public:
133 enum CountValueType {
134 CV_Register,
135 CV_Immediate
136 };
137 private:
138 CountValueType Kind;
139 union Values {
140 unsigned RegNum;
141 int64_t ImmVal;
142 Values(unsigned r) : RegNum(r) {}
143 Values(int64_t i) : ImmVal(i) {}
144 } Contents;
145 bool isNegative;
146
147 public:
148 CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r),
149 isNegative(neg) {}
150 explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i),
151 isNegative(i < 0) {}
152 CountValueType getType() const { return Kind; }
153 bool isReg() const { return Kind == CV_Register; }
154 bool isImm() const { return Kind == CV_Immediate; }
155 bool isNeg() const { return isNegative; }
156
157 unsigned getReg() const {
158 assert(isReg() && "Wrong CountValue accessor");
159 return Contents.RegNum;
160 }
161 void setReg(unsigned Val) {
162 Contents.RegNum = Val;
163 }
164 int64_t getImm() const {
165 assert(isImm() && "Wrong CountValue accessor");
166 if (isNegative) {
167 return -Contents.ImmVal;
168 }
169 return Contents.ImmVal;
170 }
171 void setImm(int64_t Val) {
172 Contents.ImmVal = Val;
173 }
174
175 void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
176 if (isReg()) { OS << PrintReg(getReg()); }
177 if (isImm()) { OS << getImm(); }
178 }
179 };
109 #ifndef NDEBUG
110 int PPCCTRLoops::Counter = 0;
111 #endif
180112 } // end anonymous namespace
181113
182114 INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
183115 false, false)
184 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
185 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
116 INITIALIZE_PASS_DEPENDENCY(DominatorTree)
117 INITIALIZE_PASS_DEPENDENCY(LoopInfo)
118 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
186119 INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
187120 false, false)
188121
189 /// isCompareEquals - Returns true if the instruction is a compare equals
190 /// instruction with an immediate operand.
191 static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp,
192 bool &Int64Cmp) {
193 if (MI->getOpcode() == PPC::CMPWI) {
194 SignedCmp = true;
195 Int64Cmp = false;
196 return true;
197 } else if (MI->getOpcode() == PPC::CMPDI) {
198 SignedCmp = true;
199 Int64Cmp = true;
200 return true;
201 } else if (MI->getOpcode() == PPC::CMPLWI) {
202 SignedCmp = false;
203 Int64Cmp = false;
204 return true;
205 } else if (MI->getOpcode() == PPC::CMPLDI) {
206 SignedCmp = false;
207 Int64Cmp = true;
208 return true;
209 }
210
211 return false;
122 FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) {
123 return new PPCCTRLoops(TM);
212124 }
213125
214
215 /// createPPCCTRLoops - Factory for creating
216 /// the CTR loop phase.
217 FunctionPass *llvm::createPPCCTRLoops() {
218 return new PPCCTRLoops();
126 bool PPCCTRLoops::runOnFunction(Function &F) {
127 LI = &getAnalysis();
128 SE = &getAnalysis();
129 DT = &getAnalysis();
130 TD = getAnalysisIfAvailable();
131 LibInfo = getAnalysisIfAvailable();
132
133 bool MadeChange = false;
134
135 for (LoopInfo::iterator I = LI->begin(), E = LI->end();
136 I != E; ++I) {
137 Loop *L = *I;
138 if (!L->getParentLoop())
139 MadeChange |= convertToCTRLoop(L);
140 }
141
142 return MadeChange;
219143 }
220144
221
222 bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
223 DEBUG(dbgs() << "********* PPC CTR Loops *********\n");
224
225 bool Changed = false;
226
227 // get the loop information
228 MLI = &getAnalysis();
229 // get the register information
230 MRI = &MF.getRegInfo();
231 // the target specific instruction info.
232 TII = MF.getTarget().getInstrInfo();
233
234 for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
235 I != E; ++I) {
236 MachineLoop *L = *I;
237 if (!L->getParentLoop()) {
238 Changed |= convertToCTRLoop(L);
239 }
240 }
241
242 return Changed;
243 }
244
245 /// getCanonicalInductionVariable - Check to see if the loop has a canonical
246 /// induction variable. We check for a simple recurrence pattern - an
247 /// integer recurrence that decrements by one each time through the loop and
248 /// ends at zero. If so, return the phi node that corresponds to it.
249 ///
250 /// Based upon the similar code in LoopInfo except this code is specific to
251 /// the machine.
252 /// This method assumes that the IndVarSimplify pass has been run by 'opt'.
253 ///
254 void
255 PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L,
256 SmallVector &IVars,
257 SmallVector &IOps) const {
258 MachineBasicBlock *TopMBB = L->getTopBlock();
259 MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
260 assert(PI != TopMBB->pred_end() &&
261 "Loop must have more than one incoming edge!");
262 MachineBasicBlock *Backedge = *PI++;
263 if (PI == TopMBB->pred_end()) return; // dead loop
264 MachineBasicBlock *Incoming = *PI++;
265 if (PI != TopMBB->pred_end()) return; // multiple backedges?
266
267 // make sure there is one incoming and one backedge and determine which
268 // is which.
269 if (L->contains(Incoming)) {
270 if (L->contains(Backedge))
271 return;
272 std::swap(Incoming, Backedge);
273 } else if (!L->contains(Backedge))
274 return;
275
276 // Loop over all of the PHI nodes, looking for a canonical induction variable:
277 // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
278 // - The recurrence comes from the backedge.
279 // - the definition is an induction operation.
280 for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end();
281 I != E && I->isPHI(); ++I) {
282 MachineInstr *MPhi = &*I;
283 unsigned DefReg = MPhi->getOperand(0).getReg();
284 for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
285 // Check each operand for the value from the backedge.
286 MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB();
287 if (L->contains(MBB)) { // operands comes from the backedge
288 // Check if the definition is an induction operation.
289 MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
290 if (isInductionOperation(DI, DefReg)) {
291 IOps.push_back(DI);
292 IVars.push_back(MPhi);
293 }
294 }
295 }
296 }
297 return;
298 }
299
300 /// getTripCount - Return a loop-invariant LLVM value indicating the
301 /// number of times the loop will be executed. The trip count can
302 /// be either a register or a constant value. If the trip-count
303 /// cannot be determined, this returns null.
304 ///
305 /// We find the trip count from the phi instruction that defines the
306 /// induction variable. We follow the links to the CMP instruction
307 /// to get the trip count.
308 ///
309 /// Based upon getTripCount in LoopInfo.
310 ///
311 CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
312 SmallVector &OldInsts) const {
313 MachineBasicBlock *LastMBB = L->getExitingBlock();
314 // Don't generate a CTR loop if the loop has more than one exit.
315 if (LastMBB == 0)
316 return 0;
317
318 MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
319 if (LastI->getOpcode() != PPC::BCC)
320 return 0;
321
322 // We need to make sure that this compare is defining the condition
323 // register actually used by the terminating branch.
324
325 unsigned PredReg = LastI->getOperand(1).getReg();
326 DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI);
327
328 unsigned PredCond = LastI->getOperand(0).getImm();
329 if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
330 return 0;
331
332 // Check that the loop has an induction variable.
333 SmallVector IVars, IOps;
334 getCanonicalInductionVariable(L, IVars, IOps);
335 for (unsigned i = 0; i < IVars.size(); ++i) {
336 MachineInstr *IOp = IOps[i];
337 MachineInstr *IV_Inst = IVars[i];
338
339 // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm',
340 // if Imm is 0, get the count from the PHI opnd
341 // if Imm is -M, than M is the count
342 // Otherwise, Imm is the count
343 MachineOperand *IV_Opnd;
344 const MachineOperand *InitialValue;
345 if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
346 InitialValue = &IV_Inst->getOperand(1);
347 IV_Opnd = &IV_Inst->getOperand(3);
348 } else {
349 InitialValue = &IV_Inst->getOperand(3);
350 IV_Opnd = &IV_Inst->getOperand(1);
351 }
352
353 DEBUG(dbgs() << "Considering:\n");
354 DEBUG(dbgs() << " induction operation: " << *IOp);
355 DEBUG(dbgs() << " induction variable: " << *IV_Inst);
356 DEBUG(dbgs() << " initial value: " << *InitialValue << "\n");
357
358 // Look for the cmp instruction to determine if we
359 // can get a useful trip count. The trip count can
360 // be either a register or an immediate. The location
361 // of the value depends upon the type (reg or imm).
362 for (MachineRegisterInfo::reg_iterator
363 RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
364 RI != RE; ++RI) {
365 IV_Opnd = &RI.getOperand();
366 bool SignedCmp, Int64Cmp;
367 MachineInstr *MI = IV_Opnd->getParent();
368 if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) &&
369 MI->getOperand(0).getReg() == PredReg) {
370
371 OldInsts.push_back(MI);
372 OldInsts.push_back(IOp);
373
374 DEBUG(dbgs() << " compare: " << *MI);
375
376 const MachineOperand &MO = MI->getOperand(2);
377 assert(MO.isImm() && "IV Cmp Operand should be an immediate");
378
379 int64_t ImmVal;
380 if (SignedCmp)
381 ImmVal = (short) MO.getImm();
382 else
383 ImmVal = MO.getImm();
384
385 const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
386 assert(L->contains(IV_DefInstr->getParent()) &&
387 "IV definition should occurs in loop");
388 int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm();
389
390 assert(InitialValue->isReg() && "Expecting register for init value");
391 unsigned InitialValueReg = InitialValue->getReg();
392
393 MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
394
395 // Here we need to look for an immediate load (an li or lis/ori pair).
396 if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
397 DefInstr->getOpcode() == PPC::ORI)) {
398 int64_t start = DefInstr->getOperand(2).getImm();
399 MachineInstr *DefInstr2 =
400 MRI->getVRegDef(DefInstr->getOperand(1).getReg());
401 if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
402 DefInstr2->getOpcode() == PPC::LIS)) {
403 DEBUG(dbgs() << " initial constant: " << *DefInstr);
404 DEBUG(dbgs() << " initial constant: " << *DefInstr2);
405
406 start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16;
407
408 int64_t count = ImmVal - start;
409 if ((count % iv_value) != 0) {
410 return 0;
411 }
412
413 OldInsts.push_back(DefInstr);
414 OldInsts.push_back(DefInstr2);
415
416 // count/iv_value, the trip count, should be positive here. If it
417 // is negative, that indicates that the counter will wrap.
418 if (Int64Cmp)
419 return new CountValue(count/iv_value);
420 else
421 return new CountValue(uint32_t(count/iv_value));
422 }
423 } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
424 DefInstr->getOpcode() == PPC::LI)) {
425 DEBUG(dbgs() << " initial constant: " << *DefInstr);
426
427 int64_t count = ImmVal -
428 int64_t(short(DefInstr->getOperand(1).getImm()));
429 if ((count % iv_value) != 0) {
430 return 0;
431 }
432
433 OldInsts.push_back(DefInstr);
434
435 if (Int64Cmp)
436 return new CountValue(count/iv_value);
437 else
438 return new CountValue(uint32_t(count/iv_value));
439 } else if (iv_value == 1 || iv_value == -1) {
440 // We can't determine a constant starting value.
441 if (ImmVal == 0) {
442 return new CountValue(InitialValueReg, iv_value > 0);
443 }
444 // FIXME: handle non-zero end value.
445 }
446 // FIXME: handle non-unit increments (we might not want to introduce
447 // division but we can handle some 2^n cases with shifts).
448
449 }
450 }
451 }
452 return 0;
453 }
454
455 /// isInductionOperation - return true if the operation matches the
456 /// pattern that defines an induction variable:
457 /// addi iv, c
458 ///
459 bool
460 PPCCTRLoops::isInductionOperation(const MachineInstr *MI,
461 unsigned IVReg) const {
462 return ((MI->getOpcode() == PPC::ADDI || MI->getOpcode() == PPC::ADDI8) &&
463 MI->getOperand(1).isReg() && // could be a frame index instead
464 MI->getOperand(1).getReg() == IVReg);
465 }
466
467 /// isInvalidOperation - Return true if the operation is invalid within
468 /// CTR loop.
469 bool
470 PPCCTRLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
471
472 // call is not allowed because the callee may use a CTR loop
473 if (MI->getDesc().isCall()) {
474 return true;
475 }
476 // check if the instruction defines a CTR loop register
477 // (this will also catch nested CTR loops)
478 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
479 const MachineOperand &MO = MI->getOperand(i);
480 if (MO.isReg() && MO.isDef() &&
481 (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) {
482 return true;
483 }
484 }
485 return false;
486 }
487
488 /// containsInvalidInstruction - Return true if the loop contains
489 /// an instruction that inhibits the use of the CTR loop function.
490 ///
491 bool PPCCTRLoops::containsInvalidInstruction(MachineLoop *L) const {
492 const std::vector Blocks = L->getBlocks();
493 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
494 MachineBasicBlock *MBB = Blocks[i];
495 for (MachineBasicBlock::iterator
496 MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
497 const MachineInstr *MI = &*MII;
498 if (isInvalidLoopOperation(MI)) {
499 return true;
500 }
501 }
502 }
503 return false;
504 }
505
506 /// isDead returns true if the instruction is dead
507 /// (this was essentially copied from DeadMachineInstructionElim::isDead, but
508 /// with special cases for inline asm, physical registers and instructions with
509 /// side effects removed)
510 bool PPCCTRLoops::isDead(const MachineInstr *MI,
511 SmallVector &DeadPhis) const {
512 // Examine each operand.
513 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
514 const MachineOperand &MO = MI->getOperand(i);
515 if (MO.isReg() && MO.isDef()) {
516 unsigned Reg = MO.getReg();
517 if (!MRI->use_nodbg_empty(Reg)) {
518 // This instruction has users, but if the only user is the phi node for
519 // the parent block, and the only use of that phi node is this
520 // instruction, then this instruction is dead: both it (and the phi
521 // node) can be removed.
522 MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
523 if (llvm::next(I) == MRI->use_end() &&
524 I.getOperand().getParent()->isPHI()) {
525 MachineInstr *OnePhi = I.getOperand().getParent();
526
527 for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
528 const MachineOperand &OPO = OnePhi->getOperand(j);
529 if (OPO.isReg() && OPO.isDef()) {
530 unsigned OPReg = OPO.getReg();
531
532 MachineRegisterInfo::use_iterator nextJ;
533 for (MachineRegisterInfo::use_iterator J = MRI->use_begin(OPReg),
534 E = MRI->use_end(); J!=E; J=nextJ) {
535 nextJ = llvm::next(J);
536 MachineOperand& Use = J.getOperand();
537 MachineInstr *UseMI = Use.getParent();
538
539 if (MI != UseMI) {
540 // The phi node has a user that is not MI, bail...
541 return false;
542 }
543 }
145 bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
146 bool MadeChange = false;
147
148 Triple TT = Triple(L->getHeader()->getParent()->getParent()->
149 getTargetTriple());
150 if (!TT.isArch32Bit() && !TT.isArch64Bit())
151 return MadeChange; // Unknown arch. type.
152
153 // Process nested loops first.
154 for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
155 MadeChange |= convertToCTRLoop(*I);
156 }
157
158 // If a nested loop has been converted, then we can't convert this loop.
159 if (MadeChange)
160 return MadeChange;
161
162 #ifndef NDEBUG
163 // Stop trying after reaching the limit (if any).
164 int Limit = CTRLoopLimit;
165 if (Limit >= 0) {
166 if (Counter >= CTRLoopLimit)
167 return false;
168 Counter++;
169 }
170 #endif
171
172 // We don't want to spill/restore the counter register, and so we don't
173 // want to use the counter register if the loop contains calls.
174 for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
175 I != IE; ++I) {
176 for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
177 J != JE; ++J) {
178 if (CallInst *CI = dyn_cast(J)) {
179 if (!TM)
180 return MadeChange;
181 const TargetLowering *TLI = TM->getTargetLowering();
182
183 if (Function *F = CI->getCalledFunction()) {
184 // Most intrinsics don't become function calls, but some might.
185 // sin, cos, exp and log are always calls.
186 unsigned Opcode;
187 if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
188 switch (F->getIntrinsicID()) {
189 default: continue;
190 case Intrinsic::setjmp:
191 case Intrinsic::longjmp:
192 case Intrinsic::memcpy:
193 case Intrinsic::memmove:
194 case Intrinsic::memset:
195 case Intrinsic::powi:
196 case Intrinsic::log:
197 case Intrinsic::log2:
198 case Intrinsic::log10:
199 case Intrinsic::exp:
200 case Intrinsic::exp2:
201 case Intrinsic::pow:
202 case Intrinsic::sin:
203 case Intrinsic::cos:
204 return MadeChange;
205 case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
206 case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
207 case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
208 case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
209 case Intrinsic::rint: Opcode = ISD::FRINT; break;
210 case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
544211 }
545212 }
546213
547 DeadPhis.push_back(OnePhi);
548 } else {
549 // This def has a non-debug use. Don't delete the instruction!
550 return false;
214 // PowerPC does not use [US]DIVREM or other library calls for
215 // operations on regular types which are not otherwise library calls
216 // (i.e. soft float or atomics). If adapting for targets that do,
217 // additional care is required here.
218
219 LibFunc::Func Func;
220 if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
221 LibInfo->getLibFunc(F->getName(), Func) &&
222 LibInfo->hasOptimizedCodeGen(Func)) {
223 // Non-read-only functions are never treated as intrinsics.
224 if (!CI->onlyReadsMemory())
225 return MadeChange;
226
227 // Conversion happens only for FP calls.
228 if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
229 return MadeChange;
230
231 switch (Func) {
232 default: return MadeChange;
233 case LibFunc::copysign:
234 case LibFunc::copysignf:
235 case LibFunc::copysignl:
236 continue; // ISD::FCOPYSIGN is never a library call.
237 case LibFunc::fabs:
238 case LibFunc::fabsf:
239 case LibFunc::fabsl:
240 continue; // ISD::FABS is never a library call.
241 case LibFunc::sqrt:
242 case LibFunc::sqrtf:
243 case LibFunc::sqrtl:
244 Opcode = ISD::FSQRT; break;
245 case LibFunc::floor:
246 case LibFunc::floorf:
247 case LibFunc::floorl:
248 Opcode = ISD::FFLOOR; break;
249 case LibFunc::nearbyint:
250 case LibFunc::nearbyintf:
251 case LibFunc::nearbyintl:
252 Opcode = ISD::FNEARBYINT; break;
253 case LibFunc::ceil:
254 case LibFunc::ceilf:
255 case LibFunc::ceill:
256 Opcode = ISD::FCEIL; break;
257 case LibFunc::rint:
258 case LibFunc::rintf:
259 case LibFunc::rintl:
260 Opcode = ISD::FRINT; break;
261 case LibFunc::trunc:
262 case LibFunc::truncf:
263 case LibFunc::truncl:
264 Opcode = ISD::FTRUNC; break;
265 }
266
267 MVT VTy =
268 TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true);
269 if (VTy == MVT::Other)
270 return MadeChange;
271
272 if (TLI->isOperationLegalOrCustom(Opcode, VTy))
273 continue;
274 else if (VTy.isVector() &&
275 TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType()))
276 continue;
277
278 return MadeChange;
279 }
551280 }
281
282 return MadeChange;
283 } else if (isa(J) &&
284 J->getType()->getScalarType()->isPPC_FP128Ty()) {
285 // Most operations on ppc_f128 values become calls.
286 return MadeChange;
287 } else if (isa(J) || isa(J) ||
288 isa(J) || isa(J)) {
289 CastInst *CI = cast(J);
290 if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
291 CI->getDestTy()->getScalarType()->isPPC_FP128Ty())
292 return MadeChange;
293 } else if (isa(J) || isa(J)) {
294 // On PowerPC, indirect jumps use the counter register.
295 return MadeChange;
296 } else if (SwitchInst *SI = dyn_cast(J)) {
297 if (!TM)
298 return MadeChange;
299 const TargetLowering *TLI = TM->getTargetLowering();
300
301 if (TLI->supportJumpTables() &&
302 SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries())
303 return MadeChange;
552304 }
553305 }
554306 }
555307
556 // If there are no defs with uses, the instruction is dead.
557 return true;
308 SmallVector ExitingBlocks;
309 L->getExitingBlocks(ExitingBlocks);
310
311 BasicBlock *CountedExitBlock = 0;
312 const SCEV *ExitCount = 0;
313 BranchInst *CountedExitBranch = 0;
314 for (SmallVector::iterator I = ExitingBlocks.begin(),
315 IE = ExitingBlocks.end(); I != IE; ++I) {
316 const SCEV *EC = SE->getExitCount(L, *I);
317 DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
318 (*I)->getName() << ": " << *EC << "\n");
319 if (isa(EC))
320 continue;
321 if (const SCEVConstant *ConstEC = dyn_cast(EC)) {
322 if (ConstEC->getValue()->isZero())
323 continue;
324 } else if (!SE->isLoopInvariant(EC, L))
325 continue;
326
327 // We now have a loop-invariant count of loop iterations (which is not the
328 // constant zero) for which we know that this loop will not exit via this
329 // existing block.
330
331 // We need to make sure that this block will run on every loop iteration.
332 // For this to be true, we must dominate all blocks with backedges. Such
333 // blocks are in-loop predecessors to the header block.
334 bool NotAlways = false;
335 for (pred_iterator PI = pred_begin(L->getHeader()),
336 PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
337 if (!L->contains(*PI))
338 continue;
339
340 if (!DT->dominates(*I, *PI)) {
341 NotAlways = true;
342 break;
343 }
344 }
345
346 if (NotAlways)
347 continue;
348
349 // Make sure this blocks ends with a conditional branch.
350 Instruction *TI = (*I)->getTerminator();
351 if (!TI)
352 continue;
353
354 if (BranchInst *BI = dyn_cast(TI)) {
355 if (!BI->isConditional())
356 continue;
357
358 CountedExitBranch = BI;
359 } else
360 continue;
361
362 // Note that this block may not be the loop latch block, even if the loop
363 // has a latch block.
364 CountedExitBlock = *I;
365 ExitCount = EC;
366 break;
367 }
368
369 if (!CountedExitBlock)
370 return MadeChange;
371
372 BasicBlock *Preheader = L->getLoopPreheader();
373 if (!Preheader)
374 Preheader = InsertPreheaderForLoop(L);
375 if (!Preheader)
376 return MadeChange;
377
378 DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");
379
380 // Insert the count into the preheader and replace the condition used by the
381 // selected branch.
382 MadeChange = true;
383
384 SCEVExpander SCEVE(*SE, "loopcnt");
385 LLVMContext &C = SE->getContext();
386 Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
387 Type::getInt32Ty(C);
388 if (!ExitCount->getType()->isPointerTy() &&
389 ExitCount->getType() != CountType)
390 ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
391 ExitCount = SE->getAddExpr(ExitCount,
392 SE->getConstant(CountType, 1));
393 Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType,
394 Preheader->getTerminator());
395
396 IRBuilder<> CountBuilder(Preheader->getTerminator());
397 Module *M = Preheader->getParent()->getParent();
398 Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
399 CountType);
400 CountBuilder.CreateCall(MTCTRFunc, ECValue);
401
402 IRBuilder<> CondBuilder(CountedExitBranch);
403 Value *DecFunc =
404 Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
405 Value *NewCond = CondBuilder.CreateCall(DecFunc);
406 Value *OldCond = CountedExitBranch->getCondition();
407 CountedExitBranch->setCondition(NewCond);
408
409 // The false branch must exit the loop.
410 if (!L->contains(CountedExitBranch->getSuccessor(0)))
411 CountedExitBranch->swapSuccessors();
412
413 // The old condition may be dead now, and may have even created a dead PHI
414 // (the original induction variable).
415 RecursivelyDeleteTriviallyDeadInstructions(OldCond);
416 DeleteDeadPHIs(CountedExitBlock);
417
418 ++NumCTRLoops;
419 return MadeChange;
558420 }
559421
560 void PPCCTRLoops::removeIfDead(MachineInstr *MI) {
561 // This procedure was essentially copied from DeadMachineInstructionElim
562
563 SmallVector DeadPhis;
564 if (isDead(MI, DeadPhis)) {
565 DEBUG(dbgs() << "CTR looping will remove: " << *MI);
566
567 // It is possible that some DBG_VALUE instructions refer to this
568 // instruction. Examine each def operand for such references;
569 // if found, mark the DBG_VALUE as undef (but don't delete it).
570 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
571 const MachineOperand &MO = MI->getOperand(i);
572 if (!MO.isReg() || !MO.isDef())
573 continue;
574 unsigned Reg = MO.getReg();
575 MachineRegisterInfo::use_iterator nextI;
576 for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
577 E = MRI->use_end(); I!=E; I=nextI) {
578 nextI = llvm::next(I); // I is invalidated by the setReg
579 MachineOperand& Use = I.getOperand();
580 MachineInstr *UseMI = Use.getParent();
581 if (UseMI==MI)
582 continue;
583 if (Use.isDebug()) // this might also be a instr -> phi -> instr case
584 // which can also be removed.
585 UseMI->getOperand(0).setReg(0U);
586 }
587 }
588
589 MI->eraseFromParent();
590 for (unsigned i = 0; i < DeadPhis.size(); ++i) {
591 DeadPhis[i]->eraseFromParent();
592 }
593 }
422 // FIXME: Copied from LoopSimplify.
423 BasicBlock *PPCCTRLoops::InsertPreheaderForLoop(Loop *L) {
424 BasicBlock *Header = L->getHeader();
425
426 // Compute the set of predecessors of the loop that are not in the loop.
427 SmallVector OutsideBlocks;
428 for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
429 PI != PE; ++PI) {
430 BasicBlock *P = *PI;
431 if (!L->contains(P)) { // Coming in from outside the loop?
432 // If the loop is branched to from an indirect branch, we won't
433 // be able to fully transform the loop, because it prohibits
434 // edge splitting.
435 if (isa(P->getTerminator())) return 0;
436
437 // Keep track of it.
438 OutsideBlocks.push_back(P);
439 }
440 }
441
442 // Split out the loop pre-header.
443 BasicBlock *PreheaderBB;
444 if (!Header->isLandingPad()) {
445 PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
446 this);
447 } else {
448 SmallVector NewBBs;
449 SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
450 ".split-lp", this, NewBBs);
451 PreheaderBB = NewBBs[0];
452 }
453
454 PreheaderBB->getTerminator()->setDebugLoc(
455 Header->getFirstNonPHI()->getDebugLoc());
456 DEBUG(dbgs() << "Creating pre-header "
457 << PreheaderBB->getName() << "\n");
458
459 // Make sure that NewBB is put someplace intelligent, which doesn't mess up
460 // code layout too horribly.
461 PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
462
463 return PreheaderBB;
594464 }
595465
596 /// converToCTRLoop - check if the loop is a candidate for
597 /// converting to a CTR loop. If so, then perform the
598 /// transformation.
599 ///
600 /// This function works on innermost loops first. A loop can
601 /// be converted if it is a counting loop; either a register
602 /// value or an immediate.
603 ///
604 /// The code makes several assumptions about the representation
605 /// of the loop in llvm.
606 bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
607 bool Changed = false;
608 // Process nested loops first.
609 for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
610 Changed |= convertToCTRLoop(*I);
611 }
612 // If a nested loop has been converted, then we can't convert this loop.
613 if (Changed) {
614 return Changed;
615 }
616
617 SmallVector OldInsts;
618 // Are we able to determine the trip count for the loop?
619 CountValue *TripCount = getTripCount(L, OldInsts);
620 if (TripCount == 0) {
621 DEBUG(dbgs() << "failed to get trip count!\n");
622 return false;
623 }
624
625 if (TripCount->isImm()) {
626 DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n");
627
628 // FIXME: We currently can't form 64-bit constants
629 // (including 32-bit unsigned constants)
630 if (!isInt<32>(TripCount->getImm()))
631 return false;
632 }
633
634 // Does the loop contain any invalid instructions?
635 if (containsInvalidInstruction(L)) {
636 return false;
637 }
638 MachineBasicBlock *Preheader = L->getLoopPreheader();
639 // No preheader means there's not place for the loop instr.
640 if (Preheader == 0) {
641 return false;
642 }
643 MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
644
645 DebugLoc dl;
646 if (InsertPos != Preheader->end())
647 dl = InsertPos->getDebugLoc();
648
649 MachineBasicBlock *LastMBB = L->getExitingBlock();
650 // Don't generate CTR loop if the loop has more than one exit.
651 if (LastMBB == 0) {
652 return false;
653 }
654 MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
655
656 // Determine the loop start.
657 MachineBasicBlock *LoopStart = L->getTopBlock();
658 if (L->getLoopLatch() != LastMBB) {
659 // When the exit and latch are not the same, use the latch block as the
660 // start.
661 // The loop start address is used only after the 1st iteration, and the loop
662 // latch may contains instrs. that need to be executed after the 1st iter.
663 LoopStart = L->getLoopLatch();
664 // Make sure the latch is a successor of the exit, otherwise it won't work.
665 if (!LastMBB->isSuccessor(LoopStart)) {
666 return false;
667 }
668 }
669
670 // Convert the loop to a CTR loop
671 DEBUG(dbgs() << "Change to CTR loop at "; L->dump());
672
673 MachineFunction *MF = LastMBB->getParent();
674 const PPCSubtarget &Subtarget = MF->getTarget().getSubtarget();
675 bool isPPC64 = Subtarget.isPPC64();
676
677 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
678 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
679 const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
680
681 unsigned CountReg;
682 if (TripCount->isReg()) {
683 // Create a copy of the loop count register.
684 const TargetRegisterClass *SrcRC =
685 MF->getRegInfo().getRegClass(TripCount->getReg());
686 CountReg = MF->getRegInfo().createVirtualRegister(RC);
687 unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ?
688 (unsigned) PPC::EXTSW_32_64 :
689 (unsigned) TargetOpcode::COPY;
690 BuildMI(*Preheader, InsertPos, dl,
691 TII->get(CopyOp), CountReg).addReg(TripCount->getReg());
692 if (TripCount->isNeg()) {
693 unsigned CountReg1 = CountReg;
694 CountReg = MF->getRegInfo().createVirtualRegister(RC);
695 BuildMI(*Preheader, InsertPos, dl,
696 TII->get(isPPC64 ? PPC::NEG8 : PPC::NEG),
697 CountReg).addReg(CountReg1);
698 }
699 } else {
700 assert(TripCount->isImm() && "Expecting immedate vaule for trip count");
701 // Put the trip count in a register for transfer into the count register.
702
703 int64_t CountImm = TripCount->getImm();
704 if (TripCount->isNeg())
705 CountImm = -CountImm;
706
707 CountReg = MF->getRegInfo().createVirtualRegister(RC);
708 if (abs64(CountImm) > 0x7FFF) {
709 BuildMI(*Preheader, InsertPos, dl,
710 TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS),
711 CountReg).addImm((CountImm >> 16) & 0xFFFF);
712 unsigned CountReg1 = CountReg;
713 CountReg = MF->getRegInfo().createVirtualRegister(RC);
714 BuildMI(*Preheader, InsertPos, dl,
715 TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
716 CountReg).addReg(CountReg1).addImm(CountImm & 0xFFFF);
717 } else {
718 BuildMI(*Preheader, InsertPos, dl,
719 TII->get(isPPC64 ? PPC::LI8 : PPC::LI),
720 CountReg).addImm(CountImm);
721 }
722 }
723
724 // Add the mtctr instruction to the beginning of the loop.
725 BuildMI(*Preheader, InsertPos, dl,
726 TII->get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(CountReg,
727 TripCount->isImm() ? RegState::Kill : 0);
728
729 // Make sure the loop start always has a reference in the CFG. We need to
730 // create a BlockAddress operand to get this mechanism to work both the
731 // MachineBasicBlock and BasicBlock objects need the flag set.
732 LoopStart->setHasAddressTaken();
733 // This line is needed to set the hasAddressTaken flag on the BasicBlock
734 // object
735 BlockAddress::get(const_cast(LoopStart->getBasicBlock()));
736
737 // Replace the loop branch with a bdnz instruction.
738 dl = LastI->getDebugLoc();
739 const std::vector Blocks = L->getBlocks();
740 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
741 MachineBasicBlock *MBB = Blocks[i];
742 if (MBB != Preheader)
743 MBB->addLiveIn(isPPC64 ? PPC::CTR8 : PPC::CTR);
744 }
745
746 // The loop ends with either:
747 // - a conditional branch followed by an unconditional branch, or
748 // - a conditional branch to the loop start.
749 assert(LastI->getOpcode() == PPC::BCC &&
750 "loop end must start with a BCC instruction");
751 // Either the BCC branches to the beginning of the loop, or it
752 // branches out of the loop and there is an unconditional branch
753 // to the start of the loop.
754 MachineBasicBlock *BranchTarget = LastI->getOperand(2).getMBB();
755 BuildMI(*LastMBB, LastI, dl,
756 TII->get((BranchTarget == LoopStart) ?
757 (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
758 (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(BranchTarget);
759
760 // Conditional branch; just delete it.
761 DEBUG(dbgs() << "Removing old branch: " << *LastI);
762 LastMBB->erase(LastI);
763
764 delete TripCount;
765
766 // The induction operation (add) and the comparison (cmpwi) may now be
767 // unneeded. If these are unneeded, then remove them.
768 for (unsigned i = 0; i < OldInsts.size(); ++i)
769 removeIfDead(OldInsts[i]);
770
771 ++NumCTRLoops;
772 return true;
466 void PPCCTRLoops::PlaceSplitBlockCarefully(BasicBlock *NewBB,
467 SmallVectorImpl &SplitPreds,
468 Loop *L) {
469 // Check to see if NewBB is already well placed.
470 Function::iterator BBI = NewBB; --BBI;
471 for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
472 if (&*BBI == SplitPreds[i])
473 return;
474 }
475
476 // If it isn't already after an outside block, move it after one. This is
477 // always good as it makes the uncond branch from the outside block into a
478 // fall-through.
479
480 // Figure out *which* outside block to put this after. Prefer an outside
481 // block that neighbors a BB actually in the loop.
482 BasicBlock *FoundBB = 0;
483 for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
484 Function::iterator BBI = SplitPreds[i];
485 if (++BBI != NewBB->getParent()->end() &&
486 L->contains(BBI)) {
487 FoundBB = SplitPreds[i];
488 break;
489 }
490 }
491
492 // If our heuristic for a *good* bb to place this after doesn't find
493 // anything, just pick something. It's likely better than leaving it within
494 // the loop.
495 if (!FoundBB)
496 FoundBB = SplitPreds[0];
497 NewBB->moveAfter(FoundBB);
773498 }
774499
12411241 getI32Imm(BROpc) };
12421242 return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4);
12431243 }
1244 case PPCISD::BDNZ:
1245 case PPCISD::BDZ: {
1246 bool IsPPC64 = PPCSubTarget.isPPC64();
1247 SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
1248 return CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ?
1249 (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
1250 (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
1251 MVT::Other, Ops, 2);
1252 }
12441253 case PPCISD::COND_BRANCH: {
12451254 // Op #0 is the Chain.
12461255 // Op #1 is the PPC::PRED_* number.
311311
312312 // We want to custom lower some of our intrinsics.
313313 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
314
315 // To handle counter-based loop conditions.
316 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
314317
315318 // Comparisons that require checking two conditions.
316319 setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
645648 case PPCISD::LARX: return "PPCISD::LARX";
646649 case PPCISD::STCX: return "PPCISD::STCX";
647650 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
651 case PPCISD::BDNZ: return "PPCISD::BDNZ";
652 case PPCISD::BDZ: return "PPCISD::BDZ";
648653 case PPCISD::MFFS: return "PPCISD::MFFS";
649654 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
650655 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
57765781 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
57775782 case ISD::MUL: return LowerMUL(Op, DAG);
57785783
5784 // For counter-based loop handling.
5785 case ISD::INTRINSIC_W_CHAIN: return SDValue();
5786
57795787 // Frame & Return address.
57805788 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
57815789 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
57905798 switch (N->getOpcode()) {
57915799 default:
57925800 llvm_unreachable("Do not know how to custom type legalize this operation!");
5801 case ISD::INTRINSIC_W_CHAIN: {
5802 if (cast(N->getOperand(1))->getZExtValue() !=
5803 Intrinsic::ppc_is_decremented_ctr_nonzero)
5804 break;
5805
5806 assert(N->getValueType(0) == MVT::i1 &&
5807 "Unexpected result type for CTR decrement intrinsic");
5808 EVT SVT = getSetCCResultType(N->getValueType(0));
5809 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
5810 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
5811 N->getOperand(1));
5812
5813 Results.push_back(NewInt);
5814 Results.push_back(NewInt.getValue(1));
5815 break;
5816 }
57935817 case ISD::VAARG: {
57945818 if (!TM.getSubtarget().isSVR4ABI()
57955819 || TM.getSubtarget().isPPC64())
71017125 // compare down to code that is difficult to reassemble.
71027126 ISD::CondCode CC = cast(N->getOperand(1))->get();
71037127 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
7128
7129 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
7130 // value. If so, pass-through the AND to get to the intrinsic.
7131 if (LHS.getOpcode() == ISD::AND &&
7132 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
7133 cast(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
7134 Intrinsic::ppc_is_decremented_ctr_nonzero &&
7135 isa(LHS.getOperand(1)) &&
7136 !cast(LHS.getOperand(1))->getConstantIntValue()->
7137 isZero())
7138 LHS = LHS.getOperand(0);
7139
7140 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
7141 cast(LHS.getOperand(1))->getZExtValue() ==
7142 Intrinsic::ppc_is_decremented_ctr_nonzero &&
7143 isa(RHS)) {
7144 assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
7145 "Counter decrement comparison is not EQ or NE");
7146
7147 unsigned Val = cast(RHS)->getZExtValue();
7148 bool isBDNZ = (CC == ISD::SETEQ && Val) ||
7149 (CC == ISD::SETNE && !Val);
7150
7151 // We now need to make the intrinsic dead (it cannot be instruction
7152 // selected).
7153 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
7154 assert(LHS.getNode()->hasOneUse() &&
7155 "Counter decrement has more than one use");
7156
7157 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
7158 N->getOperand(0), N->getOperand(4));
7159 }
7160
71047161 int CompareOpc;
71057162 bool isDot;
71067163
144144 /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
145145 /// an optional input flag argument.
146146 COND_BRANCH,
147
148 /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
149 /// loops.
150 BDNZ, BDZ,
147151
148152 /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
149153 /// towards zero. Used only as part of the long double-to-int
291291 def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
292292 "mtctr $rS", SprMTSPR>,
293293 PPC970_DGroup_First, PPC970_Unit_FXU;
294 }
295 let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR8] in {
296 let Pattern = [(int_ppc_mtctr i64:$rS)] in
297 def MTCTR8se : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
298 "mtctr $rS", SprMTSPR>,
299 PPC970_DGroup_First, PPC970_Unit_FXU;
294300 }
295301
296302 let Pattern = [(set i64:$rT, readcyclecounter)] in
16991699 "mtctr $rS", SprMTSPR>,
17001700 PPC970_DGroup_First, PPC970_Unit_FXU;
17011701 }
1702 let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in {
1703 let Pattern = [(int_ppc_mtctr i32:$rS)] in
1704 def MTCTRse : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
1705 "mtctr $rS", SprMTSPR>,
1706 PPC970_DGroup_First, PPC970_Unit_FXU;
1707 }
17021708
17031709 let Defs = [LR] in {
17041710 def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS),
135135 Reserved.set(PPC::FP);
136136 Reserved.set(PPC::FP8);
137137
138 // The counter registers must be reserved so that counter-based loops can
139 // be correctly formed (and the mtctr instructions are not DCE'd).
140 Reserved.set(PPC::CTR);
141 Reserved.set(PPC::CTR8);
142
138143 Reserved.set(PPC::R1);
139144 Reserved.set(PPC::LR);
140145 Reserved.set(PPC::LR8);
9090 return *getPPCTargetMachine().getSubtargetImpl();
9191 }
9292
93 virtual bool addPreRegAlloc();
93 virtual bool addPreISel();
9494 virtual bool addILPOpts();
9595 virtual bool addInstSelector();
9696 virtual bool addPreSched2();
102102 return new PPCPassConfig(this, PM);
103103 }
104104
105 bool PPCPassConfig::addPreRegAlloc() {
105 bool PPCPassConfig::addPreISel() {
106106 if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
107 addPass(createPPCCTRLoops());
107 addPass(createPPCCTRLoops(getPPCTargetMachine()));
108108
109109 return false;
110110 }
0 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
1 target triple = "powerpc64-unknown-linux-gnu"
2 ; RUN: llc < %s -march=ppc64 | FileCheck %s
3
4 ; CHECK: test_pos1_ir_sle
5 ; CHECK: bdnz
6 ; a < b
7 define void @test_pos1_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
8 entry:
9 %cmp3 = icmp sle i32 28395, %b
10 br i1 %cmp3, label %for.body.lr.ph, label %for.end
11
12 for.body.lr.ph: ; preds = %entry
13 br label %for.body
14
15 for.body: ; preds = %for.body.lr.ph, %for.body
16 %i.04 = phi i32 [ 28395, %for.body.lr.ph ], [ %inc, %for.body ]
17 %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
18 %0 = load i8* %arrayidx, align 1
19 %conv = zext i8 %0 to i32
20 %add = add nsw i32 %conv, 1
21 %conv1 = trunc i32 %add to i8
22 store i8 %conv1, i8* %arrayidx, align 1
23 %inc = add nsw i32 %i.04, 1
24 %cmp = icmp sle i32 %inc, %b
25 br i1 %cmp, label %for.body, label %for.end
26
27 for.end: ; preds = %for.body, %entry
28 ret void
29 }
30
31
32
33 ; CHECK: test_pos2_ir_sle
34 ; FIXME: Support this loop!
35 ; CHECK-NOT: bdnz
36 ; a < b
37 define void @test_pos2_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
38 entry:
39 %cmp3 = icmp sle i32 9073, %b
40 br i1 %cmp3, label %for.body.lr.ph, label %for.end
41
42 for.body.lr.ph: ; preds = %entry
43 br label %for.body
44
45 for.body: ; preds = %for.body.lr.ph, %for.body
46 %i.04 = phi i32 [ 9073, %for.body.lr.ph ], [ %inc, %for.body ]
47 %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
48 %0 = load i8* %arrayidx, align 1
49 %conv = zext i8 %0 to i32
50 %add = add nsw i32 %conv, 1
51 %conv1 = trunc i32 %add to i8
52 store i8 %conv1, i8* %arrayidx, align 1
53 %inc = add nsw i32 %i.04, 2
54 %cmp = icmp sle i32 %inc, %b
55 br i1 %cmp, label %for.body, label %for.end
56
57 for.end: ; preds = %for.body, %entry
58 ret void
59 }
60
61
62
63 ; CHECK: test_pos4_ir_sle
64 ; FIXME: Support this loop!
65 ; CHECK-NOT: bdnz
66 ; a < b
67 define void @test_pos4_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
68 entry:
69 %cmp3 = icmp sle i32 21956, %b
70 br i1 %cmp3, label %for.body.lr.ph, label %for.end
71
72 for.body.lr.ph: ; preds = %entry
73 br label %for.body
74
75 for.body: ; preds = %for.body.lr.ph, %for.body
76 %i.04 = phi i32 [ 21956, %for.body.lr.ph ], [ %inc, %for.body ]
77 %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
78 %0 = load i8* %arrayidx, align 1
79 %conv = zext i8 %0 to i32
80 %add = add nsw i32 %conv, 1
81 %conv1 = trunc i32 %add to i8
82 store i8 %conv1, i8* %arrayidx, align 1
83 %inc = add nsw i32 %i.04, 4
84 %cmp = icmp sle i32 %inc, %b
85 br i1 %cmp, label %for.body, label %for.end
86
87 for.end: ; preds = %for.body, %entry
88 ret void
89 }
90
91
92
93 ; CHECK: test_pos8_ir_sle
94 ; FIXME: Support this loop!
95 ; CHECK-NOT: bdnz
96 ; a < b
97 define void @test_pos8_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
98 entry:
99 %cmp3 = icmp sle i32 16782, %b
100 br i1 %cmp3, label %for.body.lr.ph, label %for.end
101
102 for.body.lr.ph: ; preds = %entry
103 br label %for.body
104
105 for.body: ; preds = %for.body.lr.ph, %for.body
106 %i.04 = phi i32 [ 16782, %for.body.lr.ph ], [ %inc, %for.body ]
107 %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
108 %0 = load i8* %arrayidx, align 1
109 %conv = zext i8 %0 to i32
110 %add = add nsw i32 %conv, 1
111 %conv1 = trunc i32 %add to i8
112 store i8 %conv1, i8* %arrayidx, align 1
113 %inc = add nsw i32 %i.04, 8
114 %cmp = icmp sle i32 %inc, %b
115 br i1 %cmp, label %for.body, label %for.end
116
117 for.end: ; preds = %for.body, %entry
118 ret void
119 }
120
121
122
123 ; CHECK: test_pos16_ir_sle
124 ; FIXME: Support this loop!
125 ; CHECK-NOT: bdnz
126 ; a < b
127 define void @test_pos16_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
128 entry:
129 %cmp3 = icmp sle i32 19097, %b
130 br i1 %cmp3, label %for.body.lr.ph, label %for.end
131
132 for.body.lr.ph: ; preds = %entry
133 br label %for.body
134
135 for.body: ; preds = %for.body.lr.ph, %for.body
136 %i.04 = phi i32 [ 19097, %for.body.lr.ph ], [ %inc, %for.body ]
137 %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
138 %0 = load i8* %arrayidx, align 1
139 %conv = zext i8 %0 to i32
140 %add = add nsw i32 %conv, 1
141 %conv1 = trunc i32 %add to i8
142 store i8 %conv1, i8* %arrayidx, align 1
143 %inc = add nsw i32 %i.04, 16
144 %cmp = icmp sle i32 %inc, %b
145 br i1 %cmp, label %for.body, label %for.end
146
147 for.end: ; preds = %for.body, %entry
148 ret void
149 }
150
151
152
153 ; CHECK: test_pos1_ri_sle
154 ; CHECK: bdnz
155 ; a < b
156 define void @test_pos1_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
157 entry:
158 %cmp3 = icmp sle i32 %a, 14040
159 br i1 %cmp3, label %for.body.lr.ph, label %for.end
160
161 for.body.lr.ph: ; preds = %entry
162 br label %for.body
163
164 for.body: ; preds = %for.body.lr.ph, %for.body
165 %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
166 %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
167 %0 = load i8* %arrayidx, align 1
168 %conv = zext i8 %0 to i32
169 %add = add nsw i32 %conv, 1
170 %conv1 = trunc i32 %add to i8
171 store i8 %conv1, i8* %arrayidx, align 1
172 %inc = add nsw i32 %i.04, 1
173 %cmp = icmp sle i32 %inc, 14040
174 br i1 %cmp, label %for.body, label %for.end
175
176 for.end: ; preds = %for.body, %entry
177 ret void
178 }
179
180
181
182 ; CHECK: test_pos2_ri_sle
183 ; CHECK: bdnz
184 ; a < b
185 define void @test_pos2_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
186 entry:
187 %cmp3 = icmp sle i32 %a, 13710
188 br i1 %cmp3, label %for.body.lr.ph, label %for.end
189
190 for.body.lr.ph: ; preds = %entry
191 br label %for.body
192
193 for.body: ; preds = %for.body.lr.ph, %for.body
194 %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
195 %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
196 %0 = load i8* %arrayidx, align 1
197 %conv = zext i8 %0 to i32
198 %add = add nsw i32 %conv, 1
199 %conv1 = trunc i32 %add to i8
200 store i8 %conv1, i8* %arrayidx, align 1
201 %inc = add nsw i32 %i.04, 2
202 %cmp = icmp sle i32 %inc, 13710
203 br i1 %cmp, label %for.body, label %for.end
204
205 for.end: ; preds = %for.body, %entry
206 ret void
207 }
208
209
210
211 ; CHECK: test_pos4_ri_sle
212 ; CHECK: bdnz
213 ; a < b
214 define void @test_pos4_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
215 entry:
216 %cmp3 = icmp sle i32 %a, 9920
217 br i1 %cmp3, label %for.body.lr.ph, label %for.end
218
219 for.body.lr.ph: ; preds = %entry
220 br label %for.body
221
222 for.body: ; preds = %for.body.lr.ph, %for.body
223 %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
224 %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
225 %0 = load i8* %arrayidx, align 1
226 %conv = zext i8 %0 to i32
227 %add = add nsw i32 %conv, 1
228 %conv1 = trunc i32 %add to i8
229 store i8 %conv1, i8* %arrayidx, align 1
230 %inc = add nsw i32 %i.04, 4
231 %cmp = icmp sle i32 %inc, 9920
232 br i1 %cmp, label %for.body, label %for.end
233
234 for.end: ; preds = %for.body, %entry
235 ret void
236 }
237
238
239
240 ; CHECK: test_pos8_ri_sle
241 ; CHECK: bdnz
242 ; a < b
243 define void @test_pos8_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
244 entry:
245 %cmp3 = icmp sle i32 %a, 18924
246 br i1 %cmp3, label %for.body.lr.ph, label %for.end
247
248 for.body.lr.ph: ; preds = %entry
249 br label %for.body
250
251 for.body: ; preds = %for.body.lr.ph, %for.body
252 %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
253 %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
254 %0 = load i8* %arrayidx, align 1
255 %conv = zext i8 %0 to i32
256 %add = add nsw i32 %conv, 1
257 %conv1 = trunc i32 %add to i8
258 store i8 %conv1, i8* %arrayidx, align 1
259 %inc = add nsw i32 %i.04, 8
260 %cmp = icmp sle i32 %inc, 18924
261 br i1 %cmp, label %for.body, label %for.end
262
263 for.end: ; preds = %for.body, %entry
264 ret void
265 }
266
267
268
269 ; CHECK: test_pos16_ri_sle
270 ; CHECK: bdnz
271 ; a < b
272 define void @test_pos16_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
273 entry:
274 %cmp3 = icmp sle i32 %a, 11812
275 br i1 %cmp3, label %for.body.lr.ph, label %for.end
276
277 for.body.lr.ph: ; preds = %entry
278 br label %for.body
279
280 for.body: ; preds = %for.body.lr.ph, %for.body
281 %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
282 %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
283 %0 = load i8* %arrayidx, align 1
284 %conv = zext i8 %0 to i32
285 %add = add nsw i32 %conv, 1
286 %conv1 = trunc i32 %add to i8
287 store i8 %conv1, i8* %arrayidx, align 1
288 %inc = add nsw i32 %i.04, 16
289 %cmp = icmp sle i32 %inc, 11812
290 br i1 %cmp, label %for.body, label %for.end
291
292 for.end: ; preds = %for.body, %entry
293 ret void
294 }
295
296
297
298 ; CHECK: test_pos1_rr_sle
299 ; FIXME: Support this loop!
300 ; CHECK-NOT: bdnz
301 ; a < b
302 define void @test_pos1_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
303 entry:
304 %cmp3 = icmp sle i32 %a, %b
305 br i1 %cmp3, label %for.body.lr.ph, label %for.end
306
307 for.body.lr.ph: ; preds = %entry
308 br label %for.body
309
310 for.body: ; preds = %for.body.lr.ph, %for.body
311 %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
312 %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
313 %0 = load i8* %arrayidx, align 1
314 %conv = zext i8 %0 to i32
315 %add = add nsw i32 %conv, 1
316 %conv1 = trunc i32 %add to i8
317 store i8 %conv1, i8* %arrayidx, align 1
318 %inc = add nsw i32 %i.04, 1
319 %cmp = icmp sle i32 %inc, %b
320 br i1 %cmp, label %for.body, label %for.end
321
322 for.end: ; preds = %for.body, %entry
323 ret void
324 }
325
326
327
328 ; CHECK: test_pos2_rr_sle
329 ; FIXME: Support this loop!
330 ; CHECK-NOT: bdnz
331 ; a < b
332 define void @test_pos2_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
333 entry:
334 %cmp3 = icmp sle i32 %a, %b
335 br i1 %cmp3, label %for.body.lr.ph, label %for.end
336
337 for.body.lr.ph: ; preds = %entry
338 br label %for.body
339
340 for.body: ; preds = %for.body.lr.ph, %for.body
341 %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
342 %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
343 %0 = load i8* %arrayidx, align 1
344 %conv = zext i8 %0 to i32
345 %add = add nsw i32 %conv, 1
346 %conv1 = trunc i32 %add to i8
347 store i8 %conv1, i8* %arrayidx, align 1
348 %inc = add nsw i32 %i.04, 2
349 %cmp = icmp sle i32 %inc, %b
350 br i1 %cmp, label %for.body, label %for.end
351
352 for.end: ; preds = %for.body, %entry
353 ret void
354 }
355
356
357
358 ; CHECK: test_pos4_rr_sle
359 ; FIXME: Support this loop!
360 ; CHECK-NOT: bdnz
361 ; a < b
362 define void @test_pos4_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
363 entry:
364 %cmp3 = icmp sle i32 %a, %b
365 br i1 %cmp3, label %for.body.lr.ph, label %for.end
366
367 for.body.lr.ph: ; preds = %entry
368 br label %for.body
369
370 for.body: ; preds = %for.body.lr.ph, %for.body
371 %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
372 %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
373 %0 = load i8* %arrayidx, align 1
374 %conv = zext i8 %0 to i32
375 %add = add nsw i32 %conv, 1
376 %conv1 = trunc i32 %add to i8
377 store i8 %conv1, i8* %arrayidx, align 1
378 %inc = add nsw i32 %i.04, 4
379 %cmp = icmp sle i32 %inc, %b
380 br i1 %cmp, label %for.body, label %for.end
381
382 for.end: ; preds = %for.body, %entry
383 ret void
384 }
385
386
387
388 ; CHECK: test_pos8_rr_sle
389 ; FIXME: Support this loop!
390 ; CHECK-NOT: bdnz
391 ; a < b
392 define void @test_pos8_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
393 entry:
394 %cmp3 = icmp sle i32 %a, %b
395 br i1 %cmp3, label %for.body.lr.ph, label %for.end
396
397 for.body.lr.ph: ; preds = %entry
398 br label %for.body
399
400 for.body: ; preds = %for.body.lr.ph, %for.body
401 %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
402 %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
403 %0 = load i8* %arrayidx, align 1
404 %conv = zext i8 %0 to i32
405 %add = add nsw i32 %conv, 1
406 %conv1 = trunc i32 %add to i8
407 store i8 %conv1, i8* %arrayidx, align 1
408 %inc = add nsw i32 %i.04, 8
409 %cmp = icmp sle i32 %inc, %b
410 br i1 %cmp, label %for.body, label %for.end
411
412 for.end: ; preds = %for.body, %entry
413 ret void
414 }
415
416
417
418 ; CHECK: test_pos16_rr_sle
419 ; FIXME: Support this loop!
420 ; CHECK-NOT: bdnz
421 ; a < b
422 define void @test_pos16_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
423 entry:
424 %cmp3 = icmp sle i32 %a, %b
425 br i1 %cmp3, label %for.body.lr.ph, label %for.end
426
427 for.body.lr.ph: ; preds = %entry
428 br label %for.body
429
430 for.body: ; preds = %for.body.lr.ph, %for.body
431 %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
432 %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
433 %0 = load i8* %arrayidx, align 1
434 %conv = zext i8 %0 to i32
435 %add = add nsw i32 %conv, 1
436 %conv1 = trunc i32 %add to i8
437 store i8 %conv1, i8* %arrayidx, align 1
438 %inc = add nsw i32 %i.04, 16
439 %cmp = icmp sle i32 %inc, %b
440 br i1 %cmp, label %for.body, label %for.end
441
442 for.end: ; preds = %for.body, %entry
443 ret void
444 }
445
0 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
1 target triple = "powerpc64-unknown-linux-gnu"
2 ; RUN: llc < %s -march=ppc64 | FileCheck %s
3
4 ; CHECK: test_pos1_ir_slt
5 ; CHECK: bdnz
6 ; a < b
define void @test_pos1_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = 8531, 8532, ... while i slt %b.
; Unit step + slt bound: SE computes the trip count, so this should be
; converted to a bdnz counter loop (CHECK above).
entry:
  %cmp3 = icmp slt i32 8531, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ 8531, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 1
  %cmp = icmp slt i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
30
31
32
33 ; CHECK: test_pos2_ir_slt
34 ; FIXME: Support this loop!
35 ; CHECK-NOT: bdnz
36 ; a < b
define void @test_pos2_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = 9152, 9154, ... while i slt %b.
; FIXME case: step 2 with a register slt bound — trip count not yet
; computed, so no bdnz is expected (CHECK-NOT above).
entry:
  %cmp3 = icmp slt i32 9152, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ 9152, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 2
  %cmp = icmp slt i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
60
61
62
63 ; CHECK: test_pos4_ir_slt
64 ; FIXME: Support this loop!
65 ; CHECK-NOT: bdnz
66 ; a < b
define void @test_pos4_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = 18851, 18855, ... while i slt %b.
; FIXME case: step 4 with a register slt bound — trip count not yet
; computed, so no bdnz is expected (CHECK-NOT above).
entry:
  %cmp3 = icmp slt i32 18851, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ 18851, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 4
  %cmp = icmp slt i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
90
91
92
93 ; CHECK: test_pos8_ir_slt
94 ; FIXME: Support this loop!
95 ; CHECK-NOT: bdnz
96 ; a < b
define void @test_pos8_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = 25466, 25474, ... while i slt %b.
; FIXME case: step 8 with a register slt bound — trip count not yet
; computed, so no bdnz is expected (CHECK-NOT above).
entry:
  %cmp3 = icmp slt i32 25466, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ 25466, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 8
  %cmp = icmp slt i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
120
121
122
123 ; CHECK: test_pos16_ir_slt
124 ; FIXME: Support this loop!
125 ; CHECK-NOT: bdnz
126 ; a < b
define void @test_pos16_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = 9295, 9311, ... while i slt %b.
; FIXME case: step 16 with a register slt bound — trip count not yet
; computed, so no bdnz is expected (CHECK-NOT above).
entry:
  %cmp3 = icmp slt i32 9295, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ 9295, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 16
  %cmp = icmp slt i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
150
151
152
153 ; CHECK: test_pos1_ri_slt
154 ; CHECK: bdnz
155 ; a < b
define void @test_pos1_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+1, ... while i slt 31236.
; Constant bound + unit step: SE computes the trip count, so this should
; become a bdnz counter loop (CHECK above).
entry:
  %cmp3 = icmp slt i32 %a, 31236
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 1
  %cmp = icmp slt i32 %inc, 31236
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
179
180
181
182 ; CHECK: test_pos2_ri_slt
183 ; CHECK: bdnz
184 ; a < b
define void @test_pos2_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+2, ... while i slt 22653.
; Constant slt bound lets SE compute the trip count even with step 2,
; so a bdnz counter loop is expected (CHECK above).
entry:
  %cmp3 = icmp slt i32 %a, 22653
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 2
  %cmp = icmp slt i32 %inc, 22653
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
208
209
210
211 ; CHECK: test_pos4_ri_slt
212 ; CHECK: bdnz
213 ; a < b
define void @test_pos4_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+4, ... while i slt 1431.
; Constant slt bound lets SE compute the trip count even with step 4,
; so a bdnz counter loop is expected (CHECK above).
entry:
  %cmp3 = icmp slt i32 %a, 1431
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 4
  %cmp = icmp slt i32 %inc, 1431
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
237
238
239
240 ; CHECK: test_pos8_ri_slt
241 ; CHECK: bdnz
242 ; a < b
define void @test_pos8_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+8, ... while i slt 22403.
; Constant slt bound lets SE compute the trip count even with step 8,
; so a bdnz counter loop is expected (CHECK above).
entry:
  %cmp3 = icmp slt i32 %a, 22403
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 8
  %cmp = icmp slt i32 %inc, 22403
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
266
267
268
269 ; CHECK: test_pos16_ri_slt
270 ; CHECK: bdnz
271 ; a < b
define void @test_pos16_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+16, ... while i slt 21715.
; Constant slt bound lets SE compute the trip count even with step 16,
; so a bdnz counter loop is expected (CHECK above).
entry:
  %cmp3 = icmp slt i32 %a, 21715
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 16
  %cmp = icmp slt i32 %inc, 21715
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
295
296
297
298 ; CHECK: test_pos1_rr_slt
299 ; CHECK: bdnz
300 ; a < b
define void @test_pos1_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+1, ... while i slt %b.
; Unit step + slt bound: SE computes the trip count, so this should
; become a bdnz counter loop (CHECK above).
entry:
  %cmp3 = icmp slt i32 %a, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 1
  %cmp = icmp slt i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
324
325
326
327 ; CHECK: test_pos2_rr_slt
328 ; FIXME: Support this loop!
329 ; CHECK-NOT: bdnz
330 ; a < b
define void @test_pos2_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+2, ... while i slt %b.
; FIXME case: step 2 with a register slt bound — trip count not yet
; computed, so no bdnz is expected (CHECK-NOT above).
entry:
  %cmp3 = icmp slt i32 %a, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 2
  %cmp = icmp slt i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
354
355
356
357 ; CHECK: test_pos4_rr_slt
358 ; FIXME: Support this loop!
359 ; CHECK-NOT: bdnz
360 ; a < b
define void @test_pos4_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+4, ... while i slt %b.
; FIXME case: step 4 with a register slt bound — trip count not yet
; computed, so no bdnz is expected (CHECK-NOT above).
entry:
  %cmp3 = icmp slt i32 %a, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 4
  %cmp = icmp slt i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
384
385
386
387 ; CHECK: test_pos8_rr_slt
388 ; FIXME: Support this loop!
389 ; CHECK-NOT: bdnz
390 ; a < b
define void @test_pos8_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+8, ... while i slt %b.
; FIXME case: step 8 with a register slt bound — trip count not yet
; computed, so no bdnz is expected (CHECK-NOT above).
entry:
  %cmp3 = icmp slt i32 %a, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 8
  %cmp = icmp slt i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
414
415
416
417 ; CHECK: test_pos16_rr_slt
418 ; FIXME: Support this loop!
419 ; CHECK-NOT: bdnz
420 ; a < b
define void @test_pos16_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+16, ... while i slt %b.
; FIXME case: step 16 with a register slt bound — trip count not yet
; computed, so no bdnz is expected (CHECK-NOT above).
entry:
  %cmp3 = icmp slt i32 %a, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 16
  %cmp = icmp slt i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
444
0 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
1 target triple = "powerpc64-unknown-linux-gnu"
2 ; RUN: llc < %s -march=ppc64 | FileCheck %s
3
4 ; CHECK: test_pos1_ir_ne
5 ; CHECK: bdnz
6 ; a < b
define void @test_pos1_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = 32623, 32624, ... until i == %b (ne exit).
; Unit step with an ne exit: SE computes the trip count, so this should
; become a bdnz counter loop (CHECK above).
entry:
  %cmp3 = icmp slt i32 32623, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ 32623, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 1
  %cmp = icmp ne i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
30
31
32
33 ; CHECK: test_pos2_ir_ne
34 ; FIXME: Support this loop!
35 ; CHECK-NOT: bdnz
36 ; a < b
define void @test_pos2_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = 29554, 29556, ... until i == %b (ne exit).
; FIXME case: step 2 with an ne exit against a register — trip count not
; yet computed, so no bdnz is expected (CHECK-NOT above).
entry:
  %cmp3 = icmp slt i32 29554, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ 29554, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 2
  %cmp = icmp ne i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
60
61
62
63 ; CHECK: test_pos4_ir_ne
64 ; FIXME: Support this loop!
65 ; CHECK-NOT: bdnz
66 ; a < b
define void @test_pos4_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = 15692, 15696, ... until i == %b (ne exit).
; FIXME case: step 4 with an ne exit against a register — trip count not
; yet computed, so no bdnz is expected (CHECK-NOT above).
entry:
  %cmp3 = icmp slt i32 15692, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ 15692, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 4
  %cmp = icmp ne i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
90
91
92
93 ; CHECK: test_pos8_ir_ne
94 ; FIXME: Support this loop!
95 ; CHECK-NOT: bdnz
96 ; a < b
define void @test_pos8_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = 10449, 10457, ... until i == %b (ne exit).
; FIXME case: step 8 with an ne exit against a register — trip count not
; yet computed, so no bdnz is expected (CHECK-NOT above).
entry:
  %cmp3 = icmp slt i32 10449, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ 10449, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 8
  %cmp = icmp ne i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
120
121
122
123 ; CHECK: test_pos16_ir_ne
124 ; FIXME: Support this loop!
125 ; CHECK-NOT: bdnz
126 ; a < b
define void @test_pos16_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = 32087, 32103, ... until i == %b (ne exit).
; FIXME case: step 16 with an ne exit against a register — trip count not
; yet computed, so no bdnz is expected (CHECK-NOT above).
entry:
  %cmp3 = icmp slt i32 32087, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ 32087, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 16
  %cmp = icmp ne i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
150
151
152
153 ; CHECK: test_pos1_ri_ne
154 ; CHECK: bdnz
155 ; a < b
define void @test_pos1_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+1, ... until i == 3472 (ne exit).
; Unit step with a constant ne exit: SE computes the trip count, so this
; should become a bdnz counter loop (CHECK above).
entry:
  %cmp3 = icmp slt i32 %a, 3472
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 1
  %cmp = icmp ne i32 %inc, 3472
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
179
180
181
182 ; CHECK: test_pos2_ri_ne
183 ; FIXME: Support this loop!
184 ; CHECK-NOT: bdnz
185 ; a < b
define void @test_pos2_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+2, ... until i == 8730 (ne exit).
; FIXME case: with step 2 the ne exit may never fire for odd %a, so the
; trip count is not computed — no bdnz expected (CHECK-NOT above).
entry:
  %cmp3 = icmp slt i32 %a, 8730
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 2
  %cmp = icmp ne i32 %inc, 8730
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
209
210
211
212 ; CHECK: test_pos4_ri_ne
213 ; FIXME: Support this loop!
214 ; CHECK-NOT: bdnz
215 ; a < b
define void @test_pos4_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+4, ... until i == 1493 (ne exit).
; FIXME case: step 4 with a constant ne exit — trip count not computed
; (exact hit on 1493 isn't provable), so no bdnz expected (CHECK-NOT).
entry:
  %cmp3 = icmp slt i32 %a, 1493
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 4
  %cmp = icmp ne i32 %inc, 1493
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
239
240
241
242 ; CHECK: test_pos8_ri_ne
243 ; FIXME: Support this loop!
244 ; CHECK-NOT: bdnz
245 ; a < b
define void @test_pos8_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+8, ... until i == 1706 (ne exit).
; FIXME case: step 8 with a constant ne exit — trip count not computed
; (exact hit on 1706 isn't provable), so no bdnz expected (CHECK-NOT).
entry:
  %cmp3 = icmp slt i32 %a, 1706
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 8
  %cmp = icmp ne i32 %inc, 1706
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
269
270
271
272 ; CHECK: test_pos16_ri_ne
273 ; FIXME: Support this loop!
274 ; CHECK-NOT: bdnz
275 ; a < b
define void @test_pos16_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+16, ... until i == 1886 (ne exit).
; FIXME case: step 16 with a constant ne exit — trip count not computed
; (exact hit on 1886 isn't provable), so no bdnz expected (CHECK-NOT).
entry:
  %cmp3 = icmp slt i32 %a, 1886
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 16
  %cmp = icmp ne i32 %inc, 1886
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
299
300
301
302 ; CHECK: test_pos1_rr_ne
303 ; CHECK: bdnz
304 ; a < b
define void @test_pos1_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+1, ... until i == %b (ne exit).
; Unit step with an ne exit: SE computes the trip count (%b - %a), so
; this should become a bdnz counter loop (CHECK above).
entry:
  %cmp3 = icmp slt i32 %a, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 1
  %cmp = icmp ne i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
328
329
330
331 ; CHECK: test_pos2_rr_ne
332 ; FIXME: Support this loop!
333 ; CHECK-NOT: bdnz
334 ; a < b
define void @test_pos2_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+2, ... until i == %b (ne exit).
; FIXME case: step 2 with an ne exit — %b may never be hit exactly, so
; the trip count is not computed and no bdnz is expected (CHECK-NOT).
entry:
  %cmp3 = icmp slt i32 %a, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 2
  %cmp = icmp ne i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
358
359
360
361 ; CHECK: test_pos4_rr_ne
362 ; FIXME: Support this loop!
363 ; CHECK-NOT: bdnz
364 ; a < b
define void @test_pos4_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+4, ... until i == %b (ne exit).
; FIXME case: step 4 with an ne exit — %b may never be hit exactly, so
; the trip count is not computed and no bdnz is expected (CHECK-NOT).
entry:
  %cmp3 = icmp slt i32 %a, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 4
  %cmp = icmp ne i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
388
389
390
391 ; CHECK: test_pos8_rr_ne
392 ; FIXME: Support this loop!
393 ; CHECK-NOT: bdnz
394 ; a < b
define void @test_pos8_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+8, ... until i == %b (ne exit).
; FIXME case: step 8 with an ne exit — %b may never be hit exactly, so
; the trip count is not computed and no bdnz is expected (CHECK-NOT).
entry:
  %cmp3 = icmp slt i32 %a, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 8
  %cmp = icmp ne i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
418
419
420
421 ; CHECK: test_pos16_rr_ne
422 ; FIXME: Support this loop!
423 ; CHECK-NOT: bdnz
424 ; a < b
define void @test_pos16_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
; Increments p[i] by 1 for i = %a, %a+16, ... until i == %b (ne exit).
; FIXME case: step 16 with an ne exit — %b may never be hit exactly, so
; the trip count is not computed and no bdnz is expected (CHECK-NOT).
entry:
  %cmp3 = icmp slt i32 %a, %b
  br i1 %cmp3, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
  %0 = load i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %add = add nsw i32 %conv, 1
  %conv1 = trunc i32 %add to i8
  store i8 %conv1, i8* %arrayidx, align 1
  %inc = add nsw i32 %i.04, 16
  %cmp = icmp ne i32 %inc, %b
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}
448
2121 ; CHECK: @test1
2222 ; CHECK-NOT: or 3, 3, 3
2323 ; CHECK: mtctr
24 ; CHECK-NOT: addi
24 ; CHECK-NOT: addi {[0-9]+}
2525 ; CHECK-NOT: cmplwi
2626 ; CHECK: bdnz
2727 }
4444 ret void
4545 ; CHECK: @test2
4646 ; CHECK: mtctr
47 ; CHECK-NOT: addi
47 ; CHECK-NOT: addi {[0-9]+}
4848 ; CHECK-NOT: cmplwi
4949 ; CHECK: bdnz
5050 }
6868 ret void
6969 ; CHECK: @test3
7070 ; CHECK: mtctr
71 ; CHECK-NOT: addi
71 ; CHECK-NOT: addi {[0-9]+}
7272 ; CHECK-NOT: cmplwi
7373 ; CHECK: bdnz
7474 }
1313 %exitcond = icmp eq i32 %lftr.wideiv, 0
1414 br i1 %exitcond, label %for.end, label %for.body
1515
16 ; FIXME: We currently can't form the 32-bit unsigned trip count necessary here!
1716 ; CHECK: @main
18 ; CHECK-NOT: bdnz
17 ; CHECK: li [[REG:[0-9]+]], 0
18 ; CHECK: oris [[REG2:[0-9]+]], [[REG]], 65535
19 ; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535
20 ; CHECK: mtctr [[REG3]]
21 ; CHECK: bdnz
1922
2023 for.end: ; preds = %for.body, %entry
2124 ret void