llvm.org GIT mirror llvm / 2741d2c
Cleanup trip-count finding for PPC CTR loops (and some bug fixes). This cleans up the method used to find trip counts in order to form CTR loops on PPC. This refactoring allows the pass to find loops which have a constant trip count but also happen to end with a comparison to zero. This also adds explicit FIXMEs to mark two different classes of loops that are currently ignored. In addition, we now search through all potential induction operations instead of just the first. Also, we check the predicate code on the conditional branch and abort the transformation if the code is not EQ or NE, and we then make sure that the branch to be transformed matches the condition register defined by the comparison (multiple possible comparisons will be considered). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158607 91177308-0d34-0410-b5e6-96231b3b80d8 Hal Finkel 8 years ago
3 changed file(s) with 419 addition(s) and 88 deletion(s). Raw diff Collapse all Expand all
3131 #define DEBUG_TYPE "ctrloops"
3232 #include "PPC.h"
3333 #include "PPCTargetMachine.h"
34 #include "MCTargetDesc/PPCPredicates.h"
3435 #include "llvm/Constants.h"
3536 #include "llvm/PassSupport.h"
3637 #include "llvm/ADT/DenseMap.h"
8182 /// getCanonicalInductionVariable - Check to see if the loop has a canonical
8283 /// induction variable.
8384 /// Should be defined in MachineLoop. Based upon version in class Loop.
84 MachineInstr *getCanonicalInductionVariable(MachineLoop *L,
85 MachineInstr *&IOp) const;
85 void getCanonicalInductionVariable(MachineLoop *L,
86 SmallVector &IVars,
87 SmallVector &IOps) const;
8688
8789 /// getTripCount - Return a loop-invariant LLVM register indicating the
8890 /// number of times the loop will be executed. If the trip-count cannot
8991 /// be determined, this returns null.
90 CountValue *getTripCount(MachineLoop *L, bool &WordCmp,
92 CountValue *getTripCount(MachineLoop *L,
9193 SmallVector &OldInsts) const;
9294
9395 /// isInductionOperation - Return true if the instruction matches the
174176
175177 /// isCompareEqualsImm - Returns true if the instruction is a compare equals
176178 /// instruction with an immediate operand.
177 static bool isCompareEqualsImm(const MachineInstr *MI, bool &WordCmp) {
178 if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPLWI) {
179 WordCmp = true;
179 static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) {
180 if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) {
181 SignedCmp = true;
180182 return true;
181 } else if (MI->getOpcode() == PPC::CMPDI || MI->getOpcode() == PPC::CMPLDI) {
182 WordCmp = false;
183 } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) {
184 SignedCmp = false;
183185 return true;
184186 }
185187
226228 /// the machine.
227229 /// This method assumes that the IndVarSimplify pass has been run by 'opt'.
228230 ///
229 MachineInstr
230 *PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L,
231 MachineInstr *&IOp) const {
231 void
232 PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L,
233 SmallVector &IVars,
234 SmallVector &IOps) const {
232235 MachineBasicBlock *TopMBB = L->getTopBlock();
233236 MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
234237 assert(PI != TopMBB->pred_end() &&
235238 "Loop must have more than one incoming edge!");
236239 MachineBasicBlock *Backedge = *PI++;
237 if (PI == TopMBB->pred_end()) return 0; // dead loop
240 if (PI == TopMBB->pred_end()) return; // dead loop
238241 MachineBasicBlock *Incoming = *PI++;
239 if (PI != TopMBB->pred_end()) return 0; // multiple backedges?
242 if (PI != TopMBB->pred_end()) return; // multiple backedges?
240243
241244 // make sure there is one incoming and one backedge and determine which
242245 // is which.
243246 if (L->contains(Incoming)) {
244247 if (L->contains(Backedge))
245 return 0;
248 return;
246249 std::swap(Incoming, Backedge);
247250 } else if (!L->contains(Backedge))
248 return 0;
251 return;
249252
250253 // Loop over all of the PHI nodes, looking for a canonical induction variable:
251254 // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
262265 // Check if the definition is an induction operation.
263266 MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
264267 if (isInductionOperation(DI, DefReg)) {
265 IOp = DI;
266 return MPhi;
268 IOps.push_back(DI);
269 IVars.push_back(MPhi);
267270 }
268271 }
269272 }
270273 }
271 return 0;
274 return;
272275 }
273276
274277 /// getTripCount - Return a loop-invariant LLVM value indicating the
282285 ///
283286 /// Based upon getTripCount in LoopInfo.
284287 ///
285 CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, bool &WordCmp,
288 CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
286289 SmallVector &OldInsts) const {
290 MachineBasicBlock *LastMBB = L->getExitingBlock();
291 // Don't generate a CTR loop if the loop has more than one exit.
292 if (LastMBB == 0)
293 return 0;
294
295 MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
296 if (LastI->getOpcode() != PPC::BCC)
297 return 0;
298
299 // We need to make sure that this compare is defining the condition
300 // register actually used by the terminating branch.
301
302 unsigned PredReg = LastI->getOperand(1).getReg();
303 DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI);
304
305 unsigned PredCond = LastI->getOperand(0).getImm();
306 if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
307 return 0;
308
287309 // Check that the loop has an induction variable.
288 MachineInstr *IOp;
289 MachineInstr *IV_Inst = getCanonicalInductionVariable(L, IOp);
290 if (IV_Inst == 0) return 0;
291
292 // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm',
293 // if Imm is 0, get the count from the PHI opnd
294 // if Imm is -M, than M is the count
295 // Otherwise, Imm is the count
296 MachineOperand *IV_Opnd;
297 const MachineOperand *InitialValue;
298 if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
299 InitialValue = &IV_Inst->getOperand(1);
300 IV_Opnd = &IV_Inst->getOperand(3);
301 } else {
302 InitialValue = &IV_Inst->getOperand(3);
303 IV_Opnd = &IV_Inst->getOperand(1);
304 }
305
306 // Look for the cmp instruction to determine if we
307 // can get a useful trip count. The trip count can
308 // be either a register or an immediate. The location
309 // of the value depends upon the type (reg or imm).
310 while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) {
311 MachineInstr *MI = IV_Opnd->getParent();
312 if (L->contains(MI) && isCompareEqualsImm(MI, WordCmp)) {
313 OldInsts.push_back(MI);
314 OldInsts.push_back(IOp);
315
316 const MachineOperand &MO = MI->getOperand(2);
317 assert(MO.isImm() && "IV Cmp Operand should be an immediate");
318 int64_t ImmVal = MO.getImm();
319
320 const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
321 assert(L->contains(IV_DefInstr->getParent()) &&
322 "IV definition should occurs in loop");
323 int64_t iv_value = IV_DefInstr->getOperand(2).getImm();
324
325 if (ImmVal == 0) {
326 // Make sure the induction variable changes by one on each iteration.
327 if (iv_value != 1 && iv_value != -1) {
328 return 0;
329 }
330 return new CountValue(InitialValue->getReg(), iv_value > 0);
331 } else {
310 SmallVector IVars, IOps;
311 getCanonicalInductionVariable(L, IVars, IOps);
312 for (unsigned i = 0; i < IVars.size(); ++i) {
313 MachineInstr *IOp = IOps[i];
314 MachineInstr *IV_Inst = IVars[i];
315
316 // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm',
317 // if Imm is 0, get the count from the PHI opnd
318 // if Imm is -M, then M is the count
319 // Otherwise, Imm is the count
320 MachineOperand *IV_Opnd;
321 const MachineOperand *InitialValue;
322 if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
323 InitialValue = &IV_Inst->getOperand(1);
324 IV_Opnd = &IV_Inst->getOperand(3);
325 } else {
326 InitialValue = &IV_Inst->getOperand(3);
327 IV_Opnd = &IV_Inst->getOperand(1);
328 }
329
330 DEBUG(dbgs() << "Considering:\n");
331 DEBUG(dbgs() << " induction operation: " << *IOp);
332 DEBUG(dbgs() << " induction variable: " << *IV_Inst);
333 DEBUG(dbgs() << " initial value: " << *InitialValue << "\n");
334
335 // Look for the cmp instruction to determine if we
336 // can get a useful trip count. The trip count can
337 // be either a register or an immediate. The location
338 // of the value depends upon the type (reg or imm).
339 while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) {
340 bool SignedCmp;
341 MachineInstr *MI = IV_Opnd->getParent();
342 if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) &&
343 MI->getOperand(0).getReg() == PredReg) {
344
345 OldInsts.push_back(MI);
346 OldInsts.push_back(IOp);
347
348 DEBUG(dbgs() << " compare: " << *MI);
349
350 const MachineOperand &MO = MI->getOperand(2);
351 assert(MO.isImm() && "IV Cmp Operand should be an immediate");
352
353 int64_t ImmVal;
354 if (SignedCmp)
355 ImmVal = (short) MO.getImm();
356 else
357 ImmVal = MO.getImm();
358
359 const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
360 assert(L->contains(IV_DefInstr->getParent()) &&
361 "IV definition should occurs in loop");
362 int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm();
363
332364 assert(InitialValue->isReg() && "Expecting register for init value");
333 const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg());
334
365 unsigned InitialValueReg = InitialValue->getReg();
366
367 const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
368
335369 // Here we need to look for an immediate load (an li or lis/ori pair).
336370 if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
337371 DefInstr->getOpcode() == PPC::ORI)) {
338 int64_t start = DefInstr->getOperand(2).getImm();
372 int64_t start = (short) DefInstr->getOperand(2).getImm();
339373 const MachineInstr *DefInstr2 =
340374 MRI->getVRegDef(DefInstr->getOperand(0).getReg());
341375 if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
342376 DefInstr2->getOpcode() == PPC::LIS)) {
343 start |= DefInstr2->getOperand(1).getImm() << 16;
344
377 DEBUG(dbgs() << " initial constant: " << *DefInstr);
378 DEBUG(dbgs() << " initial constant: " << *DefInstr2);
379
380 start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16;
381
345382 int64_t count = ImmVal - start;
346383 if ((count % iv_value) != 0) {
347384 return 0;
350387 }
351388 } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
352389 DefInstr->getOpcode() == PPC::LI)) {
353 int64_t count = ImmVal - DefInstr->getOperand(1).getImm();
390 DEBUG(dbgs() << " initial constant: " << *DefInstr);
391
392 int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm()));
354393 if ((count % iv_value) != 0) {
355394 return 0;
356395 }
357396 return new CountValue(count/iv_value);
397 } else if (iv_value == 1 || iv_value == -1) {
398 // We can't determine a constant starting value.
399 if (ImmVal == 0) {
400 return new CountValue(InitialValueReg, iv_value > 0);
401 }
402 // FIXME: handle non-zero end value.
358403 }
404 // FIXME: handle non-unit increments (we might not want to introduce division
405 // but we can handle some 2^n cases with shifts).
406
359407 }
360408 }
361409 }
523571 return Changed;
524572 }
525573
526 bool WordCmp;
527574 SmallVector OldInsts;
528575 // Are we able to determine the trip count for the loop?
529 CountValue *TripCount = getTripCount(L, WordCmp, OldInsts);
576 CountValue *TripCount = getTripCount(L, OldInsts);
530577 if (TripCount == 0) {
531578 DEBUG(dbgs() << "failed to get trip count!\n");
532579 return false;
574621 const PPCSubtarget &Subtarget = MF->getTarget().getSubtarget();
575622 bool isPPC64 = Subtarget.isPPC64();
576623
624 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
625 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
626 const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
627
577628 unsigned CountReg;
578629 if (TripCount->isReg()) {
579630 // Create a copy of the loop count register.
580 const TargetRegisterClass *RC =
631 const TargetRegisterClass *SrcRC =
581632 MF->getRegInfo().getRegClass(TripCount->getReg());
582633 CountReg = MF->getRegInfo().createVirtualRegister(RC);
634 unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ?
635 (unsigned) PPC::EXTSW_32_64 :
636 (unsigned) TargetOpcode::COPY;
583637 BuildMI(*Preheader, InsertPos, dl,
584 TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg());
638 TII->get(CopyOp), CountReg).addReg(TripCount->getReg());
585639 if (TripCount->isNeg()) {
586640 unsigned CountReg1 = CountReg;
587641 CountReg = MF->getRegInfo().createVirtualRegister(RC);
589643 TII->get(isPPC64 ? PPC::NEG8 : PPC::NEG),
590644 CountReg).addReg(CountReg1);
591645 }
592
593 // On a 64-bit system, if the original comparison was only 32-bit, then
594 // mask out the higher-order part of the count.
595 if (isPPC64 && WordCmp) {
596 unsigned CountReg1 = CountReg;
597 CountReg = MF->getRegInfo().createVirtualRegister(RC);
598 BuildMI(*Preheader, InsertPos, dl,
599 TII->get(PPC::RLDICL), CountReg).addReg(CountReg1
600 ).addImm(0).addImm(32);
601 }
602646 } else {
603647 assert(TripCount->isImm() && "Expecting immedate vaule for trip count");
604648 // Put the trip count in a register for transfer into the count register.
605 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
606 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
607 const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
608649
609650 int64_t CountImm = TripCount->getImm();
610 if (TripCount->isNeg())
611 CountImm = -CountImm;
651 assert(!TripCount->isNeg() && "Constant trip count must be positive");
612652
613653 CountReg = MF->getRegInfo().createVirtualRegister(RC);
614654 if (CountImm > 0xFFFF) {
664704 (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(BranchTarget);
665705
666706 // Conditional branch; just delete it.
707 DEBUG(dbgs() << "Removing old branch: " << *LastI);
667708 LastMBB->erase(LastI);
668709
669710 delete TripCount;
0 ; ModuleID = 'tsc_s000.c'
1 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
2 target triple = "powerpc64-unknown-linux-gnu"
3 ; RUN: llc < %s -march=ppc64 | FileCheck %s
4
5 @Y = common global [16000 x double] zeroinitializer, align 32
6 @X = common global [16000 x double] zeroinitializer, align 32
7 @Z = common global [16000 x double] zeroinitializer, align 32
8 @U = common global [16000 x double] zeroinitializer, align 32
9 @V = common global [16000 x double] zeroinitializer, align 32
10 @aa = common global [256 x [256 x double]] zeroinitializer, align 32
11 @bb = common global [256 x [256 x double]] zeroinitializer, align 32
12 @cc = common global [256 x [256 x double]] zeroinitializer, align 32
13 @array = common global [65536 x double] zeroinitializer, align 32
14 @x = common global [16000 x double] zeroinitializer, align 32
15 @temp = common global double 0.000000e+00, align 8
16 @temp_int = common global i32 0, align 4
17 @a = common global [16000 x double] zeroinitializer, align 32
18 @b = common global [16000 x double] zeroinitializer, align 32
19 @c = common global [16000 x double] zeroinitializer, align 32
20 @d = common global [16000 x double] zeroinitializer, align 32
21 @e = common global [16000 x double] zeroinitializer, align 32
22 @tt = common global [256 x [256 x double]] zeroinitializer, align 32
23 @indx = common global [16000 x i32] zeroinitializer, align 32
24 @xx = common global double* null, align 8
25 @yy = common global double* null, align 8
26
27 define i32 @s000() nounwind {
28 entry:
29 br label %for.cond1.preheader
30
31 for.cond1.preheader: ; preds = %for.end, %entry
32 %nl.010 = phi i32 [ 0, %entry ], [ %inc7, %for.end ]
33 br label %for.body3
34
35 for.body3: ; preds = %for.body3, %for.cond1.preheader
36 %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next.15, %for.body3 ]
37 %arrayidx = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv
38 %0 = load double* %arrayidx, align 32, !tbaa !0
39 %add = fadd double %0, 1.000000e+00
40 %arrayidx5 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv
41 store double %add, double* %arrayidx5, align 32, !tbaa !0
42 %indvars.iv.next11 = or i64 %indvars.iv, 1
43 %arrayidx.1 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next11
44 %1 = load double* %arrayidx.1, align 8, !tbaa !0
45 %add.1 = fadd double %1, 1.000000e+00
46 %arrayidx5.1 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next11
47 store double %add.1, double* %arrayidx5.1, align 8, !tbaa !0
48 %indvars.iv.next.112 = or i64 %indvars.iv, 2
49 %arrayidx.2 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.112
50 %2 = load double* %arrayidx.2, align 16, !tbaa !0
51 %add.2 = fadd double %2, 1.000000e+00
52 %arrayidx5.2 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.112
53 store double %add.2, double* %arrayidx5.2, align 16, !tbaa !0
54 %indvars.iv.next.213 = or i64 %indvars.iv, 3
55 %arrayidx.3 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.213
56 %3 = load double* %arrayidx.3, align 8, !tbaa !0
57 %add.3 = fadd double %3, 1.000000e+00
58 %arrayidx5.3 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.213
59 store double %add.3, double* %arrayidx5.3, align 8, !tbaa !0
60 %indvars.iv.next.314 = or i64 %indvars.iv, 4
61 %arrayidx.4 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.314
62 %4 = load double* %arrayidx.4, align 32, !tbaa !0
63 %add.4 = fadd double %4, 1.000000e+00
64 %arrayidx5.4 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.314
65 store double %add.4, double* %arrayidx5.4, align 32, !tbaa !0
66 %indvars.iv.next.415 = or i64 %indvars.iv, 5
67 %arrayidx.5 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.415
68 %5 = load double* %arrayidx.5, align 8, !tbaa !0
69 %add.5 = fadd double %5, 1.000000e+00
70 %arrayidx5.5 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.415
71 store double %add.5, double* %arrayidx5.5, align 8, !tbaa !0
72 %indvars.iv.next.516 = or i64 %indvars.iv, 6
73 %arrayidx.6 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.516
74 %6 = load double* %arrayidx.6, align 16, !tbaa !0
75 %add.6 = fadd double %6, 1.000000e+00
76 %arrayidx5.6 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.516
77 store double %add.6, double* %arrayidx5.6, align 16, !tbaa !0
78 %indvars.iv.next.617 = or i64 %indvars.iv, 7
79 %arrayidx.7 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.617
80 %7 = load double* %arrayidx.7, align 8, !tbaa !0
81 %add.7 = fadd double %7, 1.000000e+00
82 %arrayidx5.7 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.617
83 store double %add.7, double* %arrayidx5.7, align 8, !tbaa !0
84 %indvars.iv.next.718 = or i64 %indvars.iv, 8
85 %arrayidx.8 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.718
86 %8 = load double* %arrayidx.8, align 32, !tbaa !0
87 %add.8 = fadd double %8, 1.000000e+00
88 %arrayidx5.8 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.718
89 store double %add.8, double* %arrayidx5.8, align 32, !tbaa !0
90 %indvars.iv.next.819 = or i64 %indvars.iv, 9
91 %arrayidx.9 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.819
92 %9 = load double* %arrayidx.9, align 8, !tbaa !0
93 %add.9 = fadd double %9, 1.000000e+00
94 %arrayidx5.9 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.819
95 store double %add.9, double* %arrayidx5.9, align 8, !tbaa !0
96 %indvars.iv.next.920 = or i64 %indvars.iv, 10
97 %arrayidx.10 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.920
98 %10 = load double* %arrayidx.10, align 16, !tbaa !0
99 %add.10 = fadd double %10, 1.000000e+00
100 %arrayidx5.10 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.920
101 store double %add.10, double* %arrayidx5.10, align 16, !tbaa !0
102 %indvars.iv.next.1021 = or i64 %indvars.iv, 11
103 %arrayidx.11 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1021
104 %11 = load double* %arrayidx.11, align 8, !tbaa !0
105 %add.11 = fadd double %11, 1.000000e+00
106 %arrayidx5.11 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1021
107 store double %add.11, double* %arrayidx5.11, align 8, !tbaa !0
108 %indvars.iv.next.1122 = or i64 %indvars.iv, 12
109 %arrayidx.12 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1122
110 %12 = load double* %arrayidx.12, align 32, !tbaa !0
111 %add.12 = fadd double %12, 1.000000e+00
112 %arrayidx5.12 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1122
113 store double %add.12, double* %arrayidx5.12, align 32, !tbaa !0
114 %indvars.iv.next.1223 = or i64 %indvars.iv, 13
115 %arrayidx.13 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1223
116 %13 = load double* %arrayidx.13, align 8, !tbaa !0
117 %add.13 = fadd double %13, 1.000000e+00
118 %arrayidx5.13 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1223
119 store double %add.13, double* %arrayidx5.13, align 8, !tbaa !0
120 %indvars.iv.next.1324 = or i64 %indvars.iv, 14
121 %arrayidx.14 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1324
122 %14 = load double* %arrayidx.14, align 16, !tbaa !0
123 %add.14 = fadd double %14, 1.000000e+00
124 %arrayidx5.14 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1324
125 store double %add.14, double* %arrayidx5.14, align 16, !tbaa !0
126 %indvars.iv.next.1425 = or i64 %indvars.iv, 15
127 %arrayidx.15 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1425
128 %15 = load double* %arrayidx.15, align 8, !tbaa !0
129 %add.15 = fadd double %15, 1.000000e+00
130 %arrayidx5.15 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1425
131 store double %add.15, double* %arrayidx5.15, align 8, !tbaa !0
132 %indvars.iv.next.15 = add i64 %indvars.iv, 16
133 %lftr.wideiv.15 = trunc i64 %indvars.iv.next.15 to i32
134 %exitcond.15 = icmp eq i32 %lftr.wideiv.15, 16000
135 br i1 %exitcond.15, label %for.end, label %for.body3
136
137 for.end: ; preds = %for.body3
138 %call = tail call i32 @dummy(double* getelementptr inbounds ([16000 x double]* @X, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Y, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Z, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @U, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @V, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @aa, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @bb, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @cc, i64 0, i64 0), double 0.000000e+00) nounwind
139 %inc7 = add nsw i32 %nl.010, 1
140 %exitcond = icmp eq i32 %inc7, 400000
141 br i1 %exitcond, label %for.end8, label %for.cond1.preheader
142
143 for.end8: ; preds = %for.end
144 ret i32 0
145
146 ; CHECK: @s000
147 ; CHECK: mtctr
148 ; CHECK: bdnz
149 }
150
151 declare i32 @dummy(double*, double*, double*, double*, double*, [256 x double]*, [256 x double]*, [256 x double]*, double)
152
153 !0 = metadata !{metadata !"double", metadata !1}
154 !1 = metadata !{metadata !"omnipotent char", metadata !2}
155 !2 = metadata !{metadata !"Simple C/C++ TBAA"}
0 ; ModuleID = 'SingleSource/Regression/C/sumarray2d.c'
1 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
2 target triple = "powerpc64-unknown-linux-gnu"
3 ; RUN: llc < %s -march=ppc64 | FileCheck %s
4
5 @.str = private unnamed_addr constant [23 x i8] c"Sum(Array[%d,%d] = %d\0A\00", align 1
6
7 define i32 @SumArray([100 x i32]* nocapture %Array, i32 %NumI, i32 %NumJ) nounwind readonly {
8 entry:
9 %cmp12 = icmp eq i32 %NumI, 0
10 br i1 %cmp12, label %for.end8, label %for.cond1.preheader.lr.ph
11
12 for.cond1.preheader.lr.ph: ; preds = %entry
13 %cmp29 = icmp eq i32 %NumJ, 0
14 br i1 %cmp29, label %for.inc6, label %for.body3.lr.ph.us
15
16 for.inc6.us: ; preds = %for.body3.us
17 %indvars.iv.next17 = add i64 %indvars.iv16, 1
18 %lftr.wideiv18 = trunc i64 %indvars.iv.next17 to i32
19 %exitcond19 = icmp eq i32 %lftr.wideiv18, %NumI
20 br i1 %exitcond19, label %for.end8, label %for.body3.lr.ph.us
21
22 for.body3.us: ; preds = %for.body3.us, %for.body3.lr.ph.us
23 %indvars.iv = phi i64 [ 0, %for.body3.lr.ph.us ], [ %indvars.iv.next, %for.body3.us ]
24 %Result.111.us = phi i32 [ %Result.014.us, %for.body3.lr.ph.us ], [ %add.us, %for.body3.us ]
25 %arrayidx5.us = getelementptr inbounds [100 x i32]* %Array, i64 %indvars.iv16, i64 %indvars.iv
26 %0 = load i32* %arrayidx5.us, align 4, !tbaa !0
27 %add.us = add nsw i32 %0, %Result.111.us
28 %indvars.iv.next = add i64 %indvars.iv, 1
29 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
30 %exitcond = icmp eq i32 %lftr.wideiv, %NumJ
31 br i1 %exitcond, label %for.inc6.us, label %for.body3.us
32
33 for.body3.lr.ph.us: ; preds = %for.inc6.us, %for.cond1.preheader.lr.ph
34 %indvars.iv16 = phi i64 [ %indvars.iv.next17, %for.inc6.us ], [ 0, %for.cond1.preheader.lr.ph ]
35 %Result.014.us = phi i32 [ %add.us, %for.inc6.us ], [ 0, %for.cond1.preheader.lr.ph ]
36 br label %for.body3.us
37
38 for.inc6: ; preds = %for.inc6, %for.cond1.preheader.lr.ph
39 %i.013 = phi i32 [ %inc7, %for.inc6 ], [ 0, %for.cond1.preheader.lr.ph ]
40 %inc7 = add i32 %i.013, 1
41 %exitcond20 = icmp eq i32 %inc7, %NumI
42 br i1 %exitcond20, label %for.end8, label %for.inc6
43
44 for.end8: ; preds = %for.inc6.us, %for.inc6, %entry
45 %Result.0.lcssa = phi i32 [ 0, %entry ], [ %add.us, %for.inc6.us ], [ 0, %for.inc6 ]
46 ret i32 %Result.0.lcssa
47 ; CHECK: @SumArray
48 ; CHECK: mtctr
49 ; CHECK: bdnz
50 }
51
52 define i32 @main() nounwind {
53 entry:
54 %Array = alloca [100 x [100 x i32]], align 4
55 br label %for.body
56
57 for.body: ; preds = %for.body, %entry
58 %indvars.iv33 = phi i64 [ 0, %entry ], [ %indvars.iv.next34, %for.body ]
59 %0 = trunc i64 %indvars.iv33 to i32
60 %sub = sub i32 0, %0
61 %arrayidx2 = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv33, i64 %indvars.iv33
62 store i32 %sub, i32* %arrayidx2, align 4, !tbaa !0
63 %indvars.iv.next34 = add i64 %indvars.iv33, 1
64 %lftr.wideiv35 = trunc i64 %indvars.iv.next34 to i32
65 %exitcond36 = icmp eq i32 %lftr.wideiv35, 100
66 br i1 %exitcond36, label %for.cond6.preheader, label %for.body
67
68 for.cond6.preheader: ; preds = %for.body, %for.inc17
69 %indvars.iv29 = phi i64 [ %indvars.iv.next30, %for.inc17 ], [ 0, %for.body ]
70 br label %for.body8
71
72 for.body8: ; preds = %for.inc14, %for.cond6.preheader
73 %indvars.iv = phi i64 [ 0, %for.cond6.preheader ], [ %indvars.iv.next, %for.inc14 ]
74 %1 = trunc i64 %indvars.iv to i32
75 %2 = trunc i64 %indvars.iv29 to i32
76 %cmp9 = icmp eq i32 %1, %2
77 br i1 %cmp9, label %for.inc14, label %if.then
78
79 if.then: ; preds = %for.body8
80 %3 = add i64 %indvars.iv, %indvars.iv29
81 %arrayidx13 = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv29, i64 %indvars.iv
82 %4 = trunc i64 %3 to i32
83 store i32 %4, i32* %arrayidx13, align 4, !tbaa !0
84 br label %for.inc14
85
86 for.inc14: ; preds = %for.body8, %if.then
87 %indvars.iv.next = add i64 %indvars.iv, 1
88 %lftr.wideiv27 = trunc i64 %indvars.iv.next to i32
89 %exitcond28 = icmp eq i32 %lftr.wideiv27, 100
90 br i1 %exitcond28, label %for.inc17, label %for.body8
91
92 for.inc17: ; preds = %for.inc14
93 %indvars.iv.next30 = add i64 %indvars.iv29, 1
94 %lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32
95 %exitcond32 = icmp eq i32 %lftr.wideiv31, 100
96 br i1 %exitcond32, label %for.body3.lr.ph.us.i, label %for.cond6.preheader
97
98 for.inc6.us.i: ; preds = %for.body3.us.i
99 %indvars.iv.next17.i = add i64 %indvars.iv16.i, 1
100 %lftr.wideiv24 = trunc i64 %indvars.iv.next17.i to i32
101 %exitcond25 = icmp eq i32 %lftr.wideiv24, 100
102 br i1 %exitcond25, label %SumArray.exit, label %for.body3.lr.ph.us.i
103
104 for.body3.us.i: ; preds = %for.body3.lr.ph.us.i, %for.body3.us.i
105 %indvars.iv.i = phi i64 [ 0, %for.body3.lr.ph.us.i ], [ %indvars.iv.next.i, %for.body3.us.i ]
106 %Result.111.us.i = phi i32 [ %Result.014.us.i, %for.body3.lr.ph.us.i ], [ %add.us.i, %for.body3.us.i ]
107 %arrayidx5.us.i = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv16.i, i64 %indvars.iv.i
108 %5 = load i32* %arrayidx5.us.i, align 4, !tbaa !0
109 %add.us.i = add nsw i32 %5, %Result.111.us.i
110 %indvars.iv.next.i = add i64 %indvars.iv.i, 1
111 %lftr.wideiv = trunc i64 %indvars.iv.next.i to i32
112 %exitcond = icmp eq i32 %lftr.wideiv, 100
113 br i1 %exitcond, label %for.inc6.us.i, label %for.body3.us.i
114
115 for.body3.lr.ph.us.i: ; preds = %for.inc17, %for.inc6.us.i
116 %indvars.iv16.i = phi i64 [ %indvars.iv.next17.i, %for.inc6.us.i ], [ 0, %for.inc17 ]
117 %Result.014.us.i = phi i32 [ %add.us.i, %for.inc6.us.i ], [ 0, %for.inc17 ]
118 br label %for.body3.us.i
119
120 SumArray.exit: ; preds = %for.inc6.us.i
121 %call20 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([23 x i8]* @.str, i64 0, i64 0), i32 100, i32 100, i32 %add.us.i) nounwind
122 ret i32 0
123
124 ; CHECK: @main
125 ; CHECK: mtctr
126 ; CHECK: bdnz
127 }
128
129 declare i32 @printf(i8* nocapture, ...) nounwind
130
131 !0 = metadata !{metadata !"int", metadata !1}
132 !1 = metadata !{metadata !"omnipotent char", metadata !2}
133 !2 = metadata !{metadata !"Simple C/C++ TBAA"}