llvm.org GIT mirror llvm / bc9b754
Turn on if-conversion for thumb2.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79084 91177308-0d34-0410-b5e6-96231b3b80d8
Evan Cheng
12 changed files with 246 additions and 51 deletions.
272272 }
273273
274274 static inline
275 const MachineInstrBuilder &AddNoT1CC(const MachineInstrBuilder &MIB) {
276 return MIB.addReg(0);
277 }
278
279 static inline
275280 bool isUncondBranchOpcode(int Opc) {
276281 return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B;
277282 }
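
The new AddNoT1CC is the counterpart of the existing AddDefaultT1CC helper: both fill the optional CPSR-def operand slot of a narrow (Thumb-1 encoding) instruction, one with a real CPSR def and one with a "no register" placeholder, so the Thumb2SizeReduce hunks below can populate that slot unconditionally whenever the narrowed opcode has an optional def. A minimal standalone model of that invariant, using stand-in types rather than the MachineInstrBuilder API:

// Standalone model (stand-in types, not MachineInstrBuilder) of the invariant
// the helper supports: if the narrowed opcode has an optional CPSR-def slot,
// the slot is always filled, either with a real CPSR def or a placeholder.
#include <iostream>
#include <vector>

static const unsigned NoRegister = 0;   // stand-in register numbers
static const unsigned CPSR = 1;

struct Operand { unsigned Reg; bool IsDef; };

static void addOptionalCCDef(std::vector<Operand> &Ops, bool HasCC) {
  if (HasCC)
    Ops.push_back({CPSR, true});        // like AddDefaultT1CC: instruction sets flags
  else
    Ops.push_back({NoRegister, false}); // like AddNoT1CC: slot present but unused
}

int main() {
  std::vector<Operand> Ops;
  addOptionalCCDef(Ops, /*HasCC=*/false);
  std::cout << "reg=" << Ops[0].Reg << " isDef=" << Ops[0].IsDef << '\n'; // reg=0 isDef=0
  return 0;
}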
390390
391391 setStackPointerRegisterToSaveRestore(ARM::SP);
392392 setSchedulingPreference(SchedulingForRegPressure);
393 setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10);
394 setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2);
395
396 if (!Subtarget->isThumb()) {
397 // Use branch latency information to determine if-conversion limits.
398 // FIXME: If-converter should use instruction latency of the branch being
399 // eliminated to compute the threshold. For ARMv6, the branch "latency"
400 // varies depending on whether it's dynamically or statically predicted
401 // and on whether the destination is in the prefetch buffer.
402 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
403 const InstrItineraryData &InstrItins = Subtarget->getInstrItineraryData();
404 unsigned Latency= InstrItins.getLatency(TII->get(ARM::Bcc).getSchedClass());
405 if (Latency > 1) {
406 setIfCvtBlockSizeLimit(Latency-1);
407 if (Latency > 2)
408 setIfCvtDupBlockSizeLimit(Latency-2);
409 } else {
410 setIfCvtBlockSizeLimit(10);
411 setIfCvtDupBlockSizeLimit(2);
412 }
393
394 // FIXME: If-converter should use instruction latency to determine
395 // profitability rather than relying on fixed limits.
396 if (Subtarget->getCPUString() == "generic") {
397 // Generic (and overly aggressive) if-conversion limits.
398 setIfCvtBlockSizeLimit(10);
399 setIfCvtDupBlockSizeLimit(2);
400 } else if (Subtarget->hasV6Ops()) {
401 setIfCvtBlockSizeLimit(2);
402 setIfCvtDupBlockSizeLimit(1);
403 } else {
404 setIfCvtBlockSizeLimit(3);
405 setIfCvtDupBlockSizeLimit(2);
413406 }
414407
415408 maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type
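
The new code above drops the Bcc-latency computation in favor of fixed per-CPU thresholds: "generic" keeps the old, aggressive limits, ARMv6+ cores get the tightest ones, and older cores sit in between. A distilled standalone restatement of that selection, with assumed helper and struct names rather than the real TargetLowering setters:

// Distilled restatement (assumed names, not the TargetLowering interface) of
// the if-conversion limits the new ARMISelLowering code picks per CPU.
#include <iostream>
#include <string>

struct IfCvtLimits {
  unsigned BlockSizeLimit;     // max block size to if-convert
  unsigned DupBlockSizeLimit;  // max block size to duplicate during if-conversion
};

static IfCvtLimits pickIfCvtLimits(const std::string &CPU, bool HasV6Ops) {
  if (CPU == "generic")
    return {10, 2};            // generic (and overly aggressive) limits
  if (HasV6Ops)
    return {2, 1};
  return {3, 2};
}

int main() {
  // CPU name is only an example input.
  IfCvtLimits L = pickIfCvtLimits("arm1136jf-s", /*HasV6Ops=*/true);
  std::cout << L.BlockSizeLimit << ' ' << L.DupBlockSizeLimit << '\n'; // 2 1
  return 0;
}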
3838 InstrItinData]>,
3939 InstrItinData]>,
4040 InstrItinData]>,
41 InstrItinData, InstrStage<1, [FU_LdSt0]>]>,
41 InstrItinData,
42 InstrStage<1, [FU_LdSt0]>]>,
4243 InstrItinData]>,
4344 InstrItinData]>,
4445 InstrItinData]>,
4546 InstrItinData]>,
46 InstrItinData, InstrStage<1, [FU_LdSt0]>]>,
47 InstrItinData,
48 InstrStage<1, [FU_LdSt0]>]>,
4749 InstrItinData]>
4850 ]>;
4951
1717 InstrItinData]>,
1818 InstrItinData]>,
1919 InstrItinData]>,
20 InstrItinData, InstrStage<1, [FU_LdSt0]>]>,
20 InstrItinData,
21 InstrStage<1, [FU_LdSt0]>]>,
2122 InstrItinData]>,
2223 InstrItinData]>,
2324 InstrItinData]>,
2425 InstrItinData]>,
25 InstrItinData, InstrStage<1, [FU_LdSt0]>]>,
26 InstrItinData,
27 InstrStage<1, [FU_LdSt0]>]>,
2628 InstrItinData]>
2729 ]>;
5454 InstrItinData]>,
5555 InstrItinData]>,
5656 InstrItinData]>,
57 InstrItinData, InstrStage<1, [FU_LdSt0]>]>,
57 InstrItinData,
58 InstrStage<1, [FU_LdSt0]>]>,
5859 InstrItinData]>,
5960 InstrItinData]>,
6061 InstrItinData]>,
6162 InstrItinData]>,
62 InstrItinData, InstrStage<1, [FU_LdSt0]>]>,
63 InstrItinData,
64 InstrStage<1, [FU_LdSt0]>]>,
6365 InstrItinData]>
6466 ]>;
7474 ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
7575 const std::string &FS)
7676 : ARMBaseTargetMachine(T, TT, FS, true),
77 InstrInfo(Subtarget.hasThumb2()
78 ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
79 : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
7780 DataLayout(Subtarget.isAPCS_ABI() ?
7881 std::string("e-p:32:32-f64:32:32-i64:32:32-"
7982 "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
8083 std::string("e-p:32:32-f64:64:64-i64:64:64-"
8184 "i16:16:32-i8:8:32-i1:8:32-a:0:32")),
8285 TLInfo(*this) {
83 // Create the approriate type of Thumb InstrInfo
84 if (Subtarget.hasThumb2())
85 InstrInfo = new Thumb2InstrInfo(Subtarget);
86 else
87 InstrInfo = new Thumb1InstrInfo(Subtarget);
8886 }
8987
9088
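
The constructor change moves the Thumb1InstrInfo/Thumb2InstrInfo choice from the constructor body into the member initializer list; the casts to the common ARMBaseInstrInfo base give the conditional expression a single type. A minimal standalone sketch of the same pattern with stand-in class names (not the LLVM types):

// Standalone sketch of the initializer-list pattern: pick the derived object
// in the member initializer list, casting both arms of the conditional to the
// common base so the expression has one type.
#include <iostream>

struct BaseInstrInfo {
  virtual ~BaseInstrInfo() {}
  virtual const char *name() const = 0;
};
struct Thumb1Info : BaseInstrInfo { const char *name() const override { return "Thumb1"; } };
struct Thumb2Info : BaseInstrInfo { const char *name() const override { return "Thumb2"; } };

class ToyTargetMachine {
  BaseInstrInfo *InstrInfo;   // chosen before later members are constructed
public:
  explicit ToyTargetMachine(bool HasThumb2)
      : InstrInfo(HasThumb2 ? static_cast<BaseInstrInfo *>(new Thumb2Info())
                            : static_cast<BaseInstrInfo *>(new Thumb1Info())) {}
  ~ToyTargetMachine() { delete InstrInfo; }
  const BaseInstrInfo *getInstrInfo() const { return InstrInfo; }
};

int main() {
  ToyTargetMachine TM(/*HasThumb2=*/true);
  std::cout << TM.getInstrInfo()->name() << '\n';   // Thumb2
  return 0;
}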
115113 PM.add(createARMLoadStoreOptimizationPass());
116114
117115 if (OptLevel != CodeGenOpt::None &&
118 !DisableIfConversion && !Subtarget.isThumb())
116 !DisableIfConversion && !Subtarget.isThumb1Only())
119117 PM.add(createIfConverterPass());
120118
121119 if (Subtarget.isThumb2()) {
701701 unsigned NumTZ = CountTrailingZeros_32(Mask);
702702 assert(NumTZ <= 3 && "Invalid IT mask!");
703703 for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
704 bool T = (Mask & (1 << Pos)) != 0;
704 bool T = (Mask & (1 << Pos)) == 0;
705705 if (T)
706706 O << 't';
707707 else
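
With the corrected test above (== 0 rather than != 0), a clear mask bit prints as 't' (same condition) and a set bit as 'e' (opposite condition), with the lowest set bit acting as the terminator that fixes the block length. A standalone sketch of that decoding, not the printer itself:

// Standalone sketch (not ARMInstPrinter) of decoding the internal 4-bit IT
// mask into the printed mnemonic: letters above the lowest set bit are 't'
// when clear and 'e' when set; the first instruction is always "then".
#include <cassert>
#include <iostream>
#include <string>

static unsigned countTrailingZeros32(unsigned Mask) {
  unsigned N = 0;
  while (N < 32 && (Mask & (1u << N)) == 0)
    ++N;
  return N;
}

static std::string itMnemonic(unsigned Mask) {
  unsigned NumTZ = countTrailingZeros32(Mask);
  assert(NumTZ <= 3 && "Invalid IT mask!");
  std::string S = "it";                        // the first instruction is always "then"
  for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos)
    S += ((Mask & (1u << Pos)) == 0) ? 't' : 'e';
  return S;
}

int main() {
  std::cout << itMnemonic(0x8) << '\n';   // "it"   - one predicated instruction
  std::cout << itMnemonic(0x4) << '\n';   // "itt"  - then, then
  std::cout << itMnemonic(0xC) << '\n';   // "ite"  - then, else
  std::cout << itMnemonic(0x6) << '\n';   // "itte" - then, then, else
  return 0;
}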
6565 .addImm(CC);
6666 ++MBBI;
6767
68 // Finalize IT mask. If the following instruction is not predicated or it's
69 // predicated on a condition that's not the same or the opposite of CC, then
70 // the mask is 0x8.
68 // Finalize IT mask.
7169 ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
72 unsigned Mask = 0x8;
73 while (MBBI != E || (Mask & 1)) {
70 unsigned Mask = 0, Pos = 3;
71 while (MBBI != E && Pos) {
7472 ARMCC::CondCodes NCC = getPredicate(&*MBBI, TII);
75 if (NCC == CC) {
76 Mask >>= 1;
77 Mask |= 0x8;
78 } else if (NCC == OCC) {
79 Mask >>= 1;
80 } else {
73 if (NCC == OCC) {
74 Mask |= (1 << Pos);
75 } else if (NCC != CC)
8176 break;
82 }
77 --Pos;
8378 ++MBBI;
8479 }
80 Mask |= (1 << Pos);
8581 MIB.addImm(Mask);
8682 Modified = true;
8783 ++NumITs;
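
The rewritten loop builds the mask top-down: an instruction predicated on the opposite condition sets its bit (an 'e' slot), a matching predicate leaves the bit clear (a 't' slot), anything else ends the block, and the final position receives the terminator bit. A standalone model of that encoding (assumed enum and function names, not the pass itself); its outputs round-trip with the printer sketch above:

// Standalone model of the Thumb2ITBlockPass mask construction: walk the
// instructions after the first predicated one, set a bit for "opposite
// condition", leave it clear for "same condition", stop at anything else,
// then plant the terminator bit at the final position.
#include <iostream>
#include <vector>

enum Pred { Same, Opposite, Other };   // relation of each following instruction's
                                       // predicate to the IT block's condition

static unsigned buildITMask(const std::vector<Pred> &Following) {
  unsigned Mask = 0, Pos = 3;
  std::vector<Pred>::const_iterator I = Following.begin(), E = Following.end();
  while (I != E && Pos) {              // mirrors "while (MBBI != E && Pos)"
    if (*I == Opposite)
      Mask |= (1u << Pos);             // this slot prints as 'e'
    else if (*I != Same)
      break;                           // differently/un-predicated: close the block
    --Pos;
    ++I;
  }
  Mask |= (1u << Pos);                 // terminator bit encodes the block length
  return Mask;
}

int main() {
  std::cout << std::showbase << std::hex;
  std::cout << buildITMask({}) << '\n';               // 0x8 -> "it"
  std::cout << buildITMask({Same}) << '\n';           // 0x4 -> "itt"
  std::cout << buildITMask({Opposite}) << '\n';       // 0xc -> "ite"
  std::cout << buildITMask({Same, Opposite}) << '\n'; // 0x6 -> "itte"
  return 0;
}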
128128 static char ID;
129129 Thumb2SizeReduce();
130130
131 const TargetInstrInfo *TII;
131 const Thumb2InstrInfo *TII;
132132
133133 virtual bool runOnMachineFunction(MachineFunction &MF);
134134
453453 DebugLoc dl = MI->getDebugLoc();
454454 MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
455455 MIB.addOperand(MI->getOperand(0));
456 if (HasCC && NewTID.hasOptionalDef())
457 AddDefaultT1CC(MIB, CCDead);
456 if (NewTID.hasOptionalDef()) {
457 if (HasCC)
458 AddDefaultT1CC(MIB, CCDead);
459 else
460 AddNoT1CC(MIB);
461 }
458462
459463 // Transfer the rest of operands.
460464 unsigned NumOps = TID.getNumOperands();
533537 DebugLoc dl = MI->getDebugLoc();
534538 MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
535539 MIB.addOperand(MI->getOperand(0));
536 if (HasCC && NewTID.hasOptionalDef())
537 AddDefaultT1CC(MIB, CCDead);
540 if (NewTID.hasOptionalDef()) {
541 if (HasCC)
542 AddDefaultT1CC(MIB, CCDead);
543 else
544 AddNoT1CC(MIB);
545 }
538546
539547 // Transfer the rest of operands.
540548 unsigned NumOps = TID.getNumOperands();
658666
659667 bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
660668 const TargetMachine &TM = MF.getTarget();
661 TII = TM.getInstrInfo();
669 TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
662670
663671 bool Modified = false;
664672 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
0 ; RUN: llvm-as < %s | llc -mtriple=thumbv7-apple-darwin -disable-fp-elim | not grep r7
1
2 %struct.noise3 = type { [3 x [17 x i32]] }
3 %struct.noiseguard = type { i32, i32, i32 }
4
5 define arm_apcscc void @vorbis_encode_noisebias_setup(i8* nocapture %vi.0.7.val, double %s, i32 %block, i32* nocapture %suppress, %struct.noise3* nocapture %in, %struct.noiseguard* nocapture %guard, double %userbias) nounwind {
6 entry:
7 %0 = getelementptr %struct.noiseguard* %guard, i32 %block, i32 2; [#uses=1]
8 %1 = load i32* %0, align 4 ; [#uses=1]
9 store i32 %1, i32* undef, align 4
10 unreachable
11 }
0 ; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | FileCheck %s
1
2 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
3 ; CHECK: t1:
4 ; CHECK: it ne
5 ; CHECK: cmpne
6 switch i32 %c, label %cond_next [
7 i32 1, label %cond_true
8 i32 7, label %cond_true
9 ]
10
11 cond_true:
12 %tmp12 = add i32 %a, 1
13 %tmp1518 = add i32 %tmp12, %b
14 ret i32 %tmp1518
15
16 cond_next:
17 %tmp15 = add i32 %b, %a
18 ret i32 %tmp15
19 }
20
21 ; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt.
22 define i32 @t2(i32 %a, i32 %b) {
23 entry:
24 ; CHECK: t2:
25 ; CHECK: ite le
26 ; CHECK: suble
27 ; CHECK: subgt
28 %tmp1434 = icmp eq i32 %a, %b ; [#uses=1]
29 br i1 %tmp1434, label %bb17, label %bb.outer
30
31 bb.outer: ; preds = %cond_false, %entry
32 %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; [#uses=5]
33 %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; [#uses=1]
34 br label %bb
35
36 bb: ; preds = %cond_true, %bb.outer
37 %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] ; [#uses=2]
38 %tmp. = sub i32 0, %b_addr.021.0.ph ; [#uses=1]
39 %tmp.40 = mul i32 %indvar, %tmp. ; [#uses=1]
40 %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph ; [#uses=6]
41 %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph ; [#uses=1]
42 br i1 %tmp3, label %cond_true, label %cond_false
43
44 cond_true: ; preds = %bb
45 %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph ; [#uses=2]
46 %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph ; [#uses=1]
47 %indvar.next = add i32 %indvar, 1 ; [#uses=1]
48 br i1 %tmp1437, label %bb17, label %bb
49
50 cond_false: ; preds = %bb
51 %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 ; [#uses=2]
52 %tmp14 = icmp eq i32 %a_addr.026.0, %tmp10 ; [#uses=1]
53 br i1 %tmp14, label %bb17, label %bb.outer
54
55 bb17: ; preds = %cond_false, %cond_true, %entry
56 %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; [#uses=1]
57 ret i32 %a_addr.026.1
58 }
59
60 @x = external global i32* ; [#uses=1]
61
62 define void @foo(i32 %a) {
63 entry:
64 %tmp = load i32** @x ; [#uses=1]
65 store i32 %a, i32* %tmp
66 ret void
67 }
68
69 define void @t3(i32 %a, i32 %b) {
70 entry:
71 ; CHECK: t3:
72 ; CHECK: it lt
73 ; CHECK: poplt {r7, pc}
74 %tmp1 = icmp sgt i32 %a, 10 ; [#uses=1]
75 br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
76
77 cond_true: ; preds = %entry
78 tail call void @foo( i32 %b )
79 ret void
80
81 UnifiedReturnBlock: ; preds = %entry
82 ret void
83 }
0 ; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | FileCheck %s
1
2 define void @foo(i32 %X, i32 %Y) {
3 entry:
4 ; CHECK: foo:
5 ; CHECK: it ne
6 ; CHECK: cmpne
7 ; CHECK: it hi
8 ; CHECK: pophi {r7, pc}
9 %tmp1 = icmp ult i32 %X, 4 ; [#uses=1]
10 %tmp4 = icmp eq i32 %Y, 0 ; [#uses=1]
11 %tmp7 = or i1 %tmp4, %tmp1 ; [#uses=1]
12 br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock
13
14 cond_true: ; preds = %entry
15 %tmp10 = tail call i32 (...)* @bar( ) ; [#uses=0]
16 ret void
17
18 UnifiedReturnBlock: ; preds = %entry
19 ret void
20 }
21
22 declare i32 @bar(...)
23
24 ; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1.
25
26 %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
27
28 define fastcc i32 @CountTree(%struct.quad_struct* %tree) {
29 entry:
30 ; CHECK: CountTree:
31 ; CHECK: it eq
32 ; CHECK: cmpeq
33 ; CHECK: beq.n
34 ; CHECK: itt eq
35 ; CHECK: moveq
36 ; CHECK: popeq
37 br label %tailrecurse
38
39 tailrecurse: ; preds = %bb, %entry
40 %tmp6 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
41 %tmp9 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2]
42 %tmp12 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
43 %tmp14 = icmp eq %struct.quad_struct* null, null ; [#uses=1]
44 %tmp17 = icmp eq %struct.quad_struct* %tmp6, null ; [#uses=1]
45 %tmp23 = icmp eq %struct.quad_struct* %tmp9, null ; [#uses=1]
46 %tmp29 = icmp eq %struct.quad_struct* %tmp12, null ; [#uses=1]
47 %bothcond = and i1 %tmp17, %tmp14 ; [#uses=1]
48 %bothcond1 = and i1 %bothcond, %tmp23 ; [#uses=1]
49 %bothcond2 = and i1 %bothcond1, %tmp29 ; [#uses=1]
50 br i1 %bothcond2, label %return, label %bb
51
52 bb: ; preds = %tailrecurse
53 %tmp41 = tail call fastcc i32 @CountTree( %struct.quad_struct* %tmp9 ) ; [#uses=0]
54 br label %tailrecurse
55
56 return: ; preds = %tailrecurse
57 ret i32 0
58 }
59
60 %struct.SString = type { i8*, i32, i32 }
61
62 declare void @abort()
63
64 define fastcc void @t1(%struct.SString* %word, i8 signext %c) {
65 entry:
66 ; CHECK: t1:
67 ; CHECK: it ne
68 ; CHECK: popne {r7, pc}
69 %tmp1 = icmp eq %struct.SString* %word, null ; [#uses=1]
70 br i1 %tmp1, label %cond_true, label %cond_false
71
72 cond_true: ; preds = %entry
73 tail call void @abort( )
74 unreachable
75
76 cond_false: ; preds = %entry
77 ret void
78 }
79
80 define fastcc void @t2() nounwind {
81 entry:
82 ; CHECK: t2:
83 ; CHECK: cmp r0, #0
84 ; CHECK: bne.n
85 br i1 undef, label %bb.i.i3, label %growMapping.exit
86
87 bb.i.i3: ; preds = %entry
88 unreachable
89
90 growMapping.exit: ; preds = %entry
91 unreachable
92 }