llvm.org GIT mirror llvm / 5ee67e8
Generate PPC early conditional returns PowerPC has a conditional branch to the link register (return) instruction: BCLR. This should be used any time when we'd otherwise have a conditional branch to a return. This adds a small pass, PPCEarlyReturn, which runs just prior to the branch selection pass (and, importantly, after block placement) to generate these conditional returns when possible. It will also eliminate unconditional branches to returns (these happen rarely; most of the time these have already been tail duplicated by the time PPCEarlyReturn is invoked). This is a nice optimization for small functions that do not maintain a stack frame. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179026 91177308-0d34-0410-b5e6-96231b3b80d8 Hal Finkel 7 years ago
8 changed file(s) with 218 addition(s) and 28 deletion(s). Raw diff Collapse all Expand all
3030 class MCInst;
3131
3232 FunctionPass *createPPCCTRLoops();
33 FunctionPass *createPPCEarlyReturnPass();
3334 FunctionPass *createPPCBranchSelectionPass();
3435 FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
3536 FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
3940
4041 /// \brief Creates a PPC-specific Target Transformation Info pass.
4142 ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM);
42
43
4344 namespace PPCII {
4445
4546 /// Target Operand Flag enum.
564564 bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
565565 bits<3> CR;
566566
567 let BO = BIBO{2-6};
568 let BI{0-1} = BIBO{0-1};
569 let BI{2-4} = CR;
567 let BO = BIBO{4-0};
568 let BI{0-1} = BIBO{5-6};
569 let BI{2-4} = CR{0-2};
570570 let BH = 0;
571571 }
572572
1717 #include "PPCInstrBuilder.h"
1818 #include "PPCMachineFunctionInfo.h"
1919 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
2021 #include "llvm/ADT/STLExtras.h"
2122 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
2224 #include "llvm/CodeGen/MachineInstrBuilder.h"
2325 #include "llvm/CodeGen/MachineMemOperand.h"
2426 #include "llvm/CodeGen/MachineRegisterInfo.h"
895897 return 4; // PowerPC instructions are all 4 bytes
896898 }
897899 }
900
901 #undef DEBUG_TYPE
902 #define DEBUG_TYPE "ppc-early-ret"
903 STATISTIC(NumBCLR, "Number of early conditional returns");
904 STATISTIC(NumBLR, "Number of early returns");
905
906 namespace llvm {
907 void initializePPCEarlyReturnPass(PassRegistry&);
908 }
909
910 namespace {
911 // PPCEarlyReturn pass - For simple functions without epilogue code, move
912 // returns up, and create conditional returns, to avoid unnecessary
913 // branch-to-blr sequences.
// Machine-function pass: runOnMachineFunction visits every block of the
// function and processBlock rewrites branches that target a blr-only block.
914 struct PPCEarlyReturn : public MachineFunctionPass {
915 static char ID;
// Registers the pass with the registry returned by
// PassRegistry::getPassRegistry() each time an instance is constructed.
916 PPCEarlyReturn() : MachineFunctionPass(ID) {
917 initializePPCEarlyReturnPass(*PassRegistry::getPassRegistry());
918 }
919
// Both pointers are assigned at the top of runOnMachineFunction and are
// read by processBlock (TII is used to build the replacement instructions).
920 const PPCTargetMachine *TM;
921 const PPCInstrInfo *TII;
922
923 protected:
// Tries to treat LastMBB as a "return-only" block. For each predecessor,
// a PPC::B to LastMBB is replaced by a PPC::BLR and a PPC::BCC to LastMBB
// is replaced by a PPC::BCLR carrying the same immediate and register
// operands. Predecessors that no longer reference LastMBB lose it as a
// successor; afterwards the blr may be moved into the layout predecessor
// and LastMBB may be erased. Returns true if any branch was rewritten.
924 bool processBlock(MachineBasicBlock &LastMBB) {
925 bool Changed = false;
926
927 MachineBasicBlock::iterator I = LastMBB.begin();
928 I = LastMBB.SkipPHIsAndLabels(I);
929
930 // The block must be essentially empty except for the blr.
931 if (I == LastMBB.end() || I->getOpcode() != PPC::BLR ||
932 I != LastMBB.getLastNonDebugInstr())
933 return Changed;
934
// NOTE(review): the element type/size of this SmallVector seems to have been
// lost in extraction; it holds the *PI values pushed below — confirm against
// the upstream source.
935 SmallVector PredToRemove;
936 for (MachineBasicBlock::pred_iterator PI = LastMBB.pred_begin(),
937 PIE = LastMBB.pred_end(); PI != PIE; ++PI) {
// OtherReference: this predecessor still reaches LastMBB some other way.
// BlockChanged: at least one branch in this predecessor was rewritten.
938 bool OtherReference = false, BlockChanged = false;
939 for (MachineBasicBlock::iterator J = (*PI)->begin();
940 J != (*PI)->end();) {
941 if (J->getOpcode() == PPC::B) {
942 if (J->getOperand(0).getMBB() == &LastMBB) {
943 // This is an unconditional branch to the return. Replace the
944 // branch with a blr.
945 BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BLR));
// Step J past the old branch before erasing it through K.
946 MachineBasicBlock::iterator K = J++;
947 K->eraseFromParent();
948 BlockChanged = true;
949 ++NumBLR;
950 continue;
951 }
952 } else if (J->getOpcode() == PPC::BCC) {
// For BCC, operand 0 is read as an immediate, operand 1 as a register and
// operand 2 as the target block.
953 if (J->getOperand(2).getMBB() == &LastMBB) {
954 // This is a conditional branch to the return. Replace the branch
955 // with a bclr.
956 BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCLR))
957 .addImm(J->getOperand(0).getImm())
958 .addReg(J->getOperand(1).getReg());
// Step J past the old branch before erasing it through K.
959 MachineBasicBlock::iterator K = J++;
960 K->eraseFromParent();
961 BlockChanged = true;
962 ++NumBCLR;
963 continue;
964 }
965 } else if (J->isBranch()) {
// Any other branch kind is left in place; only record whether it may still
// reach LastMBB. An indirect branch counts only if LastMBB has its address
// taken; otherwise every operand is checked for a reference to LastMBB.
966 if (J->isIndirectBranch()) {
967 if (LastMBB.hasAddressTaken())
968 OtherReference = true;
969 } else
970 for (unsigned i = 0; i < J->getNumOperands(); ++i)
971 if (J->getOperand(i).isMBB() &&
972 J->getOperand(i).getMBB() == &LastMBB)
973 OtherReference = true;
974 }
975
976 ++J;
977 }
978
// A predecessor that can fall through into LastMBB still references it.
979 if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&LastMBB))
980 OtherReference = true;
981
982 // Predecessors are stored in a vector and can't be removed here.
983 if (!OtherReference && BlockChanged) {
984 PredToRemove.push_back(*PI);
985 }
986
987 if (BlockChanged)
988 Changed = true;
989 }
990
// Now that iteration over the predecessor list is finished, drop the CFG
// edges recorded above.
991 for (unsigned i = 0, ie = PredToRemove.size(); i != ie; ++i)
992 PredToRemove[i]->removeSuccessor(&LastMBB);
993
994 if (Changed && !LastMBB.hasAddressTaken()) {
995 // We now might be able to merge this blr-only block into its
996 // by-layout predecessor.
997 if (LastMBB.pred_size() == 1 &&
998 (*LastMBB.pred_begin())->isLayoutSuccessor(&LastMBB)) {
999 // Move the blr into the preceding block.
1000 MachineBasicBlock &PrevMBB = **LastMBB.pred_begin();
// I still points at the blr located at the top of this function.
1001 PrevMBB.splice(PrevMBB.end(), &LastMBB, I);
1002 PrevMBB.removeSuccessor(&LastMBB);
1003 }
1004
// With no predecessors left, the block is removed from the function.
1005 if (LastMBB.pred_empty())
1006 LastMBB.eraseFromParent();
1007 }
1008
1009 return Changed;
1010 }
1011
1012 public:
// Pass entry point: caches TM/TII, then runs processBlock on every block of
// MF. Returns true if any block was changed.
1013 virtual bool runOnMachineFunction(MachineFunction &MF) {
// NOTE(review): the static_cast target type seems to have been lost in
// extraction; the result is assigned to the const PPCTargetMachine *TM
// member — confirm against the upstream source.
1014 TM = static_cast(&MF.getTarget());
1015 TII = TM->getInstrInfo();
1016
1017 bool Changed = false;
1018
1019 // If the function does not have at least two blocks, then there is
1020 // nothing to do.
1021 if (MF.size() < 2)
1022 return Changed;
1023
// The iterator is advanced before processBlock is called because
// processBlock may erase the block it is given.
1024 for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
1025 MachineBasicBlock &B = *I++;
1026 if (processBlock(B))
1027 Changed = true;
1028 }
1029
1030 return Changed;
1031 }
1032
// Adds nothing of its own; simply forwards to the base-class implementation.
1033 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
1034 MachineFunctionPass::getAnalysisUsage(AU);
1035 }
1036 };
1037 }
1038
1039 INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE,
1040 "PowerPC Early-Return Creation", false, false)
1041
1042 char PPCEarlyReturn::ID = 0;
// Factory for the pass: allocates a new PPCEarlyReturn with `new` and returns
// it as a FunctionPass pointer.
1043 FunctionPass*
1044 llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); }
1045
510510 // BCC represents an arbitrary conditional branch on a predicate.
511511 // FIXME: should be able to write a pattern for PPCcondbranch, but can't use
512512 // a two-value operand where a dag node expects two operands. :(
513 let isCodeGenOnly = 1 in
513 let isCodeGenOnly = 1 in {
514514 def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
515515 "b${cond:cc} ${cond:reg}, $dst"
516516 /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
517 let isReturn = 1, Uses = [LR, RM] in
518 def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond),
519 "b${cond:cc}lr ${cond:reg}", BrB, []>;
520 }
517521
518522 let Defs = [CTR], Uses = [CTR] in {
519523 def BDZ : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
123123 }
124124
125125 bool PPCPassConfig::addPreEmitPass() {
126 if (getOptLevel() != CodeGenOpt::None)
127 addPass(createPPCEarlyReturnPass());
126128 // Must run branch selection immediately preceding the asm printer.
127129 addPass(createPPCBranchSelectionPass());
128130 return false;
122122
123123 This could be much better (bdnz instead of bdz) but it still beats us. If we
124124 produced this with bdnz, the loop would be a single dispatch group.
125
126 ===-------------------------------------------------------------------------===
127
128 Compile:
129
130 void foo(int *P) {
131 if (P) *P = 0;
132 }
133
134 into:
135
136 _foo:
137 cmpwi cr0,r3,0
138 beqlr cr0
139 li r0,0
140 stw r0,0(r3)
141 blr
142
143 This is effectively a simple form of predication.
144125
145126 ===-------------------------------------------------------------------------===
146127
0 ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
1 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
2 target triple = "powerpc64-unknown-linux-gnu"
3
; Stores 0 through %P only when %P is non-null; a null %P branches directly
; to the return block if.end. The CHECK lines expect a beqlr followed by a blr.
4 define void @foo(i32* %P) #0 {
5 entry:
6 %tobool = icmp eq i32* %P, null
7 br i1 %tobool, label %if.end, label %if.then
8
9 if.then: ; preds = %entry
10 store i32 0, i32* %P, align 4, !tbaa !0
11 br label %if.end
12
13 if.end: ; preds = %entry, %if.then
14 ret void
15
16 ; CHECK: @foo
17 ; CHECK: beqlr
18 ; CHECK: blr
19 }
20
; The return block if.end3 is reached from three predecessors (if.then,
; if.then2 and if.else), one of them via a conditional branch on %Q being
; null. The CHECK lines expect a beqlr followed by a blr.
21 define void @bar(i32* %P, i32* %Q) #0 {
22 entry:
23 %tobool = icmp eq i32* %P, null
24 br i1 %tobool, label %if.else, label %if.then
25
26 if.then: ; preds = %entry
27 store i32 0, i32* %P, align 4, !tbaa !0
28 %tobool1 = icmp eq i32* %Q, null
29 br i1 %tobool1, label %if.end3, label %if.then2
30
31 if.then2: ; preds = %if.then
32 store i32 1, i32* %Q, align 4, !tbaa !0
33 br label %if.end3
34
35 if.else: ; preds = %entry
36 store i32 0, i32* %Q, align 4, !tbaa !0
37 br label %if.end3
38
39 if.end3: ; preds = %if.then, %if.then2, %if.else
40 ret void
41
42 ; CHECK: @bar
43 ; CHECK: beqlr
44 ; CHECK: blr
45 }
46
47 attributes #0 = { nounwind }
48
49 !0 = metadata !{metadata !"int", metadata !1}
50 !1 = metadata !{metadata !"omnipotent char", metadata !2}
51 !2 = metadata !{metadata !"Simple C/C++ TBAA"}
106106
107107 declare double @trunc(double) nounwind readnone
108108
109 define float @test11(float %x) nounwind {
109 define void @test11(float %x, float* %y) nounwind {
110110 %call = tail call float @rintf(float %x) nounwind readnone
111 ret float %call
111 store float %call, float* %y
112 ret void
112113
113114 ; CHECK: test11:
114115 ; CHECK-NOT: frin
124125
125126 declare float @rintf(float) nounwind readnone
126127
127 define double @test12(double %x) nounwind {
128 define void @test12(double %x, double* %y) nounwind {
128129 %call = tail call double @rint(double %x) nounwind readnone
129 ret double %call
130 store double %call, double* %y
131 ret void
130132
131133 ; CHECK: test12:
132134 ; CHECK-NOT: frin