llvm.org GIT mirror llvm / 5b00cea
Fix dynamic linking on PPC64. Dynamic linking on PPC64 has had problems since we had to move the top-down hazard-detection logic post-RA. For dynamic linking to work, there needs to be a nop placed after every call. It turns out that it is really hard to guarantee that nothing will be placed between the call (bl) and the nop during post-RA scheduling. Previous attempts at fixing this by placing logic inside the hazard detector only partially worked. This is now fixed in a different way: call+nop codegen-only instructions. As far as CodeGen is concerned, the pair is now a single instruction and cannot be split. This solution works much better than previous attempts. The scoreboard hazard detector is also renamed to be more generic; there is currently no CPU-specific logic in it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153816 91177308-0d34-0410-b5e6-96231b3b80d8 Hal Finkel 8 years ago
8 changed file(s) with 127 addition(s) and 36 deletion(s). Raw diff Collapse all Expand all
2121 using namespace llvm;
2222
2323 //===----------------------------------------------------------------------===//
24 // PowerPC 440 Hazard Recognizer
25 void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) {
24 // PowerPC Scoreboard Hazard Recognizer
25 void PPCScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
2626 const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
27 if (!MCID) {
27 if (!MCID)
2828 // This is a PPC pseudo-instruction.
2929 return;
30 }
3130
3231 ScoreboardHazardRecognizer::EmitInstruction(SU);
32 }
33
34 ScheduleHazardRecognizer::HazardType
35 PPCScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
36 return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
37 }
38
39 void PPCScoreboardHazardRecognizer::AdvanceCycle() {
40 ScoreboardHazardRecognizer::AdvanceCycle();
41 }
42
43 void PPCScoreboardHazardRecognizer::Reset() {
44 ScoreboardHazardRecognizer::Reset();
3345 }
3446
3547 //===----------------------------------------------------------------------===//
6072
6173 PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
6274 : TII(tii) {
63 LastWasBL8_ELF = false;
6475 EndDispatchGroup();
6576 }
6677
131142 return NoHazard;
132143
133144 unsigned Opcode = MI->getOpcode();
134
135 // If the last instruction was a BL8_ELF, then the NOP must follow it
136 // directly (this is a strong requirement from the linker due to the ELF ABI).
137 // We return only Hazard (and not NoopHazard) because if the NOP is necessary
138 // then it will already be in the instruction stream (it is not always
139 // necessary; tail calls, for example, do not need it).
140 if (LastWasBL8_ELF && Opcode != PPC::NOP)
141 return Hazard;
142
143145 bool isFirst, isSingle, isCracked, isLoad, isStore;
144146 PPCII::PPC970_Unit InstrType =
145147 GetInstrType(Opcode, isFirst, isSingle, isCracked,
198200 return;
199201
200202 unsigned Opcode = MI->getOpcode();
201 LastWasBL8_ELF = (Opcode == PPC::BL8_ELF);
202
203203 bool isFirst, isSingle, isCracked, isLoad, isStore;
204204 PPCII::PPC970_Unit InstrType =
205205 GetInstrType(Opcode, isFirst, isSingle, isCracked,
239239 }
240240
241241 void PPCHazardRecognizer970::Reset() {
242 LastWasBL8_ELF = false;
243242 EndDispatchGroup();
244243 }
245244
2020
2121 namespace llvm {
2222
23 /// PPCHazardRecognizer440 - This class implements a scoreboard-based
24 /// hazard recognizer for the PPC 440 and friends.
25 class PPCHazardRecognizer440 : public ScoreboardHazardRecognizer {
23 /// PPCScoreboardHazardRecognizer - This class implements a scoreboard-based
24 /// hazard recognizer for generic PPC processors.
25 class PPCScoreboardHazardRecognizer : public ScoreboardHazardRecognizer {
2626 const ScheduleDAG *DAG;
2727 public:
28 PPCHazardRecognizer440(const InstrItineraryData *ItinData,
28 PPCScoreboardHazardRecognizer(const InstrItineraryData *ItinData,
2929 const ScheduleDAG *DAG_) :
3030 ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) {}
3131
32 virtual HazardType getHazardType(SUnit *SU, int Stalls);
3233 virtual void EmitInstruction(SUnit *SU);
34 virtual void AdvanceCycle();
35 virtual void Reset();
3336 };
3437
3538 /// PPCHazardRecognizer970 - This class defines a finite state automaton that
4750
4851 // HasCTRSet - If the CTR register is set in this group, disallow BCTRL.
4952 bool HasCTRSet;
50
51 // Was the last instruction issued a BL8_ELF
52 bool LastWasBL8_ELF;
5353
5454 // StoredPtr - Keep track of the address of any store. If we see a load from
5555 // the same address (or one that aliases it), disallow the store. We can have
471471 case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
472472 case PPCISD::STD_32: return "PPCISD::STD_32";
473473 case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
474 case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4";
474475 case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
475476 case PPCISD::NOP: return "PPCISD::NOP";
476477 case PPCISD::MTCTR: return "PPCISD::MTCTR";
28122813 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
28132814 }
28142815
2815 Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
2816 InFlag = Chain.getValue(1);
2817
28182816 // Add a NOP immediately after the branch instruction when using the 64-bit
28192817 // SVR4 ABI. At link time, if caller and callee are in a different module and
28202818 // thus have a different TOC, the call will be replaced with a call to a stub
28232821 // which restores the TOC of the caller from the TOC save slot of the current
28242822 // stack frame. If caller and callee belong to the same module (and have the
28252823 // same TOC), the NOP will remain unchanged.
2824
2825 bool needsTOCRestore = false;
28262826 if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
2827 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
28282827 if (CallOpc == PPCISD::BCTRL_SVR4) {
28292828 // This is a call through a function pointer.
28302829 // Restore the caller TOC from the save area into R2.
28352834 // since r2 is a reserved register (which prevents the register allocator
28362835 // from allocating it), resulting in an additional register being
28372836 // allocated and an unnecessary move instruction being generated.
2838 Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
2839 InFlag = Chain.getValue(1);
2840 } else {
2837 needsTOCRestore = true;
2838 } else if (CallOpc == PPCISD::CALL_SVR4) {
28412839 // Otherwise insert NOP.
2842 InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag);
2843 }
2840 CallOpc = PPCISD::CALL_NOP_SVR4;
2841 }
2842 }
2843
2844 Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
2845 InFlag = Chain.getValue(1);
2846
2847 if (needsTOCRestore) {
2848 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2849 Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
2850 InFlag = Chain.getValue(1);
28442851 }
28452852
28462853 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
9494 EXTSW_32,
9595
9696 /// CALL - A direct function call.
97 CALL_Darwin, CALL_SVR4,
97 /// CALL_NOP_SVR4 is a call with the special NOP which follows 64-bit
98 /// SVR4 calls.
99 CALL_Darwin, CALL_SVR4, CALL_NOP_SVR4,
98100
99101 /// NOP - Special NOP which follows 64-bit SVR4 calls.
100102 NOP,
8888 let Uses = [RM] in {
8989 def BL8_ELF : IForm<18, 0, 1,
9090 (outs), (ins calltarget:$func, variable_ops),
91 "bl $func", BrB, []>; // See Pat patterns below.
91 "bl $func", BrB, []>; // See Pat patterns below.
92
93 let isCodeGenOnly = 1 in
94 def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24,
95 (outs), (ins calltarget:$func, variable_ops),
96 "bl $func\n\tnop", BrB, []>;
97
9298 def BLA8_ELF : IForm<18, 1, 1,
9399 (outs), (ins aaddr:$func, variable_ops),
94100 "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
101
102 let isCodeGenOnly = 1 in
103 def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24,
104 (outs), (ins aaddr:$func, variable_ops),
105 "bla $func\n\tnop", BrB,
106 [(PPCcall_nop_SVR4 (i64 imm:$func))]>;
95107 }
96108 let Uses = [X11, CTR8, RM] in {
97109 def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
110122
111123 def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)),
112124 (BL8_ELF tglobaladdr:$dst)>;
125 def : Pat<(PPCcall_nop_SVR4 (i64 tglobaladdr:$dst)),
126 (BL8_NOP_ELF tglobaladdr:$dst)>;
127
113128 def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)),
114129 (BL8_ELF texternalsym:$dst)>;
130 def : Pat<(PPCcall_nop_SVR4 (i64 texternalsym:$dst)),
131 (BL8_NOP_ELF texternalsym:$dst)>;
132
115133 def : Pat<(PPCnop),
116134 (NOP)>;
117135
5050 class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; }
5151 class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; }
5252
53 // Two joined instructions; used to emit two adjacent instructions as one.
54 // The itinerary from the first instruction is used for scheduling and
55 // classification.
56 class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr,
57 InstrItinClass itin>
58 : Instruction {
59 field bits<64> Inst;
60
61 bit PPC64 = 0; // Default value, override with isPPC64
62
63 let Namespace = "PPC";
64 let Inst{0-5} = opcode1;
65 let Inst{32-37} = opcode2;
66 let OutOperandList = OOL;
67 let InOperandList = IOL;
68 let AsmString = asmstr;
69 let Itinerary = itin;
70
71 bits<1> PPC970_First = 0;
72 bits<1> PPC970_Single = 0;
73 bits<1> PPC970_Cracked = 0;
74 bits<3> PPC970_Unit = 0;
75
76 /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
77 /// these must be reflected there! See comments there for what these are.
78 let TSFlags{0} = PPC970_First;
79 let TSFlags{1} = PPC970_Single;
80 let TSFlags{2} = PPC970_Cracked;
81 let TSFlags{5-3} = PPC970_Unit;
82 }
5383
5484 // 1.7.1 I-Form
5585 class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
159189 class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
160190 InstrItinClass itin, list<dag> pattern>
161191 : DForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
192 let A = 0;
193 let Addr = 0;
194 }
195
196 class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
197 dag OOL, dag IOL, string asmstr,
198 InstrItinClass itin, list<dag> pattern>
199 : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> {
200 bits<5> A;
201 bits<21> Addr;
202
203 let Pattern = pattern;
204 bits<24> LI;
205
206 let Inst{6-29} = LI;
207 let Inst{30} = aa;
208 let Inst{31} = lk;
209
210 let Inst{38-42} = A;
211 let Inst{43-47} = Addr{20-16}; // Base Reg
212 let Inst{48-63} = Addr{15-0}; // Displacement
213 }
214
215 // This is used to emit BL8+NOP.
216 class IForm_and_DForm_4_zero opcode1, bit aa, bit lk, bits<6> opcode2,
217 dag OOL, dag IOL, string asmstr,
218 InstrItinClass itin, list pattern>
219 : IForm_and_DForm_1
220 OOL, IOL, asmstr, itin, pattern> {
162221 let A = 0;
163222 let Addr = 0;
164223 }
5050 unsigned Directive = TM->getSubtarget().getDarwinDirective();
5151 if (Directive == PPC::DIR_440) {
5252 const InstrItineraryData *II = TM->getInstrItineraryData();
53 return new PPCHazardRecognizer440(II, DAG);
53 return new PPCScoreboardHazardRecognizer(II, DAG);
5454 }
5555
5656 return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
683683 case PPC::GC_LABEL:
684684 case PPC::DBG_VALUE:
685685 return 0;
686 case PPC::BL8_NOP_ELF:
687 case PPC::BLA8_NOP_ELF:
688 return 8;
686689 default:
687690 return 4; // PowerPC instructions are all 4 bytes
688691 }
115115 def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
116116 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
117117 SDNPVariadic]>;
118 def PPCcall_nop_SVR4 : SDNode<"PPCISD::CALL_NOP_SVR4", SDT_PPCCall,
119 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
120 SDNPVariadic]>;
118121 def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>;
119122 def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
120123 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;