llvm.org GIT mirror llvm / 49b3971
[PowerPC] Fix reverted patch r227976 to avoid register assignment issues See full discussion in http://reviews.llvm.org/D7491. We now hide the add-immediate and call instructions together in a separate pseudo-op, which is tagged to define GPR3 and clobber the call-killed registers. The PPCTLSDynamicCall pass prior to RA now expands this op into the two separate addi and call ops, with explicit definitions of GPR3 on both instructions, and explicit clobbers on the call instruction. The pass is now marked as requiring and preserving the LiveIntervals and SlotIndexes analyses, and fixes these up after the replacement sequences are introduced. Self-hosting has been verified on LE P8 and BE P7 with various optimization levels, etc. It has also been verified with the --no-tls-optimize flag workaround removed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228725 91177308-0d34-0410-b5e6-96231b3b80d8 Bill Schmidt 5 years ago
14 changed file(s) with 434 addition(s) and 120 deletion(s). Raw diff Collapse all Expand all
3131 PPCTargetObjectFile.cpp
3232 PPCTargetTransformInfo.cpp
3333 PPCSelectionDAGInfo.cpp
34 PPCTLSDynamicCall.cpp
3435 PPCVSXCopy.cpp
3536 PPCVSXFMAMutate.cpp
3637 )
3939 FunctionPass *createPPCVSXFMAMutatePass();
4040 FunctionPass *createPPCBranchSelectionPass();
4141 FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
42 FunctionPass *createPPCTLSDynamicCallPass();
4243 void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
4344 AsmPrinter &AP, bool isDarwin);
4445
8990 MO_TOC_LO = 7 << 4,
9091
9192 // Symbol for VK_PPC_TLS fixup attached to an ADD instruction
92 MO_TLS = 8 << 4,
93
94 // Symbols for VK_PPC_TLSGD and VK_PPC_TLSLD in __tls_get_addr
95 // call sequences.
96 MO_TLSLD = 9 << 4,
97 MO_TLSGD = 10 << 4
93 MO_TLS = 8 << 4
9894 };
9995 } // end namespace PPCII
10096
100100 const MachineInstr &MI);
101101 void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
102102 const MachineInstr &MI);
103 void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
103104 };
104105
105106 /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
405406 EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
406407 }
407408
409 /// EmitTlsCall -- Given a GETtls[ld]ADDR[32] instruction, print a
410 /// call to __tls_get_addr to the current output stream.
411 void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
412 MCSymbolRefExpr::VariantKind VK) {
413 StringRef Name = "__tls_get_addr";
414 MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
415 MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
416
417 assert(MI->getOperand(0).isReg() &&
418 ((Subtarget.isPPC64() && MI->getOperand(0).getReg() == PPC::X3) ||
419 (!Subtarget.isPPC64() && MI->getOperand(0).getReg() == PPC::R3)) &&
420 "GETtls[ld]ADDR[32] must define GPR3");
421 assert(MI->getOperand(1).isReg() &&
422 ((Subtarget.isPPC64() && MI->getOperand(1).getReg() == PPC::X3) ||
423 (!Subtarget.isPPC64() && MI->getOperand(1).getReg() == PPC::R3)) &&
424 "GETtls[ld]ADDR[32] must read GPR3");
425
426 if (!Subtarget.isPPC64() && !Subtarget.isDarwin() &&
427 TM.getRelocationModel() == Reloc::PIC_)
428 Kind = MCSymbolRefExpr::VK_PLT;
429 const MCSymbolRefExpr *TlsRef =
430 MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext);
431 const MachineOperand &MO = MI->getOperand(2);
432 const GlobalValue *GValue = MO.getGlobal();
433 MCSymbol *MOSymbol = getSymbol(GValue);
434 const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, VK, OutContext);
435 EmitToStreamer(OutStreamer,
436 MCInstBuilder(Subtarget.isPPC64() ?
437 PPC::BL8_NOP_TLS : PPC::BL_TLS)
438 .addExpr(TlsRef)
439 .addExpr(SymVar));
440 }
441
408442 /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
409443 /// the current output stream.
410444 ///
807841 .addExpr(SymGotTlsGD));
808842 return;
809843 }
844 case PPC::GETtlsADDR:
845 // Transform: %X3 = GETtlsADDR %X3,
846 // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd)
847 case PPC::GETtlsADDR32: {
848 // Transform: %R3 = GETtlsADDR32 %R3,
849 // Into: BL_TLS __tls_get_addr(sym at tlsgd)@PLT
850 EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSGD);
851 return;
852 }
810853 case PPC::ADDIStlsldHA: {
811854 // Transform: %Xd = ADDIStlsldHA %X2,
812855 // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha
841884 .addReg(MI->getOperand(0).getReg())
842885 .addReg(MI->getOperand(1).getReg())
843886 .addExpr(SymGotTlsLD));
887 return;
888 }
889 case PPC::GETtlsldADDR:
890 // Transform: %X3 = GETtlsldADDR %X3,
891 // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsld)
892 case PPC::GETtlsldADDR32: {
893 // Transform: %R3 = GETtlsldADDR32 %R3,
894 // Into: BL_TLS __tls_get_addr(sym at tlsld)@PLT
895 EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSLD);
844896 return;
845897 }
846898 case PPC::ADDISdtprelHA:
354354 return FuncInfo->hasNonRISpills();
355355 }
356356
357 /// MustSaveLR - Return true if this function requires that we save the LR
358 /// register onto the stack in the prolog and restore it in the epilog of the
359 /// function.
360 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
361 const PPCFunctionInfo *MFI = MF.getInfo();
362
363 // We need a save/restore of LR if there is any def of LR (which is
364 // defined by calls, including the PIC setup sequence), or if there is
365 // some use of the LR stack slot (e.g. for builtin_return_address).
366 // (LR comes in 32 and 64 bit versions.)
367 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
368 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
369 }
370
357371 /// determineFrameLayout - Determine the size of the frame and maximum call
358372 /// frame size.
359373 unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
380394 // stackless code if all local vars are reg-allocated.
381395 bool DisableRedZone = MF.getFunction()->getAttributes().
382396 hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone);
397 unsigned LR = RegInfo->getRARegister();
383398 if (!DisableRedZone &&
384399 (Subtarget.isPPC64() || // 32-bit SVR4, no stack-
385400 !Subtarget.isSVR4ABI() || // allocated locals.
387402 FrameSize <= 224 && // Fits in red zone.
388403 !MFI->hasVarSizedObjects() && // No dynamic alloca.
389404 !MFI->adjustsStack() && // No calls.
405 !MustSaveLR(MF, LR) &&
390406 !RegInfo->hasBasePointer(MF)) { // No special alignment.
391407 // No need for frame
392408 if (UpdateMF)
11071123 }
11081124 }
11091125
1110 /// MustSaveLR - Return true if this function requires that we save the LR
1111 /// register onto the stack in the prolog and restore it in the epilog of the
1112 /// function.
1113 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
1114 const PPCFunctionInfo *MFI = MF.getInfo();
1115
1116 // We need a save/restore of LR if there is any def of LR (which is
1117 // defined by calls, including the PIC setup sequence), or if there is
1118 // some use of the LR stack slot (e.g. for builtin_return_address).
1119 // (LR comes in 32 and 64 bit versions.)
1120 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
1121 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
1122 }
1123
11241126 void
11251127 PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
11261128 RegScavenger *) const {
805805 case PPCISD::SHL: return "PPCISD::SHL";
806806 case PPCISD::CALL: return "PPCISD::CALL";
807807 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
808 case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS";
809 case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS";
810808 case PPCISD::MTCTR: return "PPCISD::MTCTR";
811809 case PPCISD::BCTRL: return "PPCISD::BCTRL";
812810 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
840838 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
841839 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
842840 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
841 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
842 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
843843 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
844844 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
845 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
846 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
845847 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
846848 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
847849 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
17001702 return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
17011703 }
17021704
1703 // Generate a call to __tls_get_addr for the given GOT entry Op.
1704 std::pair
1705 PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl,
1706 SelectionDAG &DAG) const {
1707
1708 Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
1709 TargetLowering::ArgListTy Args;
1710 TargetLowering::ArgListEntry Entry;
1711 Entry.Node = Op;
1712 Entry.Ty = IntPtrTy;
1713 Args.push_back(Entry);
1714
1715 TargetLowering::CallLoweringInfo CLI(DAG);
1716 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
1717 .setCallee(CallingConv::C, IntPtrTy,
1718 DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()),
1719 std::move(Args), 0);
1720
1721 return LowerCallTo(CLI);
1722 }
1723
17241705 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
17251706 SelectionDAG &DAG) const {
17261707
17671748 }
17681749
17691750 if (Model == TLSModel::GeneralDynamic) {
1770 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
1771 PPCII::MO_TLSGD);
1751 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
17721752 SDValue GOTPtr;
17731753 if (is64bit) {
17741754 setUsesTOCBasePtr(DAG);
17811761 else
17821762 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
17831763 }
1784 SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
1785 GOTPtr, TGA);
1786 std::pair CallResult = lowerTLSCall(GOTEntry, dl, DAG);
1787 return CallResult.first;
1764 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
1765 GOTPtr, TGA, TGA);
17881766 }
17891767
17901768 if (Model == TLSModel::LocalDynamic) {
1791 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
1792 PPCII::MO_TLSLD);
1769 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
17931770 SDValue GOTPtr;
17941771 if (is64bit) {
17951772 setUsesTOCBasePtr(DAG);
18021779 else
18031780 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
18041781 }
1805 SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
1806 GOTPtr, TGA);
1807 std::pair CallResult = lowerTLSCall(GOTEntry, dl, DAG);
1808 SDValue TLSAddr = CallResult.first;
1809 SDValue Chain = CallResult.second;
1810 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
1811 Chain, TLSAddr, TGA);
1782 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
1783 PtrVT, GOTPtr, TGA, TGA);
1784 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
1785 PtrVT, TLSAddr, TGA);
18121786 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
18131787 }
18141788
38323806 if (Callee.getNode()) {
38333807 Ops.push_back(Chain);
38343808 Ops.push_back(Callee);
3835
3836 // If this is a call to __tls_get_addr, find the symbol whose address
3837 // is to be taken and add it to the list. This will be used to
3838 // generate __tls_get_addr(@tlsgd) or __tls_get_addr(@tlsld).
3839 // We find the symbol by walking the chain to the CopyFromReg, walking
3840 // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and
3841 // pulling the symbol from that node.
3842 if (ExternalSymbolSDNode *S = dyn_cast(Callee))
3843 if (!strcmp(S->getSymbol(), "__tls_get_addr")) {
3844 assert(!needIndirectCall && "Indirect call to __tls_get_addr???");
3845 SDNode *AddI = Chain.getNode()->getOperand(2).getNode();
3846 SDValue TGTAddr = AddI->getOperand(1);
3847 assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress &&
3848 "Didn't find target global TLS address where we expected one");
3849 Ops.push_back(TGTAddr);
3850 CallOpc = PPCISD::CALL_TLS;
3851 }
38523809 }
38533810 // If this is a tail call add stack pointer delta.
38543811 if (isTailCall)
40113968 Ops.insert(std::next(Ops.begin()), AddTOC);
40123969 } else if ((CallOpc == PPCISD::CALL) &&
40133970 (!isLocalCall(Callee) ||
4014 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
3971 DAG.getTarget().getRelocationModel() == Reloc::PIC_))
40153972 // Otherwise insert NOP for non-local calls.
40163973 CallOpc = PPCISD::CALL_NOP;
4017 } else if (CallOpc == PPCISD::CALL_TLS)
4018 // For 64-bit SVR4, TLS calls are always non-local.
4019 CallOpc = PPCISD::CALL_NOP_TLS;
40203974 }
40213975
40223976 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
100100 /// SVR4 calls.
101101 CALL, CALL_NOP,
102102
103 /// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used
104 /// to access TLS variables.
105 CALL_TLS, CALL_NOP_TLS,
106
107103 /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
108104 /// MTCTR instruction.
109105 MTCTR,
222218 /// register to sym\@got\@tlsgd\@ha.
223219 ADDIS_TLSGD_HA,
224220
225 /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
221 /// %X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
226222 /// model, produces an ADDI8 instruction that adds G8RReg to
227 /// sym\@got\@tlsgd\@l.
223 /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by
224 /// ADDI_TLSGD_L_ADDR until after register assignment.
228225 ADDI_TLSGD_L,
226
227 /// %X3 = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
228 /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by
229 /// ADDI_TLSGD_L_ADDR until after register assignment.
230 GET_TLS_ADDR,
231
232 /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
233 /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
234 /// register assignment.
235 ADDI_TLSGD_L_ADDR,
229236
230237 /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
231238 /// model, produces an ADDIS8 instruction that adds the GOT base
232239 /// register to sym\@got\@tlsld\@ha.
233240 ADDIS_TLSLD_HA,
234241
235 /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
242 /// %X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
236243 /// model, produces an ADDI8 instruction that adds G8RReg to
237 /// sym\@got\@tlsld\@l.
244 /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by
245 /// ADDI_TLSLD_L_ADDR until after register assignment.
238246 ADDI_TLSLD_L,
239247
240 /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
241 /// local-dynamic TLS model, produces an ADDIS8 instruction
242 /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed
243 /// to tie this in place following a copy to %X3 from the result
244 /// of a GET_TLSLD_ADDR.
248 /// %X3 = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
249 /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by
250 /// ADDI_TLSLD_L_ADDR until after register assignment.
251 GET_TLSLD_ADDR,
252
253 /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
254 /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
255 /// following register assignment.
256 ADDI_TLSLD_L_ADDR,
257
258 /// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS
259 /// model, produces an ADDIS8 instruction that adds X3 to
260 /// sym\@dtprel\@ha.
245261 ADDIS_DTPREL_HA,
246262
247263 /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
634650 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
635651 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
636652 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
637 std::pair lowerTLSCall(SDValue Op, SDLoc dl,
638 SelectionDAG &DAG) const;
639653 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
640654 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
641655 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
200200 (BL8 texternalsym:$dst)>;
201201 def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
202202 (BL8_NOP texternalsym:$dst)>;
203
204 def : Pat<(PPCcall_nop_tls texternalsym:$func, tglobaltlsaddr:$sym),
205 (BL8_NOP_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
206203
207204 // Atomic operations
208205 let usesCustomInserter = 1 in {
903900 [(set i64:$rD,
904901 (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
905902 isPPC64;
903 // LR8 is a true define, while the rest of the Defs are clobbers. X3 is
904 // explicitly defined when this op is created, so not mentioned here.
905 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
906 Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
907 def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
908 "#GETtlsADDR",
909 [(set i64:$rD,
910 (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
911 isPPC64;
912 // Combined op for ADDItlsgdL and GETtlsADDR, late expanded. X3 and LR8
913 // are true defines while the rest of the Defs are clobbers.
914 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
915 Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
916 in
917 def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD),
918 (ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
919 "#ADDItlsgdLADDR",
920 [(set i64:$rD,
921 (PPCaddiTlsgdLAddr i64:$reg,
922 tglobaltlsaddr:$disp,
923 tglobaltlsaddr:$sym))]>,
924 isPPC64;
906925 def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
907926 "#ADDIStlsldHA",
908927 [(set i64:$rD,
913932 [(set i64:$rD,
914933 (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
915934 isPPC64;
935 // LR8 is a true define, while the rest of the Defs are clobbers. X3 is
936 // explicitly defined when this op is created, so not mentioned here.
937 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
938 Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
939 def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
940 "#GETtlsldADDR",
941 [(set i64:$rD,
942 (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
943 isPPC64;
944 // Combined op for ADDItlsldL and GETtlsldADDR, late expanded. X3 and LR8
945 // are true defines, while the rest of the Defs are clobbers.
946 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
947 Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
948 in
949 def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD),
950 (ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
951 "#ADDItlsldLADDR",
952 [(set i64:$rD,
953 (PPCaddiTlsldLAddr i64:$reg,
954 tglobaltlsaddr:$disp,
955 tglobaltlsaddr:$sym))]>,
956 isPPC64;
916957 def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
917958 "#ADDISdtprelHA",
918959 [(set i64:$rD,
109109 def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
110110 def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
111111 def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
112 def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
113 def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
114 SDTypeProfile<1, 3, [
115 SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
116 SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
112117 def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
113118 def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
114 def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
115 [SDNPHasChain]>;
119 def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
120 def PPCaddiTlsldLAddr : SDNode<"PPCISD::ADDI_TLSLD_L_ADDR",
121 SDTypeProfile<1, 3, [
122 SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
123 SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
124 def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>;
116125 def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
117126
118127 def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
135144 def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
136145 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
137146 SDNPVariadic]>;
138 def PPCcall_tls : SDNode<"PPCISD::CALL_TLS", SDT_PPCCall,
139 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
140 SDNPVariadic]>;
141147 def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
142148 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
143149 SDNPVariadic]>;
144 def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", SDT_PPCCall,
145 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
146 SDNPVariadic]>;
147150 def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
148151 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
149152 def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
24582461 def : Pat<(PPCcall (i32 texternalsym:$dst)),
24592462 (BL texternalsym:$dst)>;
24602463
2461 def : Pat<(PPCcall_tls texternalsym:$func, tglobaltlsaddr:$sym),
2462 (BL_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
2463
24642464 def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
24652465 (TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
24662466
25152515 "#ADDItlsgdL32",
25162516 [(set i32:$rD,
25172517 (PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
2518 // LR is a true define, while the rest of the Defs are clobbers. R3 is
2519 // explicitly defined when this op is created, so not mentioned here.
2520 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
2521 Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
2522 def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
2523 "GETtlsADDR32",
2524 [(set i32:$rD,
2525 (PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>;
2526 // Combined op for ADDItlsgdL32 and GETtlsADDR32, late expanded. R3 and LR
2527 // are true defines while the rest of the Defs are clobbers.
2528 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
2529 Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
2530 def ADDItlsgdLADDR32 : Pseudo<(outs gprc:$rD),
2531 (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
2532 "#ADDItlsgdLADDR32",
2533 [(set i32:$rD,
2534 (PPCaddiTlsgdLAddr i32:$reg,
2535 tglobaltlsaddr:$disp,
2536 tglobaltlsaddr:$sym))]>;
25182537 def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
25192538 "#ADDItlsldL32",
25202539 [(set i32:$rD,
25212540 (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
2541 // LR is a true define, while the rest of the Defs are clobbers. R3 is
2542 // explicitly defined when this op is created, so not mentioned here.
2543 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
2544 Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
2545 def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
2546 "GETtlsldADDR32",
2547 [(set i32:$rD,
2548 (PPCgetTlsldAddr i32:$reg,
2549 tglobaltlsaddr:$sym))]>;
2550 // Combined op for ADDItlsldL32 and GETtlsldADDR32, late expanded. R3 and LR
2551 // are true defines while the rest of the Defs are clobbers.
2552 let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
2553 Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
2554 def ADDItlsldLADDR32 : Pseudo<(outs gprc:$rD),
2555 (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
2556 "#ADDItlsldLADDR32",
2557 [(set i32:$rD,
2558 (PPCaddiTlsldLAddr i32:$reg,
2559 tglobaltlsaddr:$disp,
2560 tglobaltlsaddr:$sym))]>;
25222561 def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
25232562 "#ADDIdtprelL32",
25242563 [(set i32:$rD,
136136 case PPCII::MO_TLS:
137137 RefKind = MCSymbolRefExpr::VK_PPC_TLS;
138138 break;
139 case PPCII::MO_TLSGD:
140 RefKind = MCSymbolRefExpr::VK_PPC_TLSGD;
141 break;
142 case PPCII::MO_TLSLD:
143 RefKind = MCSymbolRefExpr::VK_PPC_TLSLD;
144 break;
145139 }
146140
147141 if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin)
0 //===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass expands ADDItls{ld,gd}LADDR[32] machine instructions into
10 // separate ADDItls{ld,gd}L[32] and GETtls[ld]ADDR[32] instructions, both of
11 // which define GPR3. A copy is added from GPR3 to the target virtual
12 // register of the original instruction. The GETtls[ld]ADDR[32] is really
13 // a call instruction, so its target register is constrained to be GPR3.
14 // This is not true of ADDItls{ld,gd}L[32], but there is a legacy linker
15 // optimization bug that requires the target register of the addi of
16 // a local- or general-dynamic TLS access sequence to be GPR3.
17 //
18 // This is done in a late pass so that TLS variable accesses can be
19 // fully commoned by MachineCSE.
20 //
21 //===----------------------------------------------------------------------===//
22
23 #include "PPCInstrInfo.h"
24 #include "PPC.h"
25 #include "PPCInstrBuilder.h"
26 #include "PPCTargetMachine.h"
27 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/raw_ostream.h"
32
33 using namespace llvm;
34
35 #define DEBUG_TYPE "ppc-tls-dynamic-call"
36
37 namespace llvm {
38 void initializePPCTLSDynamicCallPass(PassRegistry&);
39 }
40
41 namespace {
42 struct PPCTLSDynamicCall : public MachineFunctionPass {
43 static char ID;
44 PPCTLSDynamicCall() : MachineFunctionPass(ID) {
45 initializePPCTLSDynamicCallPass(*PassRegistry::getPassRegistry());
46 }
47
48 const PPCTargetMachine *TM;
49 const PPCInstrInfo *TII;
50 LiveIntervals *LIS;
51
52 protected:
53 bool processBlock(MachineBasicBlock &MBB) {
54 bool Changed = false;
55 bool Is64Bit = TM->getSubtargetImpl()->isPPC64();
56
57 for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
58 I != IE; ++I) {
59 MachineInstr *MI = I;
60
61 if (MI->getOpcode() != PPC::ADDItlsgdLADDR &&
62 MI->getOpcode() != PPC::ADDItlsldLADDR &&
63 MI->getOpcode() != PPC::ADDItlsgdLADDR32 &&
64 MI->getOpcode() != PPC::ADDItlsldLADDR32)
65 continue;
66
67 DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << *MI;);
68
69 unsigned OutReg = MI->getOperand(0).getReg();
70 unsigned InReg = MI->getOperand(1).getReg();
71 DebugLoc DL = MI->getDebugLoc();
72 unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
73 unsigned Opc1, Opc2;
74 SmallVector OrigRegs;
75 OrigRegs.push_back(OutReg);
76 OrigRegs.push_back(InReg);
77 OrigRegs.push_back(GPR3);
78
79 switch (MI->getOpcode()) {
80 default:
81 llvm_unreachable("Opcode inconsistency error");
82 case PPC::ADDItlsgdLADDR:
83 Opc1 = PPC::ADDItlsgdL;
84 Opc2 = PPC::GETtlsADDR;
85 break;
86 case PPC::ADDItlsldLADDR:
87 Opc1 = PPC::ADDItlsldL;
88 Opc2 = PPC::GETtlsldADDR;
89 break;
90 case PPC::ADDItlsgdLADDR32:
91 Opc1 = PPC::ADDItlsgdL32;
92 Opc2 = PPC::GETtlsADDR32;
93 break;
94 case PPC::ADDItlsldLADDR32:
95 Opc1 = PPC::ADDItlsldL32;
96 Opc2 = PPC::GETtlsldADDR32;
97 break;
98 }
99
100 // Expand into two ops built prior to the existing instruction.
101 MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3)
102 .addReg(InReg);
103 Addi->addOperand(MI->getOperand(2));
104
105 // The ADDItls* instruction is the first instruction in the
106 // repair range.
107 MachineBasicBlock::iterator First = I;
108 --First;
109
110 MachineInstr *Call = (BuildMI(MBB, I, DL, TII->get(Opc2), GPR3)
111 .addReg(GPR3));
112 Call->addOperand(MI->getOperand(3));
113
114 BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg)
115 .addReg(GPR3);
116
117 // The COPY is the last instruction in the repair range.
118 MachineBasicBlock::iterator Last = I;
119 --Last;
120
121 // Move past the original instruction and remove it.
122 ++I;
123 MI->removeFromParent();
124
125 // Repair the live intervals.
126 LIS->repairIntervalsInRange(&MBB, First, Last, OrigRegs);
127 Changed = true;
128 }
129
130 return Changed;
131 }
132
133 public:
134 bool runOnMachineFunction(MachineFunction &MF) override {
135 TM = static_cast(&MF.getTarget());
136 TII = TM->getSubtargetImpl()->getInstrInfo();
137 LIS = &getAnalysis();
138
139 bool Changed = false;
140
141 for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
142 MachineBasicBlock &B = *I++;
143 if (processBlock(B))
144 Changed = true;
145 }
146
147 return Changed;
148 }
149
150 void getAnalysisUsage(AnalysisUsage &AU) const override {
151 AU.addRequired();
152 AU.addPreserved();
153 AU.addRequired();
154 AU.addPreserved();
155 MachineFunctionPass::getAnalysisUsage(AU);
156 }
157 };
158 }
159
160 INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE,
161 "PowerPC TLS Dynamic Call Fixup", false, false)
162 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
163 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
164 INITIALIZE_PASS_END(PPCTLSDynamicCall, DEBUG_TYPE,
165 "PowerPC TLS Dynamic Call Fixup", false, false)
166
167 char PPCTLSDynamicCall::ID = 0;
168 FunctionPass*
169 llvm::createPPCTLSDynamicCallPass() { return new PPCTLSDynamicCall(); }
265265 initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
266266 insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
267267 &PPCVSXFMAMutateID);
268 if (getPPCTargetMachine().getRelocationModel() == Reloc::PIC_)
269 addPass(createPPCTLSDynamicCallPass());
268270 }
269271
270272 void PPCPassConfig::addPreSched2() {
1111 ; CHECK-LABEL: @test1
1212 ; CHECK: mflr 0
1313 ; CHECK: std 0, 16(1)
14 ; FIXME: These next two lines don't both need to load the same value.
15 ; CHECK-DAG: ld 3, 16(1)
14 ; CHECK-DAG: ld 3, 64(1)
1615 ; CHECK-DAG: ld 0, 16(1)
1716 ; CHECK: mtlr 0
1817 ; CHECK: blr
0 ; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | FileCheck %s
1 ; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | grep "__tls_get_addr" | count 1
2
3 ; This test was derived from LLVM's own
4 ; PrettyStackTraceEntry::~PrettyStackTraceEntry(). It demonstrates an
5 ; opportunity for CSE of calls to __tls_get_addr().
6
7 target datalayout = "e-m:e-i64:64-n32:64"
8 target triple = "powerpc64le-unknown-linux-gnu"
9
10 %"class.llvm::PrettyStackTraceEntry" = type { i32 (...)**, %"class.llvm::PrettyStackTraceEntry"* }
11
12 @_ZTVN4llvm21PrettyStackTraceEntryE = unnamed_addr constant [5 x i8*] [i8* null, i8* null, i8* bitcast (void (%"class.llvm::PrettyStackTraceEntry"*)* @_ZN4llvm21PrettyStackTraceEntryD2Ev to i8*), i8* bitcast (void (%"class.llvm::PrettyStackTraceEntry"*)* @_ZN4llvm21PrettyStackTraceEntryD0Ev to i8*), i8* bitcast (void ()* @__cxa_pure_virtual to i8*)], align 8
13 @_ZL20PrettyStackTraceHead = internal thread_local unnamed_addr global %"class.llvm::PrettyStackTraceEntry"* null, align 8
14 @.str = private unnamed_addr constant [87 x i8] c"PrettyStackTraceHead == this && \22Pretty stack trace entry destruction is out of order\22\00", align 1
15 @.str1 = private unnamed_addr constant [64 x i8] c"/home/wschmidt/llvm/llvm-test2/lib/Support/PrettyStackTrace.cpp\00", align 1
16 @__PRETTY_FUNCTION__._ZN4llvm21PrettyStackTraceEntryD2Ev = private unnamed_addr constant [62 x i8] c"virtual llvm::PrettyStackTraceEntry::~PrettyStackTraceEntry()\00", align 1
17
18 declare void @_ZN4llvm21PrettyStackTraceEntryD2Ev(%"class.llvm::PrettyStackTraceEntry"* %this) unnamed_addr
19 declare void @__cxa_pure_virtual()
20 declare void @__assert_fail(i8*, i8*, i32 zeroext, i8*)
21 declare void @_ZdlPv(i8*)
22
23 define void @_ZN4llvm21PrettyStackTraceEntryD0Ev(%"class.llvm::PrettyStackTraceEntry"* %this) unnamed_addr align 2 {
24 entry:
25 %0 = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 0
26 store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @_ZTVN4llvm21PrettyStackTraceEntryE, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
27 %1 = load %"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead, align 8
28 %cmp.i = icmp eq %"class.llvm::PrettyStackTraceEntry"* %1, %this
29 br i1 %cmp.i, label %_ZN4llvm21PrettyStackTraceEntryD2Ev.exit, label %cond.false.i
30
31 cond.false.i: ; preds = %entry
32 tail call void @__assert_fail(i8* getelementptr inbounds ([87 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([64 x i8]* @.str1, i64 0, i64 0), i32 zeroext 119, i8* getelementptr inbounds ([62 x i8]* @__PRETTY_FUNCTION__._ZN4llvm21PrettyStackTraceEntryD2Ev, i64 0, i64 0))
33 unreachable
34
35 _ZN4llvm21PrettyStackTraceEntryD2Ev.exit: ; preds = %entry
36 %NextEntry.i.i = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 1
37 %2 = bitcast %"class.llvm::PrettyStackTraceEntry"** %NextEntry.i.i to i64*
38 %3 = load i64* %2, align 8
39 store i64 %3, i64* bitcast (%"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead to i64*), align 8
40 %4 = bitcast %"class.llvm::PrettyStackTraceEntry"* %this to i8*
41 tail call void @_ZdlPv(i8* %4)
42 ret void
43 }
44
45 ; CHECK-LABEL: _ZN4llvm21PrettyStackTraceEntryD0Ev:
46 ; CHECK: addis [[REG1:[0-9]+]], 2, _ZL20PrettyStackTraceHead@got@tlsld@ha
47 ; CHECK: addi 3, [[REG1]], _ZL20PrettyStackTraceHead@got@tlsld@l
48 ; CHECK: bl __tls_get_addr(_ZL20PrettyStackTraceHead@tlsld)
49 ; CHECK: addis 3, 3, _ZL20PrettyStackTraceHead@dtprel@ha
50 ; CHECK: ld {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3)
51 ; CHECK: std {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3)
1818 }
1919
2020 ; CHECK-LABEL: call_once:
21 ; CHECK: addis 3, 2, __once_callable@got@tlsgd@ha
22 ; CHECK: addi 3, 3, __once_callable@got@tlsgd@l
21 ; CHECK: addi 3, {{[0-9]+}}, __once_callable@got@tlsgd@l
2322 ; CHECK: bl __tls_get_addr(__once_callable@tlsgd)
2423 ; CHECK-NEXT: nop
2524 ; CHECK: std {{[0-9]+}}, 0(3)
26 ; CHECK: addis 3, 2, __once_call@got@tlsgd@ha
27 ; CHECK: addi 3, 3, __once_call@got@tlsgd@l
25 ; CHECK: addi 3, {{[0-9]+}}, __once_call@got@tlsgd@l
2826 ; CHECK: bl __tls_get_addr(__once_call@tlsgd)
2927 ; CHECK-NEXT: nop
3028 ; CHECK: std {{[0-9]+}}, 0(3)