llvm.org GIT mirror llvm / edfd4f1
[PowerPC] ELFv2 function call changes This patch builds upon the two preceding MC changes to implement the basic ELFv2 function call convention. In the ELFv1 ABI, a "function descriptor" was associated with every function, pointing to both the entry address and the related TOC base (and a static chain pointer for nested functions). Function pointers would actually refer to that descriptor, and the indirect call sequence needed to load up both entry address and TOC base. In the ELFv2 ABI, there are no more function descriptors, and function pointers simply refer to the (global) entry point of the function code. Indirect function calls simply branch to that address, after loading it up into r12 (as required by the ABI rules for a global entry point). Direct function calls continue to just do a "bl" to the target symbol; this will be resolved by the linker to the local entry point of the target function if it is local, and to a PLT stub if it is global. That PLT stub would then load the (global) entry point address of the final target into r12 and branch to it. Note that when performing a direct function call, r2 must be set up to point to the current TOC base: if the target ends up local, the ABI requires that its local entry point is called with r2 set up; if the target ends up global, the PLT stub requires that r2 is set up. This patch implements all LLVM changes to implement that scheme: - No longer create a function descriptor when emitting a function definition (in EmitFunctionEntryLabel) - Emit two entry points *if* the function needs the TOC base (r2) anywhere (this is done in EmitFunctionBodyStart; note that this cannot be done in EmitFunctionEntryLabel because the global entry point prologue code must be *part* of the function as covered by debug info). - In order to make use tracking of r2 (as needed above) work correctly, mark direct function calls as implicitly using r2. 
- Implement the ELFv2 indirect function call sequence (no function descriptors; load target address into r12). - When creating an ELFv2 object file, emit the .abiversion 2 directive to tell the linker to create the appropriate version of PLT stubs. Reviewed by Hal Finkel. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213489 91177308-0d34-0410-b5e6-96231b3b80d8 Ulrich Weigand 5 years ago
6 changed file(s) with 160 addition(s) and 3 deletion(s). Raw diff Collapse all Expand all
3232 #include "llvm/CodeGen/MachineInstr.h"
3333 #include "llvm/CodeGen/MachineInstrBuilder.h"
3434 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
35 #include "llvm/CodeGen/MachineRegisterInfo.h"
3536 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
3637 #include "llvm/IR/Constants.h"
3738 #include "llvm/IR/DebugInfo.h"
105106
106107 void EmitFunctionEntryLabel() override;
107108
109 void EmitFunctionBodyStart() override;
108110 void EmitFunctionBodyEnd() override;
109111 };
110112
780782 }
781783
782784 void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
785 if (Subtarget.isELFv2ABI()) {
786 PPCTargetStreamer *TS =
787 static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
788
789 if (TS)
790 TS->emitAbiVersion(2);
791 }
792
783793 if (Subtarget.isPPC64() || TM.getRelocationModel() != Reloc::PIC_)
784794 return AsmPrinter::EmitStartOfAsmFile(M);
785795
833843 } else
834844 return AsmPrinter::EmitFunctionEntryLabel();
835845 }
836
846
847 // ELFv2 ABI - Normal entry label.
848 if (Subtarget.isELFv2ABI())
849 return AsmPrinter::EmitFunctionEntryLabel();
850
837851 // Emit an official procedure descriptor.
838852 MCSectionSubPair Current = OutStreamer.getCurrentSection();
839853 const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd",
916930 }
917931
918932 return AsmPrinter::doFinalization(M);
933 }
934
935 /// EmitFunctionBodyStart - Emit a global entry point prefix for ELFv2.
936 void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
937 // In the ELFv2 ABI, in functions that use the TOC register, we need to
938 // provide two entry points. The ABI guarantees that when calling the
939 // local entry point, r2 is set up by the caller to contain the TOC base
940 // for this function, and when calling the global entry point, r12 is set
941 // up by the caller to hold the address of the global entry point. We
942 // thus emit a prefix sequence along the following lines:
943 //
944 // func:
945 // # global entry point
946 // addis r2,r12,(.TOC.-func)@ha
947 // addi r2,r2,(.TOC.-func)@l
948 // .localentry func, .-func
949 // # local entry point, followed by function body
950 //
951 // This ensures we have r2 set up correctly while executing the function
952 // body, no matter which entry point is called.
953 if (Subtarget.isELFv2ABI()
954 // Only do all that if the function uses r2 in the first place.
955 && !MF->getRegInfo().use_empty(PPC::X2)) {
956
957 MCSymbol *GlobalEntryLabel = OutContext.CreateTempSymbol();
958 OutStreamer.EmitLabel(GlobalEntryLabel);
959 const MCSymbolRefExpr *GlobalEntryLabelExp =
960 MCSymbolRefExpr::Create(GlobalEntryLabel, OutContext);
961
962 MCSymbol *TOCSymbol = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
963 const MCExpr *TOCDeltaExpr =
964 MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(TOCSymbol, OutContext),
965 GlobalEntryLabelExp, OutContext);
966
967 const MCExpr *TOCDeltaHi =
968 PPCMCExpr::CreateHa(TOCDeltaExpr, false, OutContext);
969 EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
970 .addReg(PPC::X2)
971 .addReg(PPC::X12)
972 .addExpr(TOCDeltaHi));
973
974 const MCExpr *TOCDeltaLo =
975 PPCMCExpr::CreateLo(TOCDeltaExpr, false, OutContext);
976 EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI)
977 .addReg(PPC::X2)
978 .addReg(PPC::X2)
979 .addExpr(TOCDeltaLo));
980
981 MCSymbol *LocalEntryLabel = OutContext.CreateTempSymbol();
982 OutStreamer.EmitLabel(LocalEntryLabel);
983 const MCSymbolRefExpr *LocalEntryLabelExp =
984 MCSymbolRefExpr::Create(LocalEntryLabel, OutContext);
985 const MCExpr *LocalOffsetExp =
986 MCBinaryExpr::CreateSub(LocalEntryLabelExp,
987 GlobalEntryLabelExp, OutContext);
988
989 PPCTargetStreamer *TS =
990 static_cast(OutStreamer.getTargetStreamer());
991
992 if (TS)
993 TS->emitLocalEntry(CurrentFnSym, LocalOffsetExp);
994 }
919995 }
920996
921997 /// EmitFunctionBodyEnd - Print the traceback table before the .size
14971497 for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
14981498 MIB.addReg(RegArgs[II], RegState::Implicit);
14991499
1500 // Direct calls in the ELFv2 ABI need the TOC register live into the call.
1501 if (PPCSubTarget->isELFv2ABI())
1502 MIB.addReg(PPC::X2, RegState::Implicit);
1503
15001504 // Add a register mask with the call-preserved registers. Proper
15011505 // defs for return values will be added by setPhysRegsDeadExcept().
15021506 MIB.addRegMask(TRI.getCallPreservedMask(CC));
33703370
33713371 bool isPPC64 = Subtarget.isPPC64();
33723372 bool isSVR4ABI = Subtarget.isSVR4ABI();
3373 bool isELFv2ABI = Subtarget.isELFv2ABI();
33733374
33743375 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
33753376 NodeTys.push_back(MVT::Other); // Returns a chain
34393440 // to do the call, we can't use PPCISD::CALL.
34403441 SDValue MTCTROps[] = {Chain, Callee, InFlag};
34413442
3442 if (isSVR4ABI && isPPC64) {
3443 if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
34433444 // Function pointers in the 64-bit SVR4 ABI do not point to the function
34443445 // entry point, but to the function descriptor (the function entry point
34453446 // address is part of the function descriptor though).
35193520 CallOpc = PPCISD::BCTRL;
35203521 Callee.setNode(nullptr);
35213522 // Add use of X11 (holding environment pointer)
3522 if (isSVR4ABI && isPPC64)
3523 if (isSVR4ABI && isPPC64 && !isELFv2ABI)
35233524 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
35243525 // Add CTR register as callee so a bctr can be emitted later.
35253526 if (isTailCall)
35403541 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
35413542 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
35423543 RegsToPass[i].second.getValueType()));
3544
3545 // Direct calls in the ELFv2 ABI need the TOC register live into the call.
3546 if (Callee.getNode() && isELFv2ABI)
3547 Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
35433548
35443549 return CallOpc;
35453550 }
39873992 SDLoc dl, SelectionDAG &DAG,
39883993 SmallVectorImpl &InVals) const {
39893994
3995 bool isELFv2ABI = Subtarget.isELFv2ABI();
39903996 bool isLittleEndian = Subtarget.isLittleEndian();
39913997 unsigned NumOps = Outs.size();
39923998
43724378 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
43734379 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
43744380 false, false, 0);
4381 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
4382 // This does not mean the MTCTR instruction must use R12; it's easier
4383 // to model this as an extra parameter, so do that.
4384 if (isELFv2ABI)
4385 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
43754386 }
43764387
43774388 // Build a sequence of copy-to-reg nodes chained together with token chain
226226
227227 bool isDarwinABI() const { return isDarwin(); }
228228 bool isSVR4ABI() const { return !isDarwin(); }
229 /// FIXME: Should use a command-line option.
230 bool isELFv2ABI() const { return isPPC64() && isSVR4ABI() &&
231 isLittleEndian(); }
229232
230233 bool enableEarlyIfConversion() const override { return hasISEL(); }
231234
0 ; RUN: llc -march=ppc64le -mcpu=pwr8 < %s | FileCheck %s
1
2 target datalayout = "e-m:e-i64:64-n32:64"
3 target triple = "powerpc64le-unknown-linux-gnu"
4
5 ; Indirect calls require a full stub creation
6 define void @test_indirect(void ()* nocapture %fp) {
7 ; CHECK-LABEL: @test_indirect
8 tail call void %fp()
9 ; CHECK-DAG: std 2, 40(1)
10 ; CHECK-DAG: mr 12, 3
11 ; CHECK-DAG: mtctr 3
12 ; CHECK: bctrl
13 ; CHECK-NEXT: ld 2, 40(1)
14 ret void
15 }
16
0 ; RUN: llc -march=ppc64le -mcpu=pwr8 < %s | FileCheck %s
1 ; RUN: llc -march=ppc64le -mcpu=pwr8 -O0 < %s | FileCheck %s
2
3 target datalayout = "e-m:e-i64:64-n32:64"
4 target triple = "powerpc64le-unknown-linux-gnu"
5
6 @number64 = global i64 10, align 8
7
8 ; CHECK: .abiversion 2
9
10 define i64 @use_toc(i64 %a) nounwind {
11 entry:
12 ; CHECK-LABEL: @use_toc
13 ; CHECK-NEXT: .Ltmp[[TMP1:[0-9]+]]:
14 ; CHECK-NEXT: addis 2, 12, .TOC.-.Ltmp[[TMP1]]@ha
15 ; CHECK-NEXT: addi 2, 2, .TOC.-.Ltmp[[TMP1]]@l
16 ; CHECK-NEXT: .Ltmp[[TMP2:[0-9]+]]:
17 ; CHECK-NEXT: .localentry use_toc, .Ltmp[[TMP2]]-.Ltmp[[TMP1]]
18 ; CHECK-NEXT: %entry
19 %0 = load i64* @number64, align 8
20 %cmp = icmp eq i64 %0, %a
21 %conv1 = zext i1 %cmp to i64
22 ret i64 %conv1
23 }
24
25 declare void @callee()
26 define void @use_toc_implicit() nounwind {
27 entry:
28 ; CHECK-LABEL: @use_toc_implicit
29 ; CHECK-NEXT: .Ltmp[[TMP1:[0-9]+]]:
30 ; CHECK-NEXT: addis 2, 12, .TOC.-.Ltmp[[TMP1]]@ha
31 ; CHECK-NEXT: addi 2, 2, .TOC.-.Ltmp[[TMP1]]@l
32 ; CHECK-NEXT: .Ltmp[[TMP2:[0-9]+]]:
33 ; CHECK-NEXT: .localentry use_toc_implicit, .Ltmp[[TMP2]]-.Ltmp[[TMP1]]
34 ; CHECK-NEXT: %entry
35 call void @callee()
36 ret void
37 }
38
39 define i64 @no_toc(i64 %a) nounwind {
40 entry:
41 ; CHECK-LABEL: @no_toc
42 ; CHECK-NEXT: %entry
43 ret i64 %a
44 }
45