llvm.org GIT mirror llvm / 7840990
[PowerPC] Make LDtocL and friends invariant loads

LDtocL, and other loads that roughly correspond to the TOC_ENTRY SDAG node, represent loads from the TOC, which is invariant. As a result, these loads can be hoisted out of loops, etc. In order to do this, we need to generate GOT-style MMOs for TOC_ENTRY, which requires treating it as a legitimate memory intrinsic node type. Once this is done, the MMO transfer is automatically handled for TableGen-driven instruction selection, and for nodes generated directly in PPCISelDAGToDAG, we need to transfer the MMOs manually.

Also, we were not transferring MMOs associated with pre-increment loads, so do that too.

Lastly, this fixes an exposed bug where R30 was not added as a defined operand of UpdateGBR.

This problem was highlighted by an example (used to generate the test case) posted to llvmdev by Francois Pichet.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230553 91177308-0d34-0410-b5e6-96231b3b80d8

Hal Finkel 5 years ago
8 changed file(s) with 133 addition(s) and 71 deletion(s). Raw diff Collapse all Expand all
222222
223223 bool AllUsersSelectZero(SDNode *N);
224224 void SwapAllSelectUsers(SDNode *N);
225
226 SDNode *transferMemOperands(SDNode *N, SDNode *Result);
225227 };
226228 }
227229
314316 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
315317 unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
316318 BuildMI(FirstMBB, MBBI, dl,
317 TII.get(PPC::UpdateGBR)).addReg(GlobalBaseReg)
319 TII.get(PPC::UpdateGBR), GlobalBaseReg)
318320 .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
319321 MF->getInfo()->setUsesPICBase(true);
320322 }
23412343 return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
23422344 }
23432345
// Attaches N's memory operand to Result and returns Result, so a call site
// can wrap the creation of a machine node in a single return expression.
// Exactly one memory operand is transferred (the array below has one entry).
// NOTE(review): the bare cast(...) calls below look like their template
// arguments were lost when this page was extracted -- confirm against the
// original source before reusing this text.
2346 SDNode *PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
2347 // Transfer memoperands: allocate a one-entry memref array from MF, fill it with N's memory operand, and hand that range to Result.
2348 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2349 MemOp[0] = cast(N)->getMemOperand();
2350 cast(Result)->setMemRefs(MemOp, MemOp + 1);
2351 return Result;
2352 }
2353
23442354
23452355 // Select - Convert the specified operand from a target-independent to a
23462356 // target-specific node if it hasn't already been changed.
24592469 SDValue Chain = LD->getChain();
24602470 SDValue Base = LD->getBasePtr();
24612471 SDValue Ops[] = { Offset, Base, Chain };
2462 return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
2463 PPCLowering->getPointerTy(),
2464 MVT::Other, Ops);
2472 return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl,
2473 LD->getValueType(0),
2474 PPCLowering->getPointerTy(),
2475 MVT::Other, Ops));
24652476 } else {
24662477 unsigned Opcode;
24672478 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
24962507 SDValue Chain = LD->getChain();
24972508 SDValue Base = LD->getBasePtr();
24982509 SDValue Ops[] = { Base, Offset, Chain };
2499 return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
2500 PPCLowering->getPointerTy(),
2501 MVT::Other, Ops);
2510 return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl,
2511 LD->getValueType(0),
2512 PPCLowering->getPointerTy(),
2513 MVT::Other, Ops));
25022514 }
25032515 }
25042516
28502862 "Only supported for 64-bit ABI and 32-bit SVR4");
28512863 if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
28522864 SDValue GA = N->getOperand(0);
2853 return CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
2854 N->getOperand(1));
2865 return transferMemOperands(N, CurDAG->getMachineNode(PPC::LWZtoc, dl,
2866 MVT::i32, GA, N->getOperand(1)));
28552867 }
28562868
28572869 // For medium and large code model, we generate two instructions as
28712883 SDValue GA = N->getOperand(0);
28722884 SDValue TOCbase = N->getOperand(1);
28732885 SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
2874 TOCbase, GA);
2886 TOCbase, GA);
28752887
28762888 if (isa(GA) || isa(GA) ||
28772889 CModel == CodeModel::Large)
2878 return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
2879 SDValue(Tmp, 0));
2890 return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
2891 MVT::i64, GA, SDValue(Tmp, 0)));
28802892
28812893 if (GlobalAddressSDNode *G = dyn_cast(GA)) {
28822894 const GlobalValue *GValue = G->getGlobal();
28842896 (GValue->isDeclaration() || GValue->isWeakForLinker())) ||
28852897 GValue->isDeclaration() || GValue->hasCommonLinkage() ||
28862898 GValue->hasAvailableExternallyLinkage())
2887 return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
2888 SDValue(Tmp, 0));
2899 return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
2900 MVT::i64, GA, SDValue(Tmp, 0)));
28892901 }
28902902
28912903 return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
18201820 setUsesTOCBasePtr(DAG.getMachineFunction());
18211821 }
18221822
// Builds the PPCISD::TOC_ENTRY node for GA as a memory intrinsic node whose
// pointer info is MachinePointerInfo::getGOT().
// Is64Bit selects both the value type and the base operand: i64 with the
// PPC::X2 register, or i32 with a PPCISD::GlobalBaseReg node.
// The node is created with the value-type list (VT, MVT::Other) and the
// operands { GA, base }.
1823 static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit,
1824 SDValue GA) {
1825 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
1826 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
1827 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
1828
1829 SDValue Ops[] = { GA, Reg };
// NOTE(review): the trailing positional arguments (0, false, true, false, 0)
// are presumably alignment, volatile, reads-memory, writes-memory and size --
// confirm against the SelectionDAG::getMemIntrinsicNode declaration.
1830 return DAG.getMemIntrinsicNode(PPCISD::TOC_ENTRY, dl,
1831 DAG.getVTList(VT, MVT::Other), Ops, VT,
1832 MachinePointerInfo::getGOT(), 0, false, true,
1833 false, 0);
1834 }
1835
18231836 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
18241837 SelectionDAG &DAG) const {
18251838 EVT PtrVT = Op.getValueType();
18311844 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
18321845 setUsesTOCBasePtr(DAG);
18331846 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
1834 return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
1835 DAG.getRegister(PPC::X2, MVT::i64));
1847 return getTOCEntry(DAG, SDLoc(CP), true, GA);
18361848 }
18371849
18381850 unsigned MOHiFlag, MOLoFlag;
18421854 if (isPIC && Subtarget.isSVR4ABI()) {
18431855 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
18441856 PPCII::MO_PIC_FLAG);
1845 SDLoc DL(CP);
1846 return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
1847 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
1857 return getTOCEntry(DAG, SDLoc(CP), false, GA);
18481858 }
18491859
18501860 SDValue CPIHi =
18631873 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
18641874 setUsesTOCBasePtr(DAG);
18651875 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
1866 return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
1867 DAG.getRegister(PPC::X2, MVT::i64));
1876 return getTOCEntry(DAG, SDLoc(JT), true, GA);
18681877 }
18691878
18701879 unsigned MOHiFlag, MOLoFlag;
18741883 if (isPIC && Subtarget.isSVR4ABI()) {
18751884 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
18761885 PPCII::MO_PIC_FLAG);
1877 SDLoc DL(GA);
1878 return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA,
1879 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
1886 return getTOCEntry(DAG, SDLoc(GA), false, GA);
18801887 }
18811888
18821889 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
18951902 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
18961903 setUsesTOCBasePtr(DAG);
18971904 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
1898 return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA,
1899 DAG.getRegister(PPC::X2, MVT::i64));
1905 return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
19001906 }
19011907
19021908 unsigned MOHiFlag, MOLoFlag;
20062012 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
20072013 setUsesTOCBasePtr(DAG);
20082014 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2009 return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
2010 DAG.getRegister(PPC::X2, MVT::i64));
2015 return getTOCEntry(DAG, DL, true, GA);
20112016 }
20122017
20132018 unsigned MOHiFlag, MOLoFlag;
20182023 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
20192024 GSDN->getOffset(),
20202025 PPCII::MO_PIC_FLAG);
2021 return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
2022 DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32));
2026 return getTOCEntry(DAG, DL, false, GA);
20232027 }
20242028
20252029 SDValue GAHi =
7070 /// though these are usually folded into other nodes.
7171 Hi, Lo,
7272
73 TOC_ENTRY,
74
7573 /// The following two target-specific nodes are used for calls through
7674 /// function pointers in the 64-bit SVR4 ABI.
7775
336334
337335 /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
338336 /// The 4xf32 load used for v4i1 constants.
339 QVLFSb
337 QVLFSb,
338
339 /// GPRC = TOC_ENTRY GA, TOC
340 /// Loads the entry for GA from the TOC, where the TOC base is given by
341 /// the last operand.
342 TOC_ENTRY
340343 };
341344 }
342345
118118
119119 def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
120120 def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
121 def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
121 def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp,
122 [SDNPMayLoad, SDNPMemOperand]>;
122123 def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
123124 def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
124125
0 ; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
1 target datalayout = "E-m:e-i64:64-n32:64"
2 target triple = "powerpc64-unknown-linux-gnu"
3
4 @phasor = external constant [4096 x i32]
5
6 ; Function Attrs: nounwind
7 define void @test(i32* nocapture %out, i32 zeroext %step_size) #0 {
8 entry:
9 %shl = shl i32 %step_size, 2
10 %idxprom = zext i32 %shl to i64
11 br label %for.body
12
13 ; Make sure that the TOC load has been hoisted out of the loop.
14 ; CHECK-LABEL: @test
15 ; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc@l
16 ; CHECK: %for.body
17 ; CHECK: blr
18
19 for.body: ; preds = %entry, %for.body
20 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
21 %0 = trunc i64 %indvars.iv to i32
22 %shl1 = shl i32 %0, %step_size
23 %idxprom2 = sext i32 %shl1 to i64
24 %arrayidx.sum = add nsw i64 %idxprom2, %idxprom
25 %arrayidx3 = getelementptr inbounds [4096 x i32]* @phasor, i64 0, i64 %arrayidx.sum
26 %1 = load i32* %arrayidx3, align 4
27 %arrayidx5 = getelementptr inbounds i32* %out, i64 %indvars.iv
28 store i32 %1, i32* %arrayidx5, align 4
29 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
30 %cmp = icmp slt i64 %indvars.iv.next, 1020
31 br i1 %cmp, label %for.body, label %for.end
32
33 for.end: ; preds = %for.body
34 ret void
35 }
36
37 attributes #0 = { nounwind }
38
263263 ret void
264264 }
265265 ; CHECK-LABEL: @caller2
266 ; CHECK: ld [[REG:[0-9]+]], .LC
267 ; CHECK-DAG: lfs 1, 0([[REG]])
268 ; CHECK-DAG: lfs 2, 4([[REG]])
269 ; CHECK-DAG: lfs 3, 8([[REG]])
270 ; CHECK-DAG: lfs 4, 12([[REG]])
271 ; CHECK-DAG: lfs 5, 16([[REG]])
272 ; CHECK-DAG: lfs 6, 20([[REG]])
273 ; CHECK-DAG: lfs 7, 24([[REG]])
274 ; CHECK-DAG: lfs 8, 28([[REG]])
275 ; CHECK: ld [[REG:[0-9]+]], .LC
276 ; CHECK-DAG: lfs 9, 0([[REG]])
277 ; CHECK-DAG: lfs 10, 4([[REG]])
278 ; CHECK-DAG: lfs 11, 8([[REG]])
279 ; CHECK-DAG: lfs 12, 12([[REG]])
280 ; CHECK-DAG: lfs 13, 16([[REG]])
281 ; CHECK: ld [[REG:[0-9]+]], .LC
282 ; CHECK-DAG: lwz [[REG0:[0-9]+]], 0([[REG]])
283 ; CHECK-DAG: lwz [[REG1:[0-9]+]], 4([[REG]])
284 ; CHECK-DAG: sldi [[REG1]], [[REG1]], 32
285 ; CHECK-DAG: or 10, [[REG0]], [[REG1]]
266 ; CHECK: ld {{[0-9]+}}, .LC
267 ; CHECK-DAG: lfs 1, 0({{[0-9]+}})
268 ; CHECK-DAG: lfs 2, 4({{[0-9]+}})
269 ; CHECK-DAG: lfs 3, 8({{[0-9]+}})
270 ; CHECK-DAG: lfs 4, 12({{[0-9]+}})
271 ; CHECK-DAG: lfs 5, 16({{[0-9]+}})
272 ; CHECK-DAG: lfs 6, 20({{[0-9]+}})
273 ; CHECK-DAG: lfs 7, 24({{[0-9]+}})
274 ; CHECK-DAG: lfs 8, 28({{[0-9]+}})
275
276 ; CHECK-DAG: lfs 9, 0({{[0-9]+}})
277 ; CHECK-DAG: lfs 10, 4({{[0-9]+}})
278 ; CHECK-DAG: lfs 11, 8({{[0-9]+}})
279 ; CHECK-DAG: lfs 12, 12({{[0-9]+}})
280 ; CHECK-DAG: lfs 13, 16({{[0-9]+}})
281
282 ; CHECK-DAG: lwz [[REG0:[0-9]+]], 0({{[0-9]+}})
283 ; CHECK-DAG: lwz [[REG1:[0-9]+]], 4({{[0-9]+}})
284 ; CHECK-DAG: sldi [[REG2:[0-9]+]], [[REG1]], 32
285 ; CHECK-DAG: or 10, [[REG0]], [[REG2]]
286286 ; CHECK: bl test2
287287
288288 declare void @test2([8 x float], [5 x float], [2 x float])
2121 ; CHECK: addi 3, {{[0-9]+}}, __once_callable@got@tlsgd@l
2222 ; CHECK: bl __tls_get_addr(__once_callable@tlsgd)
2323 ; CHECK-NEXT: nop
24 ; CHECK: std {{[0-9]+}}, 0(3)
24 ; FIXME: We could check here for 'std {{[0-9]+}}, 0(3)', but that no longer
25 ; works because, with new scheduling freedom, we create a copy of R3 based on the
26 ; initial scheduling, but don't coalesce it again after we move the instructions
27 ; so that the copy is no longer necessary.
2528 ; CHECK: addi 3, {{[0-9]+}}, __once_call@got@tlsgd@l
2629 ; CHECK: bl __tls_get_addr(__once_call@tlsgd)
2730 ; CHECK-NEXT: nop
3434 ret void
3535
3636 ; CHECK-LABEL: @test2
37 ; CHECK: ld {{[0-9]+}}, 112(1)
38 ; CHECK: li [[REG16:[0-9]+]], 16
39 ; CHECK: addi [[REGB:[0-9]+]], 1, 112
40 ; CHECK: lvx 2, [[REGB]], [[REG16]]
37 ; CHECK-DAG: ld {{[0-9]+}}, 112(1)
38 ; CHECK-DAG: li [[REG16:[0-9]+]], 16
39 ; CHECK-DAG: addi [[REGB:[0-9]+]], 1, 112
40 ; CHECK-DAG: lvx 2, [[REGB]], [[REG16]]
4141 ; CHECK: blr
4242
4343 ; CHECK-VSX-LABEL: @test2
44 ; CHECK-VSX: ld {{[0-9]+}}, 112(1)
45 ; CHECK-VSX: li [[REG16:[0-9]+]], 16
46 ; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 112
47 ; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
44 ; CHECK-VSX-DAG: ld {{[0-9]+}}, 112(1)
45 ; CHECK-VSX-DAG: li [[REG16:[0-9]+]], 16
46 ; CHECK-VSX-DAG: addi [[REGB:[0-9]+]], 1, 112
47 ; CHECK-VSX-DAG: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
4848 ; CHECK-VSX: blr
4949 }
5050
6060 ret void
6161
6262 ; CHECK-LABEL: @test3
63 ; CHECK: ld {{[0-9]+}}, 128(1)
64 ; CHECK: li [[REG16:[0-9]+]], 16
65 ; CHECK: addi [[REGB:[0-9]+]], 1, 128
66 ; CHECK: lvx 2, [[REGB]], [[REG16]]
63 ; CHECK-DAG: ld {{[0-9]+}}, 128(1)
64 ; CHECK-DAG: li [[REG16:[0-9]+]], 16
65 ; CHECK-DAG: addi [[REGB:[0-9]+]], 1, 128
66 ; CHECK-DAG: lvx 2, [[REGB]], [[REG16]]
6767 ; CHECK: blr
6868
6969 ; CHECK-VSX-LABEL: @test3
70 ; CHECK-VSX: ld {{[0-9]+}}, 128(1)
71 ; CHECK-VSX: li [[REG16:[0-9]+]], 16
72 ; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 128
73 ; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
70 ; CHECK-VSX-DAG: ld {{[0-9]+}}, 128(1)
71 ; CHECK-VSX-DAG: li [[REG16:[0-9]+]], 16
72 ; CHECK-VSX-DAG: addi [[REGB:[0-9]+]], 1, 128
73 ; CHECK-VSX-DAG: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
7474 ; CHECK-VSX: blr
7575 }
7676