llvm.org GIT mirror llvm / 4e98296
[PowerPC] Fold [sz]ext with fp_to_int lowering where possible. On modern cores with lfiw[az]x, we can fold a sign or zero extension from i32 to i64 into the load necessary for an i64 -> fp conversion. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225493 91177308-0d34-0410-b5e6-96231b3b80d8 (Hal Finkel, 5 years ago)
3 changed file(s) with 130 addition(s) and 4 deletion(s). Raw diff Collapse all Expand all
54825482 // factor (see spliceIntoChain below for this last part).
54835483 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
54845484 ReuseLoadInfo &RLI,
5485 SelectionDAG &DAG) const {
5485 SelectionDAG &DAG,
5486 ISD::LoadExtType ET) const {
54865487 SDLoc dl(Op);
5487 if ((Op.getOpcode() == ISD::FP_TO_UINT ||
5488 if (ET == ISD::NON_EXTLOAD &&
5489 (Op.getOpcode() == ISD::FP_TO_UINT ||
54885490 Op.getOpcode() == ISD::FP_TO_SINT) &&
54895491 isOperationLegalOrCustom(Op.getOpcode(),
54905492 Op.getOperand(0).getValueType())) {
54945496 }
54955497
54965498 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
5497 if (!LD || !ISD::isNON_EXTLoad(LD) || LD->isVolatile() || LD->isNonTemporal())
5499 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
5500 LD->isNonTemporal())
54985501 return false;
54995502 if (LD->getMemoryVT() != MemVT)
55005503 return false;
56145617 ReuseLoadInfo RLI;
56155618 SDValue Bits;
56165619
5620 MachineFunction &MF = DAG.getMachineFunction();
56175621 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
56185622 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
56195623 false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
56205624 RLI.Ranges);
56215625 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
5626 } else if (Subtarget.hasLFIWAX() &&
5627 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
5628 MachineMemOperand *MMO =
5629 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
5630 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
5631 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
5632 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
5633 DAG.getVTList(MVT::f64, MVT::Other),
5634 Ops, MVT::i32, MMO);
5635 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
5636 } else if (Subtarget.hasFPCVT() &&
5637 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
5638 MachineMemOperand *MMO =
5639 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
5640 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
5641 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
5642 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
5643 DAG.getVTList(MVT::f64, MVT::Other),
5644 Ops, MVT::i32, MMO);
5645 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
5646 } else if (((Subtarget.hasLFIWAX() &&
5647 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
5648 (Subtarget.hasFPCVT() &&
5649 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
5650 SINT.getOperand(0).getValueType() == MVT::i32) {
5651 MachineFrameInfo *FrameInfo = MF.getFrameInfo();
5652 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5653
5654 int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
5655 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
5656
5657 SDValue Store =
5658 DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
5659 MachinePointerInfo::getFixedStack(FrameIdx),
5660 false, false, 0);
5661
5662 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
5663 "Expected an i32 store");
5664
5665 RLI.Ptr = FIdx;
5666 RLI.Chain = Store;
5667 RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
5668 RLI.Alignment = 4;
5669
5670 MachineMemOperand *MMO =
5671 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
5672 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
5673 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
5674 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
5675 PPCISD::LFIWZX : PPCISD::LFIWAX,
5676 dl, DAG.getVTList(MVT::f64, MVT::Other),
5677 Ops, MVT::i32, MMO);
56225678 } else
56235679 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
56245680
605605 };
606606
607607 bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
608 SelectionDAG &DAG) const;
608 SelectionDAG &DAG,
609 ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
609610 void spliceIntoChain(SDValue ResChain, SDValue NewResChain,
610611 SelectionDAG &DAG) const;
611612
0 ; RUN: llc -mcpu=a2 < %s | FileCheck %s
1 target datalayout = "E-m:e-i64:64-n32:64"
2 target triple = "powerpc64-unknown-linux-gnu"
3
4 ; Function Attrs: nounwind
5 define double @foo1(i32* %x) #0 {
6 entry:
7 %0 = load i32* %x, align 4
8 %conv = sext i32 %0 to i64
9 %conv1 = sitofp i64 %conv to double
10 ret double %conv1
11
12 ; CHECK-LABEL: @foo1
13 ; CHECK: lfiwax [[REG1:[0-9]+]], 0, 3
14 ; CHECK: fcfid 1, [[REG1]]
15 ; CHECK: blr
16 }
17
18 define double @foo2(i32* %x) #0 {
19 entry:
20 %0 = load i32* %x, align 4
21 %conv = zext i32 %0 to i64
22 %conv1 = sitofp i64 %conv to double
23 ret double %conv1
24
25 ; CHECK-LABEL: @foo2
26 ; CHECK: lfiwzx [[REG1:[0-9]+]], 0, 3
27 ; CHECK: fcfid 1, [[REG1]]
28 ; CHECK: blr
29 }
30
31 define double @foo3(i32* %x) #0 {
32 entry:
33 %0 = load i32* %x, align 4
34 %1 = add i32 %0, 8
35 %conv = zext i32 %1 to i64
36 %conv1 = sitofp i64 %conv to double
37 ret double %conv1
38
39 ; CHECK-LABEL: @foo3
40 ; CHECK-DAG: lwz [[REG1:[0-9]+]], 0(3)
41 ; CHECK-DAG: addi [[REG3:[0-9]+]], 1,
42 ; CHECK-DAG: addi [[REG2:[0-9]+]], [[REG1]], 8
43 ; CHECK-DAG: stw [[REG2]],
44 ; CHECK: lfiwzx [[REG4:[0-9]+]], 0, [[REG3]]
45 ; CHECK: fcfid 1, [[REG4]]
46 ; CHECK: blr
47 }
48
49 define double @foo4(i32* %x) #0 {
50 entry:
51 %0 = load i32* %x, align 4
52 %1 = add i32 %0, 8
53 %conv = sext i32 %1 to i64
54 %conv1 = sitofp i64 %conv to double
55 ret double %conv1
56
57 ; CHECK-LABEL: @foo4
58 ; CHECK-DAG: lwz [[REG1:[0-9]+]], 0(3)
59 ; CHECK-DAG: addi [[REG3:[0-9]+]], 1,
60 ; CHECK-DAG: addi [[REG2:[0-9]+]], [[REG1]], 8
61 ; CHECK-DAG: stw [[REG2]],
62 ; CHECK: lfiwax [[REG4:[0-9]+]], 0, [[REG3]]
63 ; CHECK: fcfid 1, [[REG4]]
64 ; CHECK: blr
65 }
66
67 attributes #0 = { nounwind }
68