llvm.org GIT mirror llvm / 8e9ba0e
[PowerPC] Reuse a load operand in int->fp conversions

int->fp conversions on PPC must be done through memory loads and stores. On a modern core, this process begins by storing the int value to memory, then loading it using a (sometimes special) FP load instruction. Unfortunately, we would do this even when the value to be converted was itself a load, and we can just use that same memory location instead of copying it to another first.

There is a slight complication when handling int_to_fp(fp_to_int(x)) pairs, because the fp_to_int operand has not been lowered when the int_to_fp is being lowered. We handle this specially by invoking fp_to_int's lowering logic (partially) and getting the necessary memory location (some trivial refactoring was done to make this possible).

This is all somewhat ugly, and it would be nice if some later CodeGen stage could just clean this stuff up, but because doing so would involve modifying target-specific nodes (or instructions), it is not immediately clear how that would work.

Also, remove a related entry from the README.txt for which we now generate reasonable code.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225301 91177308-0d34-0410-b5e6-96231b3b80d8

Hal Finkel 5 years ago
4 changed file(s) with 241 addition(s) and 44 deletion(s). Raw diff Collapse all Expand all
54075407 return Op;
54085408 }
54095409
5410 // FIXME: Split this code up when LegalizeDAGTypes lands.
5411 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
5412 SDLoc dl) const {
5410 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
5411 SelectionDAG &DAG,
5412 SDLoc dl) const {
54135413 assert(Op.getOperand(0).getValueType().isFloatingPoint());
54145414 SDValue Src = Op.getOperand(0);
54155415 if (Src.getValueType() == MVT::f32)
54585458 if (Op.getValueType() == MVT::i32 && !i32Stack) {
54595459 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
54605460 DAG.getConstant(4, FIPtr.getValueType()));
5461 MPI = MachinePointerInfo();
5462 }
5463
5464 return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI,
5465 false, false, false, 0);
5461 MPI = MPI.getWithOffset(4);
5462 }
5463
5464 RLI.Chain = Chain;
5465 RLI.Ptr = FIPtr;
5466 RLI.MPI = MPI;
5467 }
5468
// Lowers an FP_TO_SINT / FP_TO_UINT node. LowerFP_TO_INTForReuse fills RLI
// with the chain, pointer and pointer info of the memory location to read, and
// the integer result is then produced by an ordinary load from that location.
5469 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
5470 SDLoc dl) const {
5471 ReuseLoadInfo RLI;
5472 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
5473
// The load is non-volatile and non-temporal (the two literal false
// arguments); the remaining memory attributes are taken from RLI.
5474 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
5475 false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
5476 RLI.Ranges);
5477 }
5478
5479 // We're trying to insert a regular store, S, and then a load, L. If the
5480 // incoming value, O, is a load, we might just be able to have our load use the
5481 // address used by O. However, we don't know if anything else will store to
5482 // that address before we can load from it. To prevent this situation, we need
5483 // to insert our load, L, into the chain as a peer of O. To do this, we give L
5484 // the same chain operand as O, we create a token factor from the chain results
5485 // of O and L, and we replace all uses of O's chain result with that token
5486 // factor (see spliceIntoChain below for this last part).
//
// Returns true, with RLI filled in, when the value Op can be read back from a
// memory location that already exists:
//  - Op is an FP_TO_SINT / FP_TO_UINT that is legal or custom for its source
//    type: LowerFP_TO_INTForReuse supplies the location. RLI.ResChain is not
//    assigned on this path in the code visible here, so a later
//    spliceIntoChain call returns early.
//  - Op is a non-extending, non-volatile, non-temporal load whose memory type
//    equals MemVT: the load's address, input chain and memory attributes are
//    copied into RLI and RLI.ResChain is set to the load's chain result.
// Returns false, leaving RLI untouched, in every other case.
5487 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
5488 ReuseLoadInfo &RLI,
5489 SelectionDAG &DAG) const {
5490 SDLoc dl(Op);
5491 if ((Op.getOpcode() == ISD::FP_TO_UINT ||
5492 Op.getOpcode() == ISD::FP_TO_SINT) &&
5493 isOperationLegalOrCustom(Op.getOpcode(),
5494 Op.getOperand(0).getValueType())) {
5495
5496 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
5497 return true;
5498 }
5499
// Anything else can only be reused if it is itself a load node.
5500 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
5501 if (!LD || !ISD::isNON_EXTLoad(LD) || LD->isVolatile() || LD->isNonTemporal())
5502 return false;
5503 if (LD->getMemoryVT() != MemVT)
5504 return false;
5505
5506 RLI.Ptr = LD->getBasePtr();
// For a pre-increment load with a defined offset, the address to reuse is
// base + offset rather than the base pointer alone.
5507 if (LD->isIndexed() && LD->getOffset().getOpcode() != ISD::UNDEF) {
5508 assert(LD->getAddressingMode() == ISD::PRE_INC &&
5509 "Non-pre-inc AM on PPC?");
5510 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
5511 LD->getOffset());
5512 }
5513
5514 RLI.Chain = LD->getChain();
5515 RLI.MPI = LD->getPointerInfo();
5516 RLI.IsInvariant = LD->isInvariant();
5517 RLI.Alignment = LD->getAlignment();
5518 RLI.AAInfo = LD->getAAInfo();
5519 RLI.Ranges = LD->getRanges();
5520
// The chain result is value 2 of an indexed load and value 1 otherwise.
5521 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
5522 return true;
5523 }
5524
5525 // Given the head of the old chain, ResChain, insert a token factor containing
5526 // it and NewResChain, and make users of ResChain now be users of that token
5527 // factor.
//
// ResChain is the chain result of the load being reused (may be a null
// SDValue, in which case nothing is done); NewResChain is the chain result of
// the newly created load.
5528 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
5529 SDValue NewResChain,
5530 SelectionDAG &DAG) const {
5531 if (!ResChain)
5532 return;
5533
5534 SDLoc dl(NewResChain);
5535
// The token factor is first built with an UNDEF placeholder in place of
// ResChain; presumably this keeps the ReplaceAllUsesOfValueWith call below
// from rewriting the token factor's own operand into a self-reference.
5536 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
5537 NewResChain, DAG.getUNDEF(MVT::Other));
5538 assert(TF.getNode() != NewResChain.getNode() &&
5539 "A new TF really is required here");
5540
// Order matters: redirect every existing user of ResChain to TF first, and
// only then give TF its real operands (ResChain, NewResChain).
5541 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
5542 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
54665543 }
54675544
54685545 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
5469 SelectionDAG &DAG) const {
5546 SelectionDAG &DAG) const {
54705547 SDLoc dl(Op);
54715548 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
54725549 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
55385615 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
55395616 }
55405617
5541 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
5618 ReuseLoadInfo RLI;
5619 SDValue Bits;
5620
5621 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
5622 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
5623 false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
5624 RLI.Ranges);
5625 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
5626 } else
5627 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
5628
55425629 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
55435630
55445631 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
55595646
55605647 SDValue Ld;
55615648 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
5562 int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
5563 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
5564
5565 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
5566 MachinePointerInfo::getFixedStack(FrameIdx),
5567 false, false, 0);
5568
5569 assert(cast(Store)->getMemoryVT() == MVT::i32 &&
5570 "Expected an i32 store");
5649 ReuseLoadInfo RLI;
5650 bool ReusingLoad;
5651 if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
5652 DAG))) {
5653 int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
5654 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
5655
5656 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
5657 MachinePointerInfo::getFixedStack(FrameIdx),
5658 false, false, 0);
5659
5660 assert(cast(Store)->getMemoryVT() == MVT::i32 &&
5661 "Expected an i32 store");
5662
5663 RLI.Ptr = FIdx;
5664 RLI.Chain = Store;
5665 RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
5666 RLI.Alignment = 4;
5667 }
5668
55715669 MachineMemOperand *MMO =
5572 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
5573 MachineMemOperand::MOLoad, 4, 4);
5574 SDValue Ops[] = { Store, FIdx };
5670 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
5671 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
5672 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
55755673 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
55765674 PPCISD::LFIWZX : PPCISD::LFIWAX,
55775675 dl, DAG.getVTList(MVT::f64, MVT::Other),
55785676 Ops, MVT::i32, MMO);
5677 if (ReusingLoad)
5678 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
55795679 } else {
55805680 assert(Subtarget.isPPC64() &&
55815681 "i32->FP without LFIWAX supported only on PPC64");
64886588 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
64896589 case ISD::FP_TO_UINT:
64906590 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
6491 SDLoc(Op));
6591 SDLoc(Op));
64926592 case ISD::UINT_TO_FP:
64936593 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
64946594 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
590590 }
591591
592592 private:
593
// Describes a memory location that an int->fp lowering can load from, as
// filled in by canReuseLoadAddress / LowerFP_TO_INTForReuse.
594 struct ReuseLoadInfo {
// Address to load from.
595 SDValue Ptr;
// Input chain for the new load.
596 SDValue Chain;
// Chain result of the load being reused; handed to spliceIntoChain, which
// does nothing when this is left as a null SDValue.
597 SDValue ResChain;
// Pointer info and memory attributes forwarded to the new load / memory
// operand.
598 MachinePointerInfo MPI;
599 bool IsInvariant;
600 unsigned Alignment;
601 AAMDNodes AAInfo;
602 const MDNode *Ranges;
603
// Defaults: not invariant, alignment 0, no range metadata; the SDValue,
// MachinePointerInfo and AAMDNodes members are default-constructed.
604 ReuseLoadInfo() : IsInvariant(false), Alignment(0), Ranges(nullptr) {}
605 };
606
607 bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
608 SelectionDAG &DAG) const;
609 void spliceIntoChain(SDValue ResChain, SDValue NewResChain,
610 SelectionDAG &DAG) const;
611
612 void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
613 SelectionDAG &DAG, SDLoc dl) const;
614
593615 SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
594616 SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
595617
298298
299299 We could also strength reduce the rem and the div:
300300 http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf
301
302 ===-------------------------------------------------------------------------===
303
304 float foo(float X) { return (int)(X); }
305
306 Currently produces:
307
308 _foo:
309 fctiwz f0, f1
310 stfd f0, -8(r1)
311 lwz r2, -4(r1)
312 extsw r2, r2
313 std r2, -16(r1)
314 lfd f0, -16(r1)
315 fcfid f0, f0
316 frsp f1, f0
317 blr
318
319 We could use a target dag combine to turn the lwz/extsw into an lwa when the
320 lwz has a single use. Since LWA is cracked anyway, this would be a codesize
321 win only.
322301
323302 ===-------------------------------------------------------------------------===
324303
0 ; RUN: llc -mcpu=a2 < %s | FileCheck %s
1 target datalayout = "E-m:e-i64:64-n32:64"
2 target triple = "powerpc64-unknown-linux-gnu"
3
; sitofp of a loaded i64: the checks below expect the value to be read
; directly with lfd from 0(3) and converted with fcfid.
4 ; Function Attrs: nounwind readonly
5 define double @test1(i64* nocapture readonly %x) #0 {
6 entry:
7 %0 = load i64* %x, align 8
8 %conv = sitofp i64 %0 to double
9 ret double %conv
10
11 ; CHECK-LABEL: @test1
12 ; CHECK: lfd [[REG1:[0-9]+]], 0(3)
13 ; CHECK: fcfid 1, [[REG1]]
14 ; CHECK: blr
15 }
16
; sitofp of a loaded i32: the checks below expect the value to be read
; directly with lfiwax (indexed form, base register 3) and converted with
; fcfid.
17 ; Function Attrs: nounwind readonly
18 define double @test2(i32* nocapture readonly %x) #0 {
19 entry:
20 %0 = load i32* %x, align 4
21 %conv = sitofp i32 %0 to double
22 ret double %conv
23
24 ; CHECK-LABEL: @test2
25 ; CHECK: lfiwax [[REG1:[0-9]+]], 0, 3
26 ; CHECK: fcfid 1, [[REG1]]
27 ; CHECK: blr
28 }
29
; float -> i32 -> float round trip (signed): the checks below expect fctiwz,
; a single stfiwx to an address formed by addi from register 1, an lfiwax
; from that same address register, and fcfids.
30 ; Function Attrs: nounwind readonly
31 define float @foo(float %X) #0 {
32 entry:
33 %conv = fptosi float %X to i32
34 %conv1 = sitofp i32 %conv to float
35 ret float %conv1
36
37 ; CHECK-LABEL: @foo
38 ; CHECK-DAG: fctiwz [[REG2:[0-9]+]], 1
39 ; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
40 ; CHECK: stfiwx [[REG2]], 0, [[REG1]]
41 ; CHECK: lfiwax [[REG3:[0-9]+]], 0, [[REG1]]
42 ; CHECK: fcfids 1, [[REG3]]
43 ; CHECK: blr
44 }
45
; double -> i32 -> double round trip (signed): the checks below expect
; fctiwz, a single stfiwx to an address formed by addi from register 1, an
; lfiwax from that same address register, and fcfid.
46 ; Function Attrs: nounwind readonly
47 define double @food(double %X) #0 {
48 entry:
49 %conv = fptosi double %X to i32
50 %conv1 = sitofp i32 %conv to double
51 ret double %conv1
52
53 ; CHECK-LABEL: @food
54 ; CHECK-DAG: fctiwz [[REG2:[0-9]+]], 1
55 ; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
56 ; CHECK: stfiwx [[REG2]], 0, [[REG1]]
57 ; CHECK: lfiwax [[REG3:[0-9]+]], 0, [[REG1]]
58 ; CHECK: fcfid 1, [[REG3]]
59 ; CHECK: blr
60 }
61
; float -> i32 -> float round trip (unsigned): the checks below expect
; fctiwuz, a single stfiwx to an address formed by addi from register 1, an
; lfiwzx from that same address register, and fcfidus.
62 ; Function Attrs: nounwind readonly
63 define float @foou(float %X) #0 {
64 entry:
65 %conv = fptoui float %X to i32
66 %conv1 = uitofp i32 %conv to float
67 ret float %conv1
68
69 ; CHECK-LABEL: @foou
70 ; CHECK-DAG: fctiwuz [[REG2:[0-9]+]], 1
71 ; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
72 ; CHECK: stfiwx [[REG2]], 0, [[REG1]]
73 ; CHECK: lfiwzx [[REG3:[0-9]+]], 0, [[REG1]]
74 ; CHECK: fcfidus 1, [[REG3]]
75 ; CHECK: blr
76 }
77
; double -> i32 -> double round trip (unsigned): the checks below expect
; fctiwuz, a single stfiwx to an address formed by addi from register 1, an
; lfiwzx from that same address register, and fcfidu.
78 ; Function Attrs: nounwind readonly
79 define double @fooud(double %X) #0 {
80 entry:
81 %conv = fptoui double %X to i32
82 %conv1 = uitofp i32 %conv to double
83 ret double %conv1
84
85 ; CHECK-LABEL: @fooud
86 ; CHECK-DAG: fctiwuz [[REG2:[0-9]+]], 1
87 ; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
88 ; CHECK: stfiwx [[REG2]], 0, [[REG1]]
89 ; CHECK: lfiwzx [[REG3:[0-9]+]], 0, [[REG1]]
90 ; CHECK: fcfidu 1, [[REG3]]
91 ; CHECK: blr
92 }
93
94 attributes #0 = { nounwind readonly }
95