llvm.org GIT mirror llvm / 112dedc
Avoid going through a stack slot to convert from the x87 FP stack to an XMM register if we are just going to store the value back to memory anyway. This improves things like: double foo(); void bar(double *P) { *P = foo(); } git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45399 91177308-0d34-0410-b5e6-96231b3b80d8 Chris Lattner 11 years ago
3 changed file(s) with 54 addition(s) and 29 deletion(s). Raw diff Collapse all Expand all
16351635 This would result in smaller code and more efficient microops.
16361636
16371637 //===---------------------------------------------------------------------===//
1638
1639 We should be smarter about conversion from fpstack to XMM regs.
1640
1641 double foo();
1642 void bar(double *P) { *P = foo(); }
1643
1644 We compile that to:
1645
1646 _bar:
1647 subl $12, %esp
1648 call L_foo$stub
1649 fstpl (%esp)
1650 movl 16(%esp), %eax
1651 movsd (%esp), %xmm0
1652 movsd %xmm0, (%eax)
1653 addl $12, %esp
1654 ret
1655
1656 for example. The store to the stack slot and the reload from it are unneeded.
1657
1658 //===---------------------------------------------------------------------===//
3232 #include "llvm/CodeGen/SelectionDAG.h"
3333 #include "llvm/CodeGen/SSARegMap.h"
3434 #include "llvm/Support/MathExtras.h"
35 #include "llvm/Support/CommandLine.h"
3635 #include "llvm/Support/Debug.h"
3736 #include "llvm/Target/TargetOptions.h"
3837 #include "llvm/ADT/SmallSet.h"
811810 CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
812811 CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
813812
814
815813 SmallVector ResultVals;
816814
817815 // Copy all of the result registers out of their specified physreg.
837835 // an XMM register.
838836 if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
839837 (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
838 SDOperand StoreLoc;
839 const Value *SrcVal = 0;
840 int SrcValOffset = 0;
841
842 // Determine where to store the value. If the call result is directly
843 // used by a store, see if we can store directly into the location. In
844 // this case, we'll end up producing a fst + movss[load] + movss[store] to
845 // the same location, and the two movss's will be nuked as dead. This
846 // optimizes common things like "*D = atof(..)" to not need an
847 // intermediate stack slot.
848 if (SDOperand(TheCall, 0).hasOneUse() &&
849 SDOperand(TheCall, 1).hasOneUse()) {
850 // Ok, we have one use of the value and one use of the chain. See if
851 // they are the same node: a store.
852 if (StoreSDNode *N = dyn_cast(*TheCall->use_begin())) {
853 if (N->getChain().Val == TheCall && N->getValue().Val == TheCall &&
854 !N->isVolatile() && !N->isTruncatingStore() &&
855 N->getAddressingMode() == ISD::UNINDEXED) {
856 StoreLoc = N->getBasePtr();
857 SrcVal = N->getSrcValue();
858 SrcValOffset = N->getSrcValueOffset();
859 }
860 }
861 }
862
863 // If we weren't able to optimize the result, just create a temporary
864 // stack slot.
865 if (StoreLoc.Val == 0) {
866 MachineFunction &MF = DAG.getMachineFunction();
867 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
868 StoreLoc = DAG.getFrameIndex(SSFI, getPointerTy());
869 }
870
840871 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
841872 // shouldn't be necessary except that RFP cannot be live across
842 // multiple blocks. When stackifier is fixed, they can be uncoupled.
843 MachineFunction &MF = DAG.getMachineFunction();
844 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
845 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
873 // multiple blocks (which could happen if a select gets lowered into
874 // multiple blocks and scheduled in between them). When stackifier is
875 // fixed, they can be uncoupled.
846876 SDOperand Ops[] = {
847 Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
877 Chain, RetVal, StoreLoc, DAG.getValueType(RVLocs[0].getValVT()), InFlag
848878 };
849879 Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
850 RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
880 RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain,
881 StoreLoc, SrcVal, SrcValOffset);
851882 Chain = RetVal.getValue(1);
852883 }
853884 ResultVals.push_back(RetVal);
0 ; RUN: llvm-as < %s | llc | not grep movss
1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
2 target triple = "i686-apple-darwin8"
3
4 ; This should store directly into P from the FP stack. It should not
5 ; go through a stack slot to get there.
6
7 define void @bar(double* %P) {
8 entry:
9 %tmp = tail call double (...)* @foo( ) ; [#uses=1]
10 store double %tmp, double* %P, align 8
11 ret void
12 }
13
14 declare double @foo(...)