llvm.org GIT mirror llvm / 536e667
On x86, if the only use of an i64 load is an i64 store, generate a double load/store pair instead.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@66776 91177308-0d34-0410-b5e6-96231b3b80d8
Evan Cheng, 10 years ago
3 changed files with 87 additions and 55 deletions.
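In practice, the change means a 64-bit integer copied through memory on a 32-bit x86 target no longer has to be split into two 32-bit GPR moves when SSE2 is available; x86-64 already handles such a copy with a single movq. The IR and assembly below are an illustrative sketch of the effect, not output captured from this revision, and the register assignments are arbitrary:

define void @copy(i64* %dst, i64* %src) nounwind {
entry:
  %v = load i64* %src, align 8
  store i64 %v, i64* %dst, align 8
  ret void
}

Without SSE2 (and before this patch in general), llc -march=x86 moves the value 32 bits at a time, producing a pair of movl loads such as

  movl (%eax), %ecx
  movl 4(%eax), %edx

followed by the matching movl stores. With this patch and -mattr=+sse2, the value travels through an XMM register instead:

  movsd (%eax), %xmm0
  movsd %xmm0, (%ecx)

The change lives in PerformSTORECombine in the X86 backend (X86ISelLowering.cpp); its diff comes first, followed by the two test changes.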
@@ -8284,14 +8284,21 @@
 
 /// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
 static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
                                    const X86Subtarget *Subtarget) {
   // Turn load->store of MMX types into GPR load/stores. This avoids clobbering
   // the FP state in cases where an emms may be missing.
   // A preferable solution to the general problem is to figure out the right
   // places to insert EMMS. This qualifies as a quick hack.
+
+  // Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
   StoreSDNode *St = cast<StoreSDNode>(N);
-  if (St->getValue().getValueType().isVector() &&
-      St->getValue().getValueType().getSizeInBits() == 64 &&
+  MVT VT = St->getValue().getValueType();
+  if (VT.getSizeInBits() != 64)
+    return SDValue();
+
+  bool F64IsLegal = !UseSoftFloat && !NoImplicitFloat && Subtarget->hasSSE2();
+  if ((VT.isVector() ||
+       (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
       isa<LoadSDNode>(St->getValue()) &&
       !cast<LoadSDNode>(St->getValue())->isVolatile() &&
       St->getChain().hasOneUse() && !St->isVolatile()) {
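The guard above is the first half of the change: every 64-bit store of a just-loaded value is now considered, but the scalar i64 case is only admitted on 32-bit targets where f64 loads and stores are actually usable, i.e. SSE2 is present and neither UseSoftFloat nor NoImplicitFloat is set. The second hunk below implements the corresponding lowering. As a condensed restatement — a sketch only, with plain booleans standing in for the Subtarget queries and the UseSoftFloat / NoImplicitFloat settings referenced in the code:

// Not code from this patch: summarizes which copy the combine emits
// for a 64-bit load->store once the guard above has matched.
enum I64CopyKind { SingleMovQ, SingleMovSD, TwoMovLPairs };

static I64CopyKind classifyI64Copy(bool Is64Bit, bool HasSSE2,
                                   bool UseSoftFloat, bool NoImplicitFloat) {
  bool F64IsLegal = !UseSoftFloat && !NoImplicitFloat && HasSSE2;
  if (Is64Bit)
    return SingleMovQ;    // one 64-bit GPR load/store (movq)
  if (F64IsLegal)
    return SingleMovSD;   // one f64 load/store through an XMM register (movsd)
  return TwoMovLPairs;    // two 32-bit GPR load/store pairs (movl)
}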
@@ -8315,60 +8322,72 @@
           Ops.push_back(ChainVal->getOperand(i));
       }
     }
-    if (Ld) {
-      DebugLoc DL = N->getDebugLoc();
-      // If we are a 64-bit capable x86, lower to a single movq load/store pair.
-      if (Subtarget->is64Bit()) {
-        SDValue NewLd = DAG.getLoad(MVT::i64, DL, Ld->getChain(),
-                                    Ld->getBasePtr(), Ld->getSrcValue(),
-                                    Ld->getSrcValueOffset(), Ld->isVolatile(),
-                                    Ld->getAlignment());
-        SDValue NewChain = NewLd.getValue(1);
-        if (TokenFactorIndex != -1) {
-          Ops.push_back(NewChain);
-          NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Ops[0],
-                                 Ops.size());
-        }
-        return DAG.getStore(NewChain, DL, NewLd, St->getBasePtr(),
-                            St->getSrcValue(), St->getSrcValueOffset(),
-                            St->isVolatile(), St->getAlignment());
-      }
-
-      // Otherwise, lower to two 32-bit copies.
-      SDValue LoAddr = Ld->getBasePtr();
-      SDValue HiAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, LoAddr,
-                                   DAG.getConstant(4, MVT::i32));
-
-      SDValue LoLd = DAG.getLoad(MVT::i32, DL, Ld->getChain(), LoAddr,
-                                 Ld->getSrcValue(), Ld->getSrcValueOffset(),
-                                 Ld->isVolatile(), Ld->getAlignment());
-      SDValue HiLd = DAG.getLoad(MVT::i32, DL, Ld->getChain(), HiAddr,
-                                 Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
-                                 Ld->isVolatile(),
-                                 MinAlign(Ld->getAlignment(), 4));
-
-      SDValue NewChain = LoLd.getValue(1);
+
+    if (!Ld || !ISD::isNormalLoad(Ld))
+      return SDValue();
+
+    // If this is not the MMX case, i.e. we are just turning i64 load/store
+    // into f64 load/store, avoid the transformation if there are multiple
+    // uses of the loaded value.
+    if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0))
+      return SDValue();
+
+    DebugLoc LdDL = Ld->getDebugLoc();
+    DebugLoc StDL = N->getDebugLoc();
+    // If we are a 64-bit capable x86, lower to a single movq load/store pair.
+    // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
+    // pair instead.
+    if (Subtarget->is64Bit() || F64IsLegal) {
+      MVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
+      SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(),
+                                  Ld->getBasePtr(), Ld->getSrcValue(),
+                                  Ld->getSrcValueOffset(), Ld->isVolatile(),
+                                  Ld->getAlignment());
+      SDValue NewChain = NewLd.getValue(1);
       if (TokenFactorIndex != -1) {
-        Ops.push_back(LoLd);
-        Ops.push_back(HiLd);
-        NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Ops[0],
+        Ops.push_back(NewChain);
+        NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
                                Ops.size());
       }
-
-      LoAddr = St->getBasePtr();
-      HiAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, LoAddr,
-                           DAG.getConstant(4, MVT::i32));
-
-      SDValue LoSt = DAG.getStore(NewChain, DL, LoLd, LoAddr,
+      return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
                           St->getSrcValue(), St->getSrcValueOffset(),
                           St->isVolatile(), St->getAlignment());
-      SDValue HiSt = DAG.getStore(NewChain, DL, HiLd, HiAddr,
-                                  St->getSrcValue(),
-                                  St->getSrcValueOffset() + 4,
-                                  St->isVolatile(),
-                                  MinAlign(St->getAlignment(), 4));
-      return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LoSt, HiSt);
-    }
+    }
+
+    // Otherwise, lower to two pairs of 32-bit loads / stores.
+    SDValue LoAddr = Ld->getBasePtr();
+    SDValue HiAddr = DAG.getNode(ISD::ADD, LdDL, MVT::i32, LoAddr,
+                                 DAG.getConstant(4, MVT::i32));
+
+    SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
+                               Ld->getSrcValue(), Ld->getSrcValueOffset(),
+                               Ld->isVolatile(), Ld->getAlignment());
+    SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
+                               Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
+                               Ld->isVolatile(),
+                               MinAlign(Ld->getAlignment(), 4));
+
+    SDValue NewChain = LoLd.getValue(1);
+    if (TokenFactorIndex != -1) {
+      Ops.push_back(LoLd);
+      Ops.push_back(HiLd);
+      NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
+                             Ops.size());
+    }
+
+    LoAddr = St->getBasePtr();
+    HiAddr = DAG.getNode(ISD::ADD, StDL, MVT::i32, LoAddr,
+                         DAG.getConstant(4, MVT::i32));
+
+    SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
+                                St->getSrcValue(), St->getSrcValueOffset(),
+                                St->isVolatile(), St->getAlignment());
+    SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
+                                St->getSrcValue(),
+                                St->getSrcValueOffset() + 4,
+                                St->isVolatile(),
+                                MinAlign(St->getAlignment(), 4));
+    return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
   }
   return SDValue();
 }
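One detail worth calling out in the rewritten body: the scalar i64 case is only taken when the store is the sole user of the loaded value (Ld->hasNUsesOfValue(1, 0)). If the integer value is needed anyway, routing it through an XMM register on top of the GPR work would add instructions rather than remove them, so the combine backs off. A hypothetical example of IR that is deliberately left alone (not one of the commit's tests):

define i32 @copy_and_use(i64* %dst, i64* %src) nounwind {
entry:
  %v = load i64* %src, align 8        ; two uses of %v
  store i64 %v, i64* %dst, align 8
  %lo = trunc i64 %v to i32           ; value is also consumed as an integer
  ret i32 %lo
}

Here hasNUsesOfValue(1, 0) is false, so PerformSTORECombine returns SDValue() and the store is legalized the usual way. The MMX vector case is exempt from this check, since keeping the value out of MMX registers is worthwhile regardless. The new regression test and the updated MMX copy test follow.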
; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq.*(%rsi), %rax}
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {movsd.*(%eax),}

; Uses movsd to load / store i64 values if sse2 is available.

; rdar://6659858

define void @foo(i64* %x, i64* %y) nounwind {
entry:
  %tmp1 = load i64* %y, align 8        ; <i64> [#uses=1]
  store i64 %tmp1, i64* %x, align 8
  ret void
}
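The RUN lines above drive the whole pipeline through llvm-as and llc. To inspect the output by hand, the same invocation without the grep works; the input file name here is only a placeholder:

  llvm-as < i64-copy.ll | llc -march=x86 -mattr=+sse2

The -mattr=+sse2 run should load the value with a movsd from (%eax), and the x86-64 run should copy it with a single movq, matching the two grep patterns. The second test change, below, updates the existing MMX copy test: the darwin target triple is dropped and both configurations are pinned explicitly with -mattr=-sse2 and -mattr=+sse2.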
@@ -1,11 +1,11 @@
 ; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq.*(%rsi), %rax}
-; RUN: llvm-as < %s | llc -march=x86 | grep {movl.*4(%eax),}
+; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2 | grep {movl.*4(%eax),}
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {movsd.(%eax),}
 
 ; This test should use GPRs to copy the mmx value, not MMX regs.  Using mmx regs,
 ; increases the places that need to use emms.
 
 ; rdar://5741668
-target triple = "x86_64-apple-darwin8"
 
 define void @foo(<1 x i64>* %x, <1 x i64>* %y) nounwind {
 entry:
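For the <1 x i64> MMX case, the original point of this test still holds: the copy should go through integer or XMM registers rather than %mm0, so no emms is required. Schematically, with illustrative register choices, the two RUN configurations now expect:

  ; -mattr=-sse2: the copy stays in GPRs, two 32-bit moves
  movl 4(%eax), %ecx
  movl (%eax), %edx

  ; -mattr=+sse2: the copy goes through an XMM register
  movsd (%eax), %xmm0

Either way the MMX unit is never touched, which is what keeps emms out of the picture.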