llvm.org GIT mirror: llvm / d880b97

Handle a few more cases of folding an i64 load into an xmm register and zeroing the top bits. Note: some of this code will be moved into the target-independent part of the DAG combiner in a subsequent patch.

Author: Evan Cheng
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@50918 91177308-0d34-0410-b5e6-96231b3b80d8

7 changed files with 110 additions and 29 deletions.
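For orientation, the sketch below (LLVM IR of this era, with a hypothetical function name) shows the kind of code the new combine targets; it mirrors the vec_set test added at the end of this diff. Because i64 is not a legal type on 32-bit x86, the combine fires before the legalizer expands the 64-bit load, and it also catches the already-expanded form: an i64 BUILD_PAIR of two consecutive i32 loads, possibly bit-cast to f64.

; A minimal sketch (hypothetical name) of the pattern the new BUILD_VECTOR
; combine recognizes: an i64 loaded from memory and inserted into the low
; element of an otherwise-zero vector. With this patch it should select to
; a single movq, a 64-bit load that implicitly zeros the upper half of the
; destination xmm register.
define <2 x i64> @fold_i64_load(i64* %p) nounwind {
	%x = load i64* %p
	%v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
	ret <2 x i64> %v
}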
@@ -974,7 +974,7 @@
 
   // Also handle the case where we explicitly require zeros in the top
   // elements. This is a vector shuffle from the zero vector.
-  if (N.getOpcode() == X86ISD::ZEXT_VMOVL && N.Val->hasOneUse() &&
+  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.Val->hasOneUse() &&
      // Check to see if the top elements are all zeros (or bitcast of zeros).
       N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
       N.getOperand(0).Val->hasOneUse() &&
@@ -714,6 +714,7 @@
 
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+  setTargetDAGCombine(ISD::BUILD_VECTOR);
   setTargetDAGCombine(ISD::SELECT);
   setTargetDAGCombine(ISD::STORE);
 
@@ -3480,9 +3481,9 @@
                                 &MaskVec[0], MaskVec.size()));
 }
 
-/// getZextVMoveL - Return a zero-extending vector move low node.
+/// getVZextMovL - Return a zero-extending vector move low node.
 ///
-static SDOperand getZextVMoveL(MVT::ValueType VT, MVT::ValueType OpVT,
+static SDOperand getVZextMovL(MVT::ValueType VT, MVT::ValueType OpVT,
                               SDOperand SrcOp, SelectionDAG &DAG,
                               const X86Subtarget *Subtarget) {
   if (VT == MVT::v2f64 || VT == MVT::v4f32) {
@@ -3500,7 +3501,7 @@
         // PR2108
         OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
         return DAG.getNode(ISD::BIT_CONVERT, VT,
-                           DAG.getNode(X86ISD::ZEXT_VMOVL, OpVT,
+                           DAG.getNode(X86ISD::VZEXT_MOVL, OpVT,
                                        DAG.getNode(ISD::SCALAR_TO_VECTOR, OpVT,
                                            SrcOp.getOperand(0).getOperand(0))));
       }
@@ -3508,7 +3509,7 @@
   }
 
   return DAG.getNode(ISD::BIT_CONVERT, VT,
-                     DAG.getNode(X86ISD::ZEXT_VMOVL, OpVT,
+                     DAG.getNode(X86ISD::VZEXT_MOVL, OpVT,
                                  DAG.getNode(ISD::BIT_CONVERT, OpVT, SrcOp)));
 }
 
@@ -3560,14 +3561,14 @@
       SDOperand NewMask = NewOp.getOperand(2);
       if (isCommutedMOVL(NewMask.Val, true, false)) {
         NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
-        return getZextVMoveL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget);
+        return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget);
       }
     }
   } else if (ISD::isBuildVectorAllZeros(V1.Val)) {
     SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
                                                DAG, *this);
     if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
-      return getZextVMoveL(VT, NewOp.getValueType(), NewOp.getOperand(1),
+      return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
                           DAG, Subtarget);
   }
 }
@@ -3576,7 +3577,7 @@
     if (V1IsUndef)
       return V2;
     if (ISD::isBuildVectorAllZeros(V1.Val))
-      return getZextVMoveL(VT, VT, V2, DAG, Subtarget);
+      return getVZextMovL(VT, VT, V2, DAG, Subtarget);
     return Op;
   }
 
@@ -5674,7 +5675,8 @@
   case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
   case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
   case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
-  case X86ISD::ZEXT_VMOVL: return "X86ISD::ZEXT_VMOVL";
+  case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
+  case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
   }
 }
 
@@ -6301,6 +6303,55 @@
                        LD->getAlignment());
 }
 
+static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
+  SDOperand Elt = N->getOperand(i);
+  if (Elt.getOpcode() != ISD::MERGE_VALUES)
+    return Elt.Val;
+  return Elt.getOperand(Elt.ResNo).Val;
+}
+
+static SDOperand PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
+                                           const X86Subtarget *Subtarget) {
+  // Ignore single operand BUILD_VECTOR.
+  if (N->getNumOperands() == 1)
+    return SDOperand();
+
+  MVT::ValueType VT = N->getValueType(0);
+  MVT::ValueType EVT = MVT::getVectorElementType(VT);
+  if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
+    // We are looking for load i64 and zero extend. We want to transform
+    // it before legalizer has a chance to expand it. Also look for i64
+    // BUILD_PAIR bit casted to f64.
+    return SDOperand();
+  // This must be an insertion into a zero vector.
+  SDOperand HighElt = N->getOperand(1);
+  if (HighElt.getOpcode() != ISD::UNDEF &&
+      !isZeroNode(HighElt))
+    return SDOperand();
+
+  // Value must be a load.
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  SDNode *Base = N->getOperand(0).Val;
+  if (!isa<LoadSDNode>(Base)) {
+    if (Base->getOpcode() == ISD::BIT_CONVERT)
+      Base = Base->getOperand(0).Val;
+    if (Base->getOpcode() != ISD::BUILD_PAIR)
+      return SDOperand();
+    SDNode *Pair = Base;
+    Base = getBuildPairElt(Pair, 0);
+    if (!ISD::isNON_EXTLoad(Base))
+      return SDOperand();
+    SDNode *NextLD = getBuildPairElt(Pair, 1);
+    if (!ISD::isNON_EXTLoad(NextLD) ||
+        !isConsecutiveLoad(NextLD, Base, 1, 4/*32 bits*/, MFI))
+      return SDOperand();
+  }
+  LoadSDNode *LD = cast<LoadSDNode>(Base);
+
+  // Transform it into VZEXT_LOAD addr.
+  return DAG.getNode(X86ISD::VZEXT_LOAD, VT, LD->getChain(), LD->getBasePtr());
+}
+
 /// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
 static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget *Subtarget) {
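To make the BUILD_PAIR path in PerformBuildVectorCombine concrete, here is a hedged IR illustration (essentially @t1 of the new test at the end of this commit; the function name is hypothetical). The bitcast of the loaded <2 x i32> to i64 ends up, before legalization completes, as an i64 BUILD_PAIR of two i32 loads; getBuildPairElt fetches each half, isConsecutiveLoad verifies the halves are adjacent non-extending 32-bit loads, and the whole BUILD_VECTOR is then rewritten as a single X86ISD::VZEXT_LOAD of the low address.

; Hypothetical name; the body mirrors @t1 in the new test file below.
define <2 x i64> @buildpair_case(<2 x i32>* %p) nounwind {
	%w = load <2 x i32>* %p                 ; becomes two consecutive i32 loads
	%x = bitcast <2 x i32> %w to i64        ; i64 is illegal here: BUILD_PAIR
	%v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
	ret <2 x i64> %v
}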
@@ -6497,6 +6548,7 @@
   switch (N->getOpcode()) {
   default: break;
   case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, Subtarget);
+  case ISD::BUILD_VECTOR: return PerformBuildVectorCombine(N, DAG, Subtarget);
   case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
   case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
   case X86ISD::FXOR:
@@ -200,8 +200,11 @@
       // FNSTCW16m - Store FP control word into i16 memory.
       FNSTCW16m,
 
-      // ZEXT_VMOVL - Vector move low and zero extend.
-      ZEXT_VMOVL
+      // VZEXT_MOVL - Vector move low and zero extend.
+      VZEXT_MOVL,
+
+      // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+      VZEXT_LOAD
     };
   }
 
@@ -200,12 +200,12 @@
 def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
                             "movd\t{$src, $dst|$dst, $src}",
                             [(set VR64:$dst,
-                              (v2i32 (X86zvmovl (v2i32 (scalar_to_vector GR32:$src)))))]>;
+                              (v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>;
 let AddedComplexity = 20 in
 def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
                             "movd\t{$src, $dst|$dst, $src}",
                             [(set VR64:$dst,
-                              (v2i32 (X86zvmovl (v2i32
+                              (v2i32 (X86vzmovl (v2i32
                                 (scalar_to_vector (loadi32 addr:$src))))))]>;
 
 // Arithmetic Instructions
@@ -559,9 +559,9 @@
 // Move scalar to XMM zero-extended
 // movd to XMM register zero-extends
 let AddedComplexity = 15 in {
-  def : Pat<(v8i8 (X86zvmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
+  def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
             (MMX_MOVZDI2PDIrr GR32:$src)>;
-  def : Pat<(v4i16 (X86zvmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
+  def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))))),
             (MMX_MOVZDI2PDIrr GR32:$src)>;
 }
 
@@ -46,7 +46,10 @@
 def X86insrtps : SDNode<"X86ISD::INSERTPS",
                  SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
                                       SDTCisVT<2, f32>, SDTCisPtrTy<3>]>>;
-def X86zvmovl : SDNode<"X86ISD::ZEXT_VMOVL", SDTUnaryOp>;
+def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
+                SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
+def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
+                       [SDNPHasChain, SDNPMayLoad]>;
 
 //===----------------------------------------------------------------------===//
 // SSE Complex Patterns
@@ -1007,10 +1010,10 @@
 let AddedComplexity = 20 in
 def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
                       "movss\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst, (v4f32 (X86zvmovl (v4f32 (scalar_to_vector
+                      [(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector
                                                  (loadf32 addr:$src))))))]>;
 
-def : Pat<(v4f32 (X86zvmovl (memopv4f32 addr:$src))),
+def : Pat<(v4f32 (X86vzmovl (memopv4f32 addr:$src))),
           (MOVZSS2PSrm addr:$src)>;
 
 //===----------------------------------------------------------------------===//
@@ -2265,22 +2268,23 @@
 def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                       "movsd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
-                        (v2f64 (X86zvmovl (v2f64 (scalar_to_vector
+                        (v2f64 (X86vzmovl (v2f64 (scalar_to_vector
                                            (loadf64 addr:$src))))))]>;
 
-def : Pat<(v2f64 (X86zvmovl (memopv2f64 addr:$src))),
+def : Pat<(v2f64 (X86vzmovl (memopv2f64 addr:$src))),
           (MOVZSD2PDrm addr:$src)>;
+def : Pat<(v2f64 (X86vzload addr:$src)), (MOVZSD2PDrm addr:$src)>;
 
 // movd / movq to XMM register zero-extends
 let AddedComplexity = 15 in {
 def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
-                       [(set VR128:$dst, (v4i32 (X86zvmovl
+                       [(set VR128:$dst, (v4i32 (X86vzmovl
                                           (v4i32 (scalar_to_vector GR32:$src)))))]>;
 // This is X86-64 only.
 def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                         "mov{d|q}\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, (v2i64 (X86zvmovl
+                        [(set VR128:$dst, (v2i64 (X86vzmovl
                                            (v2i64 (scalar_to_vector GR64:$src)))))]>;
 }
 
@@ -2288,28 +2292,30 @@
 def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
-                         (v4i32 (X86zvmovl (v4i32 (scalar_to_vector
+                         (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
                                             (loadi32 addr:$src))))))]>;
 def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
-                       (v2i64 (X86zvmovl (v2i64 (scalar_to_vector
+                       (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
                                           (loadi64 addr:$src))))))]>, XS,
                    Requires<[HasSSE2]>;
 }
+
+def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
 
 // Moving from XMM to XMM and clear upper 64 bits. Note: there is a bug in the
 // IA-32 documentation; movq xmm1, xmm2 does clear the high bits.
 let AddedComplexity = 15 in
 def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "movq\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, (v2i64 (X86zvmovl (v2i64 VR128:$src))))]>,
+                        [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
                         XS, Requires<[HasSSE2]>;
 
 let AddedComplexity = 20 in
 def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                         "movq\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst, (v2i64 (X86zvmovl
+                        [(set VR128:$dst, (v2i64 (X86vzmovl
                                             (memopv2i64 addr:$src))))]>,
                         XS, Requires<[HasSSE2]>;
 
@@ -2757,9 +2763,9 @@
 // movd to XMM register zero-extends
 let AddedComplexity = 15 in {
 // Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
-def : Pat<(v2f64 (X86zvmovl (v2f64 (scalar_to_vector FR64:$src)))),
+def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
           (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v4f32 (X86zvmovl (v4f32 (scalar_to_vector FR32:$src)))),
+def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
           (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE2]>;
 }
 
@@ -2915,7 +2921,7 @@
 def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc, VR128:$src,
                   MOVL_shuffle_mask)),
           (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
-def : Pat<(v2f64 (X86zvmovl (v2f64 VR128:$src))),
+def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
           (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
 
 // FIXME: Temporary workaround since 2-wide shuffle is broken.
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mov | count 1
 ; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+sse2 | grep movd
 
 define <2 x i64> @t1(i64 %x) nounwind {
@@ -0,0 +1,19 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movq
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movsd
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mov | count 3
+
+define <2 x i64> @t1(<2 x i64>* %ptr) nounwind {
+	%tmp45 = bitcast <2 x i64>* %ptr to <2 x i32>*
+	%tmp615 = load <2 x i32>* %tmp45
+	%tmp7 = bitcast <2 x i32> %tmp615 to i64
+	%tmp8 = insertelement <2 x i64> zeroinitializer, i64 %tmp7, i32 0
+	ret <2 x i64> %tmp8
+}
+
+define <2 x i64> @t2(i64 %x) nounwind {
+	%tmp717 = bitcast i64 %x to double
+	%tmp8 = insertelement <2 x double> undef, double %tmp717, i32 0
+	%tmp9 = insertelement <2 x double> %tmp8, double 0.000000e+00, i32 1
+	%tmp11 = bitcast <2 x double> %tmp9 to <2 x i64>
+	ret <2 x i64> %tmp11
+}
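Taken together, the RUN lines of this new test encode the point of the patch: @t1's pair of consecutive 32-bit loads should be recombined into a single zero-extending movq, @t2's i64-to-double BUILD_PAIR bit-cast should lower to a single movsd load, and `grep mov | count 3` bounds the total number of mov-class instructions emitted for the whole file.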