llvm.org GIT mirror llvm / dcabc7b
Optimization for SIGN_EXTEND operation on AVX. Special handling was added for v4i32 -> v4i64 and v8i16 -> v8i32 extensions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@149600 91177308-0d34-0410-b5e6-96231b3b80d8 Elena Demikhovsky 8 years ago
5 changed file(s) with 84 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
12201220 setTargetDAGCombine(ISD::LOAD);
12211221 setTargetDAGCombine(ISD::STORE);
12221222 setTargetDAGCombine(ISD::ZERO_EXTEND);
1223 setTargetDAGCombine(ISD::SIGN_EXTEND);
12231224 setTargetDAGCombine(ISD::TRUNCATE);
12241225 setTargetDAGCombine(ISD::SINT_TO_FP);
12251226 if (Subtarget->is64Bit())
1464014641 return SDValue();
1464114642 }
1464214643
14644 static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
14645 TargetLowering::DAGCombinerInfo &DCI,
14646 const X86Subtarget *Subtarget) {
14647 if (!DCI.isBeforeLegalizeOps())
14648 return SDValue();
14649
14650 if (!Subtarget->hasAVX()) return SDValue();
14651
14652 // Optimize vectors in AVX mode
14653 // Sign extend v8i16 to v8i32 and
14654 // v4i32 to v4i64
14655 //
14656 // Divide input vector into two parts
14657 // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1}
14658 // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32
14659 // concat the vectors to original VT
14660
14661 EVT VT = N->getValueType(0);
14662 SDValue Op = N->getOperand(0);
14663 EVT OpVT = Op.getValueType();
14664 DebugLoc dl = N->getDebugLoc();
14665
14666 if (((VT == MVT::v4i64) && (OpVT == MVT::v4i32)) ||
14667 ((VT == MVT::v8i32) && (OpVT == MVT::v8i16))) {
14668
14669 unsigned NumElems = OpVT.getVectorNumElements();
14670 SmallVector ShufMask1(NumElems, -1);
14671 for (unsigned i=0; i< NumElems/2; i++) ShufMask1[i] = i;
14672
14673 SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
14674 ShufMask1.data());
14675
14676 SmallVector ShufMask2(NumElems, -1);
14677 for (unsigned i=0; i< NumElems/2; i++) ShufMask2[i] = i+NumElems/2;
14678
14679 SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
14680 ShufMask2.data());
14681
14682 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
14683 VT.getVectorNumElements()/2);
14684
14685 OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
14686 OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi);
14687
14688 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
14689 }
14690 return SDValue();
14691 }
14692
1464314693 static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
1464414694 const X86Subtarget *Subtarget) {
1464514695 // (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
1488514935 case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
1488614936 case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
1488714937 case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, Subtarget);
14938 case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
1488814939 case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI);
1488914940 case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
1489014941 case X86ISD::SHUFP: // Handle all target specific shuffles
217217
218218 // VZEXT_MOVL - Vector move low and zero extend.
219219 VZEXT_MOVL,
220
221       // VSEXT_MOVL - Vector move low and sign extend.
222 VSEXT_MOVL,
220223
221224 // VSHL, VSRL - 128-bit vector logical left / right shift
222225 VSHLDQ, VSRLDQ,
7070 SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
7171 def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
7272 SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
// X86vsmovl - sign-extending counterpart of X86vzmovl above: an integer
// vector result produced by sign extending the low elements of the integer
// vector operand (selects to pmovsx* — see the patterns in X86InstrSSE.td).
def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL",
                 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;

7376 def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
7477 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
7578 def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
54755475 (PMOVZXDQrm addr:$src)>;
54765476 def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
54775477 (PMOVZXDQrm addr:$src)>;
5478 }
5479
// Select VSEXT_MOVL to the VEX-encoded sign-extension moves when AVX is
// available.
let Predicates = [HasAVX] in {
  def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
  def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
}
5484
// Fallback for plain SSE4.1: same sign-extension moves, legacy encoding.
let Predicates = [HasSSE41] in {
  def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
  def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
}
54795489
54805490
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
1
; Widening sign extension of a 128-bit vector should select vpmovsxwd
; rather than a scalar-extract sequence.
define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
;CHECK: sext_8i16_to_8i32
;CHECK: vpmovsxwd

  %ext = sext <8 x i16> %A to <8 x i32>
  ret <8 x i32> %ext
}
9
; Same check for the 32->64-bit element case: expect vpmovsxdq.
define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
;CHECK: sext_4i32_to_4i64
;CHECK: vpmovsxdq

  %ext = sext <4 x i32> %A to <4 x i64>
  ret <4 x i64> %ext
}