llvm.org GIT mirror llvm / ba05c91
Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148337 91177308-0d34-0410-b5e6-96231b3b80d8 Nadav Rotem 8 years ago
4 changed file(s) with 67 addition(s) and 10 deletion(s). Raw diff Collapse all Expand all
69816981 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
69826982 // (vextract (scalar_to_vector val), 0) -> val
69836983 SDValue InVec = N->getOperand(0);
6984 EVT VT = InVec.getValueType();
6985 EVT NVT = N->getValueType(0);
69846986
69856987 if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
69866988 // Check if the result type doesn't match the inserted element type. A
69876989 // SCALAR_TO_VECTOR may truncate the inserted element and the
69886990 // EXTRACT_VECTOR_ELT may widen the extracted vector.
69896991 SDValue InOp = InVec.getOperand(0);
6990 EVT NVT = N->getValueType(0);
69916992 if (InOp.getValueType() != NVT) {
69926993 assert(InOp.getValueType().isInteger() && NVT.isInteger());
69936994 return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
69956996 return InOp;
69966997 }
69976998
6999 SDValue EltNo = N->getOperand(1);
7000 bool ConstEltNo = isa<ConstantSDNode>(EltNo);
7001
7002 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
7003 // We only perform this optimization before the op legalization phase because
7004 // we may introduce new vector instructions which are not backed by TD patterns.
7005 // For example, on AVX this could extract elements from a wide vector
7006 // without going through extract_subvector.
7007 if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
7008 && ConstEltNo && !LegalOperations) {
7009 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
7010 int NumElem = VT.getVectorNumElements();
7011 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
7012 // Find the new index to extract from.
7013 int OrigElt = SVOp->getMaskElt(Elt);
7014
7015 // Extracting an undef index is undef.
7016 if (OrigElt == -1)
7017 return DAG.getUNDEF(NVT);
7018
7019 // Select the right vector half to extract from.
7020 if (OrigElt < NumElem) {
7021 InVec = InVec->getOperand(0);
7022 } else {
7023 InVec = InVec->getOperand(1);
7024 OrigElt -= NumElem;
7025 }
7026
7027 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT,
7028 InVec, DAG.getConstant(OrigElt, MVT::i32));
7029 }
7030
69987031 // Perform only after legalization to ensure build_vector / vector_shuffle
69997032 // optimizations have already been done.
70007033 if (!LegalOperations) return SDValue();
70027035 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
70037036 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
70047037 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
7005 SDValue EltNo = N->getOperand(1);
7006
7007 if (isa<ConstantSDNode>(EltNo)) {
7038
7039 if (ConstEltNo) {
70087040 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
70097041 bool NewLoad = false;
70107042 bool BCNumEltsChanged = false;
7011 EVT VT = InVec.getValueType();
70127043 EVT ExtVT = VT.getVectorElementType();
70137044 EVT LVT = ExtVT;
70147045
0 ; RUN: llc < %s -march=cellspu -o %t1.s
1 ; RUN: grep rot %t1.s | count 86
1 ; RUN: grep rot %t1.s | count 85
22 ; RUN: grep roth %t1.s | count 8
33 ; RUN: grep roti.*5 %t1.s | count 1
44 ; RUN: grep roti.*27 %t1.s | count 1
162162 define <2 x float> @test1(<4 x float> %param )
163163 {
164164 ; CHECK: test1
165 ; CHECK: rotqbyi
165 ; CHECK: shufb
166166 %el = extractelement <4 x float> %param, i32 1
167167 %vec1 = insertelement <1 x float> undef, float %el, i32 0
168168 %rv = shufflevector <1 x float> %vec1, <1 x float> undef, <2 x i32>
1111 ; rdar://10538417
1212 define <3 x i64> @test2(<2 x i64> %v) nounwind readnone {
1313 ; CHECK: test2:
14 ; CHECK: vxorpd
15 ; CHECK: vperm2f128
14 ; CHECK: vinsertf128
1615 %1 = shufflevector <2 x i64> %v, <2 x i64> %v, <3 x i32>
1716 %2 = shufflevector <3 x i64> zeroinitializer, <3 x i64> %1, <3 x i32>
1817 ret <3 x i64> %2
18 ; CHECK: ret
1919 }
2020
2121 define <4 x i64> @test3(<4 x i64> %a, <4 x i64> %b) nounwind {
2323 ret <4 x i64> %c
2424 ; CHECK: test3:
2525 ; CHECK: vperm2f128
26 ; CHECK: ret
2627 }
2728
2829 define <8 x float> @test4(float %a) nounwind {
7475 ; CHECK: ret
7576 ret void
7677 }
78
79 ; Extract a value from a shufflevector.
80 define i32 @test9(<4 x i32> %a) nounwind {
81 ; CHECK: test9
82 ; CHECK: vpextrd
83 %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32>
84 %r = extractelement <8 x i32> %b, i32 2
85 ; CHECK: ret
86 ret i32 %r
87 }
88
89 ; Extract a value which is the result of an undef mask.
90 define i32 @test10(<4 x i32> %a) nounwind {
91 ; CHECK: @test10
92 ; CHECK-NEXT: #
93 ; CHECK-NEXT: ret
94 %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32>
95 %r = extractelement <8 x i32> %b, i32 2
96 ret i32 %r
97 }
99 %val = fadd <3 x float> %x, %src2
1010 store <3 x float> %val, <3 x float>* %dst.addr
1111 ret void
12 ; CHECK: ret
1213 }
1314
1415
2223 %val = fadd <3 x float> %x, %src2
2324 store <3 x float> %val, <3 x float>* %dst.addr
2425 ret void
26 ; CHECK: ret
2527 }
2628
2729 ; Example of when widening a v3float operation causes the DAG to replace a node
3032 define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
3133 entry:
3234 ; CHECK: shuf3:
33 ; CHECK: pshufd
35 ; CHECK: shufps
3436 %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32>
3537 %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32>
3638 %tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32>
4446 %shuffle.i.i.i21 = shufflevector <4 x float> %tmp2.i18, <4 x float> undef, <4 x i32>
4547 store <4 x float> %shuffle.i.i.i21, <4 x float>* %dst
4648 ret void
49 ; CHECK: ret
4750 }
4851
4952 ; PR10421: make sure we correctly handle extreme widening with CONCAT_VECTORS
5255 ; CHECK-NOT: punpckldq
5356 %vshuf = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32>
5457 ret <8 x i8> %vshuf
58 ; CHECK: ret
5559 }
5660
5761 ; PR11389: another CONCAT_VECTORS case
6064 %v = shufflevector <2 x i8> , <2 x i8> undef, <8 x i32>
6165 store <8 x i8> %v, <8 x i8>* %p, align 8
6266 ret void
67 ; CHECK: ret
6368 }