llvm.org GIT mirror llvm / 6fac1fb
DAGCombiner: Fold a shuffle on CONCAT_VECTORS into a new CONCAT_VECTORS if possible.

This pattern occurs in SROA output due to the way vector arguments are lowered on ARM. The testcase from PR15525 now compiles into this, which is better than the code we got with the old scalarrepl:

_Store:
    ldr.w r9, [sp]
    vmov d17, r3, r9
    vmov d16, r1, r2
    vst1.8 {d16, d17}, [r0]
    bx lr

Differential Revision: http://llvm-reviews.chandlerc.com/D647

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179106 91177308-0d34-0410-b5e6-96231b3b80d8

Benjamin Kramer 6 years ago
2 changed file(s) with 72 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
91249124 return SDValue();
91259125 }
91269126
9127 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat.
9128 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
9129 EVT VT = N->getValueType(0);
9130 unsigned NumElts = VT.getVectorNumElements();
9131
9132 SDValue N0 = N->getOperand(0);
9133 SDValue N1 = N->getOperand(1);
9134 ShuffleVectorSDNode *SVN = cast(N);
9135
9136 SmallVector Ops;
9137 EVT ConcatVT = N0.getOperand(0).getValueType();
9138 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
9139 unsigned NumConcats = NumElts / NumElemsPerConcat;
9140
9141 // Look at every vector that's inserted. We're looking for exact
9142 // subvector-sized copies from a concatenated vector
9143 for (unsigned I = 0; I != NumConcats; ++I) {
9144 // Make sure we're dealing with a copy.
9145 unsigned Begin = I * NumElemsPerConcat;
9146 if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
9147 return SDValue();
9148
9149 for (unsigned J = 1; J != NumElemsPerConcat; ++J) {
9150 if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
9151 return SDValue();
9152 }
9153
9154 unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
9155 if (FirstElt < N0.getNumOperands())
9156 Ops.push_back(N0.getOperand(FirstElt));
9157 else
9158 Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
9159 }
9160
9161 return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, Ops.data(),
9162 Ops.size());
9163 }
9164
91279165 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
91289166 EVT VT = N->getValueType(0);
91299167 unsigned NumElts = VT.getVectorNumElements();
92259263 }
92269264 }
92279265
9266 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
9267 Level < AfterLegalizeVectorOps &&
9268 (N1.getOpcode() == ISD::UNDEF ||
9269 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
9270 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
9271 SDValue V = partitionShuffleOfConcats(N, DAG);
9272
9273 if (V.getNode())
9274 return V;
9275 }
9276
92289277 // If this shuffle node is simply a swizzle of another shuffle node,
92299278 // and it reverses the swizzle of the previous shuffle then we can
92309279 // optimize shuffle(shuffle(x, undef), undef) -> x.
0 ; RUN: llc < %s -mtriple=thumbv7s-apple-ios3.0.0 | FileCheck %s
1
2 ; PR15525
3 ; CHECK: test1:
4 ; CHECK: ldr.w [[REG:r[0-9]+]], [sp]
5 ; CHECK-NEXT: vmov {{d[0-9]+}}, r1, r2
6 ; CHECK-NEXT: vmov {{d[0-9]+}}, r3, [[REG]]
7 ; CHECK-NEXT: vst1.8 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0]
8 ; CHECK-NEXT: bx lr
; Widens the first two i64 elements of %vec.coerce to <16 x i8>, combines
; them with a third shuffle and stores the result through %arg.
; NOTE(review): the shufflevector mask operands were missing from this copy.
; The masks below are reconstructed from the operand types and the CHECK
; lines (element 0 in the low half, element 1 in the high half) - confirm
; against the upstream test file.
define void @test1(i8* %arg, [4 x i64] %vec.coerce) {
bb:
  %tmp = extractvalue [4 x i64] %vec.coerce, 0
  %tmp2 = bitcast i64 %tmp to <8 x i8>
  %tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp4 = extractvalue [4 x i64] %vec.coerce, 1
  %tmp5 = bitcast i64 %tmp4 to <8 x i8>
  %tmp6 = shufflevector <8 x i8> %tmp5, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp7 = shufflevector <16 x i8> %tmp6, <16 x i8> %tmp3, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  tail call void @llvm.arm.neon.vst1.v16i8(i8* %arg, <16 x i8> %tmp7, i32 2)
  ret void
}
21
22 declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32)