llvm.org GIT mirror llvm / bb81d9d
SDAG: Legalize vector BSWAP into a shuffle if the shuffle is legal but the bswap is not. - On ARM/ARM64 we get a vrev because the shuffle matching code is really smart. We still unroll anything that's not v4i32 though. - On X86 we get a pshufb with SSSE3. Required more cleverness in isShuffleMaskLegal. - On PPC we get a vperm for v8i16 and v4i32. v2i64 is unrolled. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209123 91177308-0d34-0410-b5e6-96231b3b80d8 Benjamin Kramer 6 years ago
9 changed file(s) with 184 addition(s) and 7 deletion(s). Raw diff Collapse all Expand all
6262 SDValue ExpandUINT_TO_FLOAT(SDValue Op);
6363 // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
6464 SDValue ExpandSEXTINREG(SDValue Op);
65 // Expand bswap of vectors into a shuffle if legal.
66 SDValue ExpandBSWAP(SDValue Op);
6567 // Implement vselect in terms of XOR, AND, OR when blend is not supported
6668 // by the target.
6769 SDValue ExpandVSELECT(SDValue Op);
296298 case TargetLowering::Expand:
297299 if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG)
298300 Result = ExpandSEXTINREG(Op);
301 else if (Node->getOpcode() == ISD::BSWAP)
302 Result = ExpandBSWAP(Op);
299303 else if (Node->getOpcode() == ISD::VSELECT)
300304 Result = ExpandVSELECT(Op);
301305 else if (Node->getOpcode() == ISD::SELECT)
681685 return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
682686 }
683687
688 SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
689 EVT VT = Op.getValueType();
690
691 // Generate a byte wise shuffle mask for the BSWAP.
692 SmallVector<int, 16> ShuffleMask;
693 int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
694 for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
695 for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
696 ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
697
698 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
699
700 // Only emit a shuffle if the mask is legal.
701 if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
702 return DAG.UnrollVectorOp(Op.getNode());
703
704 SDLoc DL(Op);
705 Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
706 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
707 ShuffleMask.data());
708 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
709 }
710
684711 SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
685712 // Implement VSELECT in terms of XOR, AND, OR
686713 // on platforms which do not support blend natively.
519519 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
520520 setOperationAction(ISD::MULHU, VT, Expand);
521521 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
522
523 setOperationAction(ISD::BSWAP, VT, Expand);
522524 }
523525
524526 // There is no v1i64/v2i64 multiply, expand v1i64/v2i64 to GPR i64 multiply.
413413 setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
414414 setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
415415 setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
416
417 setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
416418 }
417419
418420 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
448448 setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
449449 setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
450450 setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
451
452 setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
451453
452454 for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
453455 InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
459459 setOperationAction(ISD::SDIVREM, VT, Expand);
460460 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
461461 setOperationAction(ISD::FPOW, VT, Expand);
462 setOperationAction(ISD::BSWAP, VT, Expand);
462463 setOperationAction(ISD::CTPOP, VT, Expand);
463464 setOperationAction(ISD::CTLZ, VT, Expand);
464465 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
1511515115 if (VT.getSizeInBits() == 64)
1511615116 return false;
1511715117
15118 // FIXME: pshufb, blends, shifts.
15118 // If this is a single-input shuffle with no 128 bit lane crossings we can
15119 // lower it into pshufb.
15120 if ((SVT.is128BitVector() && Subtarget->hasSSSE3()) ||
15121 (SVT.is256BitVector() && Subtarget->hasInt256())) {
15122 bool isLegal = true;
15123 for (unsigned I = 0, E = M.size(); I != E; ++I) {
15124 if (M[I] >= (int)SVT.getVectorNumElements() ||
15125 ShuffleCrosses128bitLane(SVT, I, M[I])) {
15126 isLegal = false;
15127 break;
15128 }
15129 }
15130 if (isLegal)
15131 return true;
15132 }
15133
15134 // FIXME: blends, shifts.
1511915135 return (SVT.getVectorNumElements() == 2 ||
1512015136 ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
1512115137 isMOVLMask(M, SVT) ||
177177 ret void
178178 }
179179
180 define <4 x i32> @test_vrev32_bswap(<4 x i32> %source) nounwind {
181 ; CHECK-LABEL: test_vrev32_bswap:
182 ; CHECK: vrev32.8
183 %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)
184 ret <4 x i32> %bswap
185 }
186
187 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
221221 ret void
222222 }
223223
224
225 define <4 x i32> @test_vrev32_bswap(<4 x i32> %source) nounwind {
226 ; CHECK-LABEL: test_vrev32_bswap:
227 ; CHECK: rev32.16b
228 ; CHECK-NOT: rev
229 ; CHECK: ret
230 %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)
231 ret <4 x i32> %bswap
232 }
233
234 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
None ; RUN: llc < %s -mcpu=x86_64 | FileCheck %s
0 ; RUN: llc < %s -mcpu=x86-64 | FileCheck %s -check-prefix=CHECK-NOSSSE3
1 ; RUN: llc < %s -mcpu=core2 | FileCheck %s -check-prefix=CHECK-SSSE3
2 ; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK-AVX2
13 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
24 target triple = "x86_64-unknown-linux-gnu"
35
6 declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
7 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
48 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
59
6 define <2 x i64> @foo(<2 x i64> %v) #0 {
10 define <8 x i16> @test1(<8 x i16> %v) #0 {
11 entry:
12 %r = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %v)
13 ret <8 x i16> %r
14
15 ; CHECK-NOSSSE3-LABEL: @test1
16 ; CHECK-NOSSSE3: rolw
17 ; CHECK-NOSSSE3: rolw
18 ; CHECK-NOSSSE3: rolw
19 ; CHECK-NOSSSE3: rolw
20 ; CHECK-NOSSSE3: rolw
21 ; CHECK-NOSSSE3: rolw
22 ; CHECK-NOSSSE3: rolw
23 ; CHECK-NOSSSE3: rolw
24 ; CHECK-NOSSSE3: retq
25
26 ; CHECK-SSSE3-LABEL: @test1
27 ; CHECK-SSSE3: pshufb
28 ; CHECK-SSSE3-NEXT: retq
29
30 ; CHECK-AVX2-LABEL: @test1
31 ; CHECK-AVX2: vpshufb
32 ; CHECK-AVX2-NEXT: retq
33 }
34
35 define <4 x i32> @test2(<4 x i32> %v) #0 {
36 entry:
37 %r = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %v)
38 ret <4 x i32> %r
39
40 ; CHECK-NOSSSE3-LABEL: @test2
41 ; CHECK-NOSSSE3: bswapl
42 ; CHECK-NOSSSE3: bswapl
43 ; CHECK-NOSSSE3: bswapl
44 ; CHECK-NOSSSE3: bswapl
45 ; CHECK-NOSSSE3: retq
46
47 ; CHECK-SSSE3-LABEL: @test2
48 ; CHECK-SSSE3: pshufb
49 ; CHECK-SSSE3-NEXT: retq
50
51 ; CHECK-AVX2-LABEL: @test2
52 ; CHECK-AVX2: vpshufb
53 ; CHECK-AVX2-NEXT: retq
54 }
55
56 define <2 x i64> @test3(<2 x i64> %v) #0 {
757 entry:
858 %r = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %v)
959 ret <2 x i64> %r
60
61 ; CHECK-NOSSSE3-LABEL: @test3
62 ; CHECK-NOSSSE3: bswapq
63 ; CHECK-NOSSSE3: bswapq
64 ; CHECK-NOSSSE3: retq
65
66 ; CHECK-SSSE3-LABEL: @test3
67 ; CHECK-SSSE3: pshufb
68 ; CHECK-SSSE3-NEXT: retq
69
70 ; CHECK-AVX2-LABEL: @test3
71 ; CHECK-AVX2: vpshufb
72 ; CHECK-AVX2-NEXT: retq
1073 }
1174
12 ; CHECK-LABEL: @foo
13 ; CHECK: bswapq
14 ; CHECK: bswapq
15 ; CHECK: retq
75 declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
76 declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
77 declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
78
79 define <16 x i16> @test4(<16 x i16> %v) #0 {
80 entry:
81 %r = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %v)
82 ret <16 x i16> %r
83
84 ; CHECK-SSSE3-LABEL: @test4
85 ; CHECK-SSSE3: pshufb
86 ; CHECK-SSSE3: pshufb
87 ; CHECK-SSSE3-NEXT: retq
88
89 ; CHECK-AVX2-LABEL: @test4
90 ; CHECK-AVX2: vpshufb
91 ; CHECK-AVX2-NEXT: retq
92 }
93
94 define <8 x i32> @test5(<8 x i32> %v) #0 {
95 entry:
96 %r = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %v)
97 ret <8 x i32> %r
98
99 ; CHECK-SSSE3-LABEL: @test5
100 ; CHECK-SSSE3: pshufb
101 ; CHECK-SSSE3: pshufb
102 ; CHECK-SSSE3-NEXT: retq
103
104 ; CHECK-AVX2-LABEL: @test5
105 ; CHECK-AVX2: vpshufb
106 ; CHECK-AVX2-NEXT: retq
107 }
108
109 define <4 x i64> @test6(<4 x i64> %v) #0 {
110 entry:
111 %r = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %v)
112 ret <4 x i64> %r
113
114 ; CHECK-SSSE3-LABEL: @test6
115 ; CHECK-SSSE3: pshufb
116 ; CHECK-SSSE3: pshufb
117 ; CHECK-SSSE3-NEXT: retq
118
119 ; CHECK-AVX2-LABEL: @test6
120 ; CHECK-AVX2: vpshufb
121 ; CHECK-AVX2-NEXT: retq
122 }
123
16124
17125 attributes #0 = { nounwind uwtable }
18126