llvm.org GIT mirror: llvm commit b6db372
For AArch64, lower sext_inreg and generate optimized code using SXTL. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199296 91177308-0d34-0410-b5e6-96231b3b80d8 Jiangning Liu
3 changed file(s) with 275 addition(s) and 1 deletion(s).
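In short: a shl/ashr pair that sign-extends narrow lanes in-register in a 64-bit NEON vector is now combined into a vector sign-extend (selected as SSHLL #0, for which SXTL is the alias) plus a UZP1 shuffle. A minimal IR sketch of the kind of input this targets (editorial illustration, not part of the patch; the function name is made up, and the expected instructions are taken from the new tests below):

define <2 x i32> @sext_low16_in_v2i32(<2 x i32> %v) {
  ; Sign-extend the low 16 bits of each 32-bit lane in-register.
  %shl = shl <2 x i32> %v, <i32 16, i32 16>
  %sra = ashr <2 x i32> %shl, <i32 16, i32 16>
  ; With this patch the pair is expected (per test_sext_inreg_v2i16i32_2 below) to lower to:
  ;   sshll v0.4s, v0.4h, #0
  ;   uzp1  v0.4s, v0.4s, v0.4s
  ret <2 x i32> %sra
}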
285285 setExceptionSelectorRegister(AArch64::X1);
286286
287287 if (Subtarget->hasNEON()) {
288 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Expand);
289 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
290 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
291 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v1i64, Expand);
292 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v16i8, Expand);
293 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Expand);
294 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
295 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Expand);
296
288297 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom);
289298 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
290299 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
35733582 return (Cnt >= 1 && Cnt <= ElementBits);
35743583 }
35753584
3576 /// Checks for immediate versions of vector shifts and lowers them.
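/// Build the replacement sequence for a matched sext_inreg-style shift pair:
/// bitcast Src to SrcVT, SIGN_EXTEND it to DestVT (expected to select as
/// SSHLL #0), shuffle with Mask (expected to select as UZP1), and return the
/// low 64 bits as a SubRegVT value via EXTRACT_SUBREG of sub_64.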
3585 static SDValue GenForSextInreg(SDNode *N,
3586 TargetLowering::DAGCombinerInfo &DCI,
3587 EVT SrcVT, EVT DestVT, EVT SubRegVT,
3588 const int *Mask, SDValue Src) {
3589 SelectionDAG &DAG = DCI.DAG;
3590 SDValue Bitcast
3591 = DAG.getNode(ISD::BITCAST, SDLoc(N), SrcVT, Src);
3592 SDValue Sext
3593 = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), DestVT, Bitcast);
3594 SDValue ShuffleVec
3595 = DAG.getVectorShuffle(DestVT, SDLoc(N), Sext, DAG.getUNDEF(DestVT), Mask);
3596 SDValue ExtractSubreg
3597 = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N),
3598 SubRegVT, ShuffleVec,
3599 DAG.getTargetConstant(AArch64::sub_64, MVT::i32)), 0);
3600 return ExtractSubreg;
3601 }
3602
3603 /// Checks for vector shifts and lowers them.
35773604 static SDValue PerformShiftCombine(SDNode *N,
35783605 TargetLowering::DAGCombinerInfo &DCI,
35793606 const AArch64Subtarget *ST) {
35803607 SelectionDAG &DAG = DCI.DAG;
35813608 EVT VT = N->getValueType(0);
35823609 if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 || VT == MVT::i64))
35833610 return PerformSRACombine(N, DCI);
3611
3612 // We're looking for an SRA/SHL pair to help generate the instruction
3613 //   sshll v0.8h, v0.8b, #0
3614 // The instruction SXTL is an alias of this instruction.
3615 //
3616 // For example, for a DAG like the one below,
3617 // v2i32 = sra (v2i32 (shl v2i32, 16)), 16
3618 // we can transform it into
3619 // v2i32 = EXTRACT_SUBREG
3620 //     (v4i32 (shuffle_vector
3621 //       (v4i32 (sext (v4i16 (bitcast v2i32)))),
3622 //       undef, (0, 2, u, u))),
3623 // sub_64
3624 //
3625 // With this transformation we expect to generate "SSHLL + UZP1".
3626 // Sometimes the UZP1 can be optimized away by combining with the surrounding context.
3627 int64_t ShrCnt, ShlCnt;
3628 if (N->getOpcode() == ISD::SRA
3629 && (VT == MVT::v2i32 || VT == MVT::v4i16)
3630 && isVShiftRImm(N->getOperand(1), VT, ShrCnt)
3631 && N->getOperand(0).getOpcode() == ISD::SHL
3632 && isVShiftRImm(N->getOperand(0).getOperand(1), VT, ShlCnt)) {
3633 SDValue Src = N->getOperand(0).getOperand(0);
3634 if (VT == MVT::v2i32 && ShrCnt == 16 && ShlCnt == 16) {
3635 // sext_inreg(v2i32, v2i16)
3636 // We essentially only care about the mask {0, 2, u, u}
3637 int Mask[4] = {0, 2, 4, 6};
3638 return GenForSextInreg(N, DCI, MVT::v4i16, MVT::v4i32, MVT::v2i32,
3639 Mask, Src);
3640 }
3641 else if (VT == MVT::v2i32 && ShrCnt == 24 && ShlCnt == 24) {
3642 // sext_inreg(v2i32, v2i8)
3643 // We essentially only care about the mask {0, u, 4, u, u, u, u, u}
3644 int Mask[8] = {0, 2, 4, 6, 8, 10, 12, 14};
3645 return GenForSextInreg(N, DCI, MVT::v8i8, MVT::v8i16, MVT::v2i32,
3646 Mask, Src);
3647 }
3648 else if (VT == MVT::v4i16 && ShrCnt == 8 && ShlCnt == 8) {
3649 // sext_inreg(v4i16, v4i8)
3651 // We essentially only care about the mask {0, 2, 4, 6, u, u, u, u}
3651 int Mask[8] = {0, 2, 4, 6, 8, 10, 12, 14};
3652 return GenForSextInreg(N, DCI, MVT::v8i8, MVT::v8i16, MVT::v4i16,
3653 Mask, Src);
3654 }
3655 }
35843656
35853657 // Nothing to be done for scalar shifts.
35863658 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
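As a concrete illustration of the combine in the hunk above (an editorial sketch, not part of the patch; value names are made up): for the v4i16 case with an 8-bit shift pair, the nodes built by GenForSextInreg correspond to the following IR-level sequence.

; Matched pattern: %r = ashr (shl <4 x i16> %src, <8,8,8,8>), <8,8,8,8>
%b  = bitcast <4 x i16> %src to <8 x i8>
%s  = sext <8 x i8> %b to <8 x i16>
%sh = shufflevector <8 x i16> %s, <8 x i16> undef,
        <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; %r is replaced by the low 64 bits of %sh (EXTRACT_SUBREG with sub_64), which
; per test_sext_inreg_v4i8i16_2 below is expected to select to
;   sshll v0.8h, v0.8b, #0
;   uzp1  v0.8h, v0.8h, v0.8h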
18761876 def UXTL2vv_8H : NeonI_ext_len_alias<"uxtl2", ".4s", ".8h", USHLLvvi_8H, VPR128, VPR128>;
18771877 def UXTL2vv_4S : NeonI_ext_len_alias<"uxtl2", ".2d", ".4s", USHLLvvi_4S, VPR128, VPR128>;
18781878
1879 def : Pat<(v8i16 (anyext (v8i8 VPR64:$Rn))), (USHLLvvi_8B VPR64:$Rn, 0)>;
1880 def : Pat<(v4i32 (anyext (v4i16 VPR64:$Rn))), (USHLLvvi_4H VPR64:$Rn, 0)>;
1881 def : Pat<(v2i64 (anyext (v2i32 VPR64:$Rn))), (USHLLvvi_2S VPR64:$Rn, 0)>;
1882
18791883 // Rounding/Saturating shift
18801884 class N2VShift_RQ opcode, string asmop, string T,
18811885 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
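For reference, the three new any_extend patterns in the hunk above map an any_extend node onto a zero-shift USHLL (an editorial note; the register choices below are illustrative):

; v8i16 any_extend of v8i8  -> ushll v0.8h, v0.8b, #0   (alias: uxtl v0.8h, v0.8b)
; v4i32 any_extend of v4i16 -> ushll v0.4s, v0.4h, #0   (alias: uxtl v0.4s, v0.4h)
; v2i64 any_extend of v2i32 -> ushll v0.2d, v0.2s, #0   (alias: uxtl v0.2d, v0.2s)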
0 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
1
2 ; For formal arguments, we have the following vector type promotions:
3 ; v2i8 is promoted to v2i32(f64)
4 ; v2i16 is promoted to v2i32(f64)
5 ; v4i8 is promoted to v4i16(f64)
6 ; v8i1 is promoted to v8i16(f128)
7
8 define <2 x i8> @test_sext_inreg_v2i8i16(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
9 ; CHECK-LABEL: test_sext_inreg_v2i8i16
10 ; CHECK: sshll v0.8h, v0.8b, #0
11 ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
12 ; CHECK-NEXT: sshll v1.8h, v1.8b, #0
13 ; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h
14 %1 = sext <2 x i8> %v1 to <2 x i16>
15 %2 = sext <2 x i8> %v2 to <2 x i16>
16 %3 = shufflevector <2 x i16> %1, <2 x i16> %2, <2 x i32> <i32 0, i32 2>
17 %4 = trunc <2 x i16> %3 to <2 x i8>
18 ret <2 x i8> %4
19 }
20
21 define <2 x i8> @test_sext_inreg_v2i8i16_2(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
22 ; CHECK-LABEL: test_sext_inreg_v2i8i16_2
23 ; CHECK: sshll v0.8h, v0.8b, #0
24 ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
25 ; CHECK-NEXT: sshll v1.8h, v1.8b, #0
26 ; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h
27 %a1 = shl <2 x i32> %v1, <i32 24, i32 24>
28 %a2 = ashr <2 x i32> %a1, <i32 24, i32 24>
29 %b1 = shl <2 x i32> %v2, <i32 24, i32 24>
30 %b2 = ashr <2 x i32> %b1, <i32 24, i32 24>
31 %c = shufflevector <2 x i32> %a2, <2 x i32> %b2, <2 x i32> <i32 0, i32 2>
32 %d = trunc <2 x i32> %c to <2 x i8>
33 ret <2 x i8> %d
34 }
35
36 define <2 x i8> @test_sext_inreg_v2i8i32(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
37 ; CHECK-LABEL: test_sext_inreg_v2i8i32
38 ; CHECK: sshll v0.8h, v0.8b, #0
39 ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
40 ; CHECK-NEXT: sshll v1.8h, v1.8b, #0
41 ; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h
42 %1 = sext <2 x i8> %v1 to <2 x i32>
43 %2 = sext <2 x i8> %v2 to <2 x i32>
44 %3 = shufflevector <2 x i32> %1, <2 x i32> %2, <2 x i32> <i32 0, i32 2>
45 %4 = trunc <2 x i32> %3 to <2 x i8>
46 ret <2 x i8> %4
47 }
48
49 define <2 x i8> @test_sext_inreg_v2i8i64(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
50 ; CHECK-LABEL: test_sext_inreg_v2i8i64
51 ; CHECK: ushll v1.2d, v1.2s, #0
52 ; CHECK: ushll v0.2d, v0.2s, #0
53 ; CHECK: shl v0.2d, v0.2d, #56
54 ; CHECK: sshr v0.2d, v0.2d, #56
55 ; CHECK: shl v1.2d, v1.2d, #56
56 ; CHECK: sshr v1.2d, v1.2d, #56
57 %1 = sext <2 x i8> %v1 to <2 x i64>
58 %2 = sext <2 x i8> %v2 to <2 x i64>
59 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
60 %4 = trunc <2 x i64> %3 to <2 x i8>
61 ret <2 x i8> %4
62 }
63
64 define <4 x i8> @test_sext_inreg_v4i8i16(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
65 ; CHECK-LABEL: test_sext_inreg_v4i8i16
66 ; CHECK: sshll v0.8h, v0.8b, #0
67 ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
68 ; CHECK-NEXT: sshll v1.8h, v1.8b, #0
69 ; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h
70 %1 = sext <4 x i8> %v1 to <4 x i16>
71 %2 = sext <4 x i8> %v2 to <4 x i16>
72 %3 = shufflevector <4 x i16> %1, <4 x i16> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
73 %4 = trunc <4 x i16> %3 to <4 x i8>
74 ret <4 x i8> %4
75 }
76
77 define <4 x i8> @test_sext_inreg_v4i8i16_2(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
78 ; CHECK-LABEL: test_sext_inreg_v4i8i16_2
79 ; CHECK: sshll v0.8h, v0.8b, #0
80 ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
81 ; CHECK-NEXT: sshll v1.8h, v1.8b, #0
82 ; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h
83 %a1 = shl <4 x i16> %v1, <i16 8, i16 8, i16 8, i16 8>
84 %a2 = ashr <4 x i16> %a1, <i16 8, i16 8, i16 8, i16 8>
85 %b1 = shl <4 x i16> %v2, <i16 8, i16 8, i16 8, i16 8>
86 %b2 = ashr <4 x i16> %b1, <i16 8, i16 8, i16 8, i16 8>
87 %c = shufflevector <4 x i16> %a2, <4 x i16> %b2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
88 %d = trunc <4 x i16> %c to <4 x i8>
89 ret <4 x i8> %d
90 }
91
92 define <4 x i8> @test_sext_inreg_v4i8i32(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
93 ; CHECK-LABEL: test_sext_inreg_v4i8i32
94 ; CHECK: ushll v1.4s, v1.4h, #0
95 ; CHECK: ushll v0.4s, v0.4h, #0
96 ; CHECK: shl v0.4s, v0.4s, #24
97 ; CHECK: sshr v0.4s, v0.4s, #24
98 ; CHECK: shl v1.4s, v1.4s, #24
99 ; CHECK: sshr v1.4s, v1.4s, #24
100 %1 = sext <4 x i8> %v1 to <4 x i32>
101 %2 = sext <4 x i8> %v2 to <4 x i32>
102 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
103 %4 = trunc <4 x i32> %3 to <4 x i8>
104 ret <4 x i8> %4
105 }
106
107 define <8 x i8> @test_sext_inreg_v8i8i16(<8 x i8> %v1, <8 x i8> %v2) nounwind readnone {
108 ; CHECK-LABEL: test_sext_inreg_v8i8i16
109 ; CHECK: sshll v0.8h, v0.8b, #0
110 ; CHECK: sshll v1.8h, v1.8b, #0
111 %1 = sext <8 x i8> %v1 to <8 x i16>
112 %2 = sext <8 x i8> %v2 to <8 x i16>
113 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
114 %4 = trunc <8 x i16> %3 to <8 x i8>
115 ret <8 x i8> %4
116 }
117
118 define <8 x i1> @test_sext_inreg_v8i1i16(<8 x i1> %v1, <8 x i1> %v2) nounwind readnone {
119 ; CHECK-LABEL: test_sext_inreg_v8i1i16
120 ; CHECK: ushll v1.8h, v1.8b, #0
121 ; CHECK: ushll v0.8h, v0.8b, #0
122 ; CHECK: shl v0.8h, v0.8h, #15
123 ; CHECK: sshr v0.8h, v0.8h, #15
124 ; CHECK: shl v1.8h, v1.8h, #15
125 ; CHECK: sshr v1.8h, v1.8h, #15
126 %1 = sext <8 x i1> %v1 to <8 x i16>
127 %2 = sext <8 x i1> %v2 to <8 x i16>
128 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
129 %4 = trunc <8 x i16> %3 to <8 x i1>
130 ret <8 x i1> %4
131 }
132
133 define <2 x i16> @test_sext_inreg_v2i16i32(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
134 ; CHECK-LABEL: test_sext_inreg_v2i16i32
135 ; CHECK: sshll v0.4s, v0.4h, #0
136 ; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
137 ; CHECK-NEXT: sshll v1.4s, v1.4h, #0
138 ; CHECK-NEXT: uzp1 v1.4s, v1.4s, v1.4s
139 %1 = sext <2 x i16> %v1 to <2 x i32>
140 %2 = sext <2 x i16> %v2 to <2 x i32>
141 %3 = shufflevector <2 x i32> %1, <2 x i32> %2, <2 x i32> <i32 0, i32 2>
142 %4 = trunc <2 x i32> %3 to <2 x i16>
143 ret <2 x i16> %4
144 }
145
146 define <2 x i16> @test_sext_inreg_v2i16i32_2(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
147 ; CHECK-LABEL: test_sext_inreg_v2i16i32_2
148 ; CHECK: sshll v0.4s, v0.4h, #0
149 ; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
150 ; CHECK-NEXT: sshll v1.4s, v1.4h, #0
151 ; CHECK-NEXT: uzp1 v1.4s, v1.4s, v1.4s
152 %a1 = shl <2 x i32> %v1, <i32 16, i32 16>
153 %a2 = ashr <2 x i32> %a1, <i32 16, i32 16>
154 %b1 = shl <2 x i32> %v2, <i32 16, i32 16>
155 %b2 = ashr <2 x i32> %b1, <i32 16, i32 16>
156 %c = shufflevector <2 x i32> %a2, <2 x i32> %b2, <2 x i32> <i32 0, i32 2>
157 %d = trunc <2 x i32> %c to <2 x i16>
158 ret <2 x i16> %d
159 }
160
161 define <2 x i16> @test_sext_inreg_v2i16i64(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
162 ; CHECK-LABEL: test_sext_inreg_v2i16i64
163 ; CHECK: ushll v1.2d, v1.2s, #0
164 ; CHECK: ushll v0.2d, v0.2s, #0
165 ; CHECK: shl v0.2d, v0.2d, #48
166 ; CHECK: sshr v0.2d, v0.2d, #48
167 ; CHECK: shl v1.2d, v1.2d, #48
168 ; CHECK: sshr v1.2d, v1.2d, #48
169 %1 = sext <2 x i16> %v1 to <2 x i64>
170 %2 = sext <2 x i16> %v2 to <2 x i64>
171 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
172 %4 = trunc <2 x i64> %3 to <2 x i16>
173 ret <2 x i16> %4
174 }
175
176 define <4 x i16> @test_sext_inreg_v4i16i32(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
177 ; CHECK-LABEL: test_sext_inreg_v4i16i32
178 ; CHECK: sshll v0.4s, v0.4h, #0
179 ; CHECK: sshll v1.4s, v1.4h, #0
180 %1 = sext <4 x i16> %v1 to <4 x i32>
181 %2 = sext <4 x i16> %v2 to <4 x i32>
182 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
183 %4 = trunc <4 x i32> %3 to <4 x i16>
184 ret <4 x i16> %4
185 }
186
187 define <2 x i32> @test_sext_inreg_v2i32i64(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
188 ; CHECK-LABEL: test_sext_inreg_v2i32i64
189 ; CHECK: sshll v0.2d, v0.2s, #0
190 ; CHECK: sshll v1.2d, v1.2s, #0
191 %1 = sext <2 x i32> %v1 to <2 x i64>
192 %2 = sext <2 x i32> %v2 to <2 x i64>
193 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
194 %4 = trunc <2 x i64> %3 to <2 x i32>
195 ret <2 x i32> %4
196 }
197