llvm.org GIT mirror llvm / 3b40432
[X86][AVX512] Add 512-bit vector cttz costs + tests
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@303293 91177308-0d34-0410-b5e6-96231b3b80d8
Simon Pilgrim, 3 years ago
2 changed file(s) with 131 addition(s) and 6 deletion(s). Raw diff Collapse all Expand all
13961396 { ISD::BITREVERSE, MVT::v16i32, 5 },
13971397 { ISD::BITREVERSE, MVT::v32i16, 5 },
13981398 { ISD::BITREVERSE, MVT::v64i8, 5 },
1399 { ISD::CTTZ, MVT::v8i64, 10 },
1400 { ISD::CTTZ, MVT::v16i32, 14 },
1401 { ISD::CTTZ, MVT::v32i16, 12 },
1402 { ISD::CTTZ, MVT::v64i8, 9 },
13991403 };
14001404 static const CostTblEntry AVX512CostTbl[] = {
14011405 { ISD::BITREVERSE, MVT::v8i64, 36 },
14021406 { ISD::BITREVERSE, MVT::v16i32, 24 },
1407 { ISD::CTTZ, MVT::v8i64, 20 },
1408 { ISD::CTTZ, MVT::v16i32, 28 },
14031409 };
14041410 static const CostTblEntry XOPCostTbl[] = {
14051411 { ISD::BITREVERSE, MVT::v4i64, 4 },
0 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 -check-prefix=NOPOPCNT
1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -check-prefix=POPCNT
2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
4 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
5 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
0 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2
1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42
2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1
3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2
4 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1
5 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2
6 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512F
7 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512BW
68
79 ; Verify the cost of scalar trailing zero count instructions.
810
7880 declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
7981 declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)
8082 declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)
83
84 declare <8 x i64> @llvm.cttz.v8i64(<8 x i64>, i1)
85 declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1)
86 declare <32 x i16> @llvm.cttz.v32i16(<32 x i16>, i1)
87 declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>, i1)
8188
8289 define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
8390 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64':
8491 ; SSE2: Found an estimated cost of 14 for instruction: %cttz
8592 ; SSE42: Found an estimated cost of 10 for instruction: %cttz
8693 ; AVX: Found an estimated cost of 10 for instruction: %cttz
94 ; AVX512: Found an estimated cost of 10 for instruction: %cttz
8795 %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0)
8896 ret <2 x i64> %cttz
8997 }
93101 ; SSE2: Found an estimated cost of 14 for instruction: %cttz
94102 ; SSE42: Found an estimated cost of 10 for instruction: %cttz
95103 ; AVX: Found an estimated cost of 10 for instruction: %cttz
104 ; AVX512: Found an estimated cost of 10 for instruction: %cttz
96105 %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1)
97106 ret <2 x i64> %cttz
98107 }
103112 ; SSE42: Found an estimated cost of 20 for instruction: %cttz
104113 ; AVX1: Found an estimated cost of 22 for instruction: %cttz
105114 ; AVX2: Found an estimated cost of 10 for instruction: %cttz
115 ; AVX512: Found an estimated cost of 10 for instruction: %cttz
106116 %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0)
107117 ret <4 x i64> %cttz
108118 }
113123 ; SSE42: Found an estimated cost of 20 for instruction: %cttz
114124 ; AVX1: Found an estimated cost of 22 for instruction: %cttz
115125 ; AVX2: Found an estimated cost of 10 for instruction: %cttz
126 ; AVX512: Found an estimated cost of 10 for instruction: %cttz
116127 %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1)
117128 ret <4 x i64> %cttz
129 }
130
131 define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) {
132 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i64':
133 ; SSE2: Found an estimated cost of 56 for instruction: %cttz
134 ; SSE42: Found an estimated cost of 40 for instruction: %cttz
135 ; AVX1: Found an estimated cost of 44 for instruction: %cttz
136 ; AVX2: Found an estimated cost of 20 for instruction: %cttz
137 ; AVX512F: Found an estimated cost of 20 for instruction: %cttz
138 ; AVX512BW: Found an estimated cost of 10 for instruction: %cttz
139 %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 0)
140 ret <8 x i64> %cttz
141 }
142
143 define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) {
144 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i64u':
145 ; SSE2: Found an estimated cost of 56 for instruction: %cttz
146 ; SSE42: Found an estimated cost of 40 for instruction: %cttz
147 ; AVX1: Found an estimated cost of 44 for instruction: %cttz
148 ; AVX2: Found an estimated cost of 20 for instruction: %cttz
149 ; AVX512F: Found an estimated cost of 20 for instruction: %cttz
150 ; AVX512BW: Found an estimated cost of 10 for instruction: %cttz
151 %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 1)
152 ret <8 x i64> %cttz
118153 }
119154
120155 define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
122157 ; SSE2: Found an estimated cost of 18 for instruction: %cttz
123158 ; SSE42: Found an estimated cost of 14 for instruction: %cttz
124159 ; AVX: Found an estimated cost of 14 for instruction: %cttz
160 ; AVX512: Found an estimated cost of 14 for instruction: %cttz
125161 %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0)
126162 ret <4 x i32> %cttz
127163 }
131167 ; SSE2: Found an estimated cost of 18 for instruction: %cttz
132168 ; SSE42: Found an estimated cost of 14 for instruction: %cttz
133169 ; AVX: Found an estimated cost of 14 for instruction: %cttz
170 ; AVX512: Found an estimated cost of 14 for instruction: %cttz
134171 %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1)
135172 ret <4 x i32> %cttz
136173 }
141178 ; SSE42: Found an estimated cost of 28 for instruction: %cttz
142179 ; AVX1: Found an estimated cost of 30 for instruction: %cttz
143180 ; AVX2: Found an estimated cost of 14 for instruction: %cttz
181 ; AVX512: Found an estimated cost of 14 for instruction: %cttz
144182 %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0)
145183 ret <8 x i32> %cttz
146184 }
151189 ; SSE42: Found an estimated cost of 28 for instruction: %cttz
152190 ; AVX1: Found an estimated cost of 30 for instruction: %cttz
153191 ; AVX2: Found an estimated cost of 14 for instruction: %cttz
192 ; AVX512: Found an estimated cost of 14 for instruction: %cttz
154193 %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1)
155194 ret <8 x i32> %cttz
195 }
196
197 define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) {
198 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i32':
199 ; SSE2: Found an estimated cost of 72 for instruction: %cttz
200 ; SSE42: Found an estimated cost of 56 for instruction: %cttz
201 ; AVX1: Found an estimated cost of 60 for instruction: %cttz
202 ; AVX2: Found an estimated cost of 28 for instruction: %cttz
203 ; AVX512F: Found an estimated cost of 28 for instruction: %cttz
204 ; AVX512BW: Found an estimated cost of 14 for instruction: %cttz
205 %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 0)
206 ret <16 x i32> %cttz
207 }
208
209 define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) {
210 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i32u':
211 ; SSE2: Found an estimated cost of 72 for instruction: %cttz
212 ; SSE42: Found an estimated cost of 56 for instruction: %cttz
213 ; AVX1: Found an estimated cost of 60 for instruction: %cttz
214 ; AVX2: Found an estimated cost of 28 for instruction: %cttz
215 ; AVX512F: Found an estimated cost of 28 for instruction: %cttz
216 ; AVX512BW: Found an estimated cost of 14 for instruction: %cttz
217 %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 1)
218 ret <16 x i32> %cttz
156219 }
157220
158221 define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
160223 ; SSE2: Found an estimated cost of 16 for instruction: %cttz
161224 ; SSE42: Found an estimated cost of 12 for instruction: %cttz
162225 ; AVX: Found an estimated cost of 12 for instruction: %cttz
226 ; AVX512: Found an estimated cost of 12 for instruction: %cttz
163227 %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0)
164228 ret <8 x i16> %cttz
165229 }
169233 ; SSE2: Found an estimated cost of 16 for instruction: %cttz
170234 ; SSE42: Found an estimated cost of 12 for instruction: %cttz
171235 ; AVX: Found an estimated cost of 12 for instruction: %cttz
236 ; AVX512: Found an estimated cost of 12 for instruction: %cttz
172237 %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1)
173238 ret <8 x i16> %cttz
174239 }
179244 ; SSE42: Found an estimated cost of 24 for instruction: %cttz
180245 ; AVX1: Found an estimated cost of 26 for instruction: %cttz
181246 ; AVX2: Found an estimated cost of 12 for instruction: %cttz
247 ; AVX512: Found an estimated cost of 12 for instruction: %cttz
182248 %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0)
183249 ret <16 x i16> %cttz
184250 }
189255 ; SSE42: Found an estimated cost of 24 for instruction: %cttz
190256 ; AVX1: Found an estimated cost of 26 for instruction: %cttz
191257 ; AVX2: Found an estimated cost of 12 for instruction: %cttz
258 ; AVX512: Found an estimated cost of 12 for instruction: %cttz
192259 %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1)
193260 ret <16 x i16> %cttz
261 }
262
263 define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) {
264 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i16':
265 ; SSE2: Found an estimated cost of 64 for instruction: %cttz
266 ; SSE42: Found an estimated cost of 48 for instruction: %cttz
267 ; AVX1: Found an estimated cost of 52 for instruction: %cttz
268 ; AVX2: Found an estimated cost of 24 for instruction: %cttz
269 ; AVX512F: Found an estimated cost of 24 for instruction: %cttz
270 ; AVX512BW: Found an estimated cost of 12 for instruction: %cttz
271 %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 0)
272 ret <32 x i16> %cttz
273 }
274
275 define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) {
276 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i16u':
277 ; SSE2: Found an estimated cost of 64 for instruction: %cttz
278 ; SSE42: Found an estimated cost of 48 for instruction: %cttz
279 ; AVX1: Found an estimated cost of 52 for instruction: %cttz
280 ; AVX2: Found an estimated cost of 24 for instruction: %cttz
281 ; AVX512F: Found an estimated cost of 24 for instruction: %cttz
282 ; AVX512BW: Found an estimated cost of 12 for instruction: %cttz
283 %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 1)
284 ret <32 x i16> %cttz
194285 }
195286
196287 define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
198289 ; SSE2: Found an estimated cost of 13 for instruction: %cttz
199290 ; SSE42: Found an estimated cost of 9 for instruction: %cttz
200291 ; AVX: Found an estimated cost of 9 for instruction: %cttz
292 ; AVX512: Found an estimated cost of 9 for instruction: %cttz
201293 %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0)
202294 ret <16 x i8> %cttz
203295 }
207299 ; SSE2: Found an estimated cost of 13 for instruction: %cttz
208300 ; SSE42: Found an estimated cost of 9 for instruction: %cttz
209301 ; AVX: Found an estimated cost of 9 for instruction: %cttz
302 ; AVX512: Found an estimated cost of 9 for instruction: %cttz
210303 %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1)
211304 ret <16 x i8> %cttz
212305 }
217310 ; SSE42: Found an estimated cost of 18 for instruction: %cttz
218311 ; AVX1: Found an estimated cost of 20 for instruction: %cttz
219312 ; AVX2: Found an estimated cost of 9 for instruction: %cttz
313 ; AVX512: Found an estimated cost of 9 for instruction: %cttz
220314 %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0)
221315 ret <32 x i8> %cttz
222316 }
227321 ; SSE42: Found an estimated cost of 18 for instruction: %cttz
228322 ; AVX1: Found an estimated cost of 20 for instruction: %cttz
229323 ; AVX2: Found an estimated cost of 9 for instruction: %cttz
324 ; AVX512: Found an estimated cost of 9 for instruction: %cttz
230325 %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1)
231326 ret <32 x i8> %cttz
232327 }
328
329 define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) {
330 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v64i8':
331 ; SSE2: Found an estimated cost of 52 for instruction: %cttz
332 ; SSE42: Found an estimated cost of 36 for instruction: %cttz
333 ; AVX1: Found an estimated cost of 40 for instruction: %cttz
334 ; AVX2: Found an estimated cost of 18 for instruction: %cttz
335 ; AVX512F: Found an estimated cost of 18 for instruction: %cttz
336 ; AVX512BW: Found an estimated cost of 9 for instruction: %cttz
337 %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 0)
338 ret <64 x i8> %cttz
339 }
340
341 define <64 x i8> @var_cttz_v64i8u(<64 x i8> %a) {
342 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v64i8u':
343 ; SSE2: Found an estimated cost of 52 for instruction: %cttz
344 ; SSE42: Found an estimated cost of 36 for instruction: %cttz
345 ; AVX1: Found an estimated cost of 40 for instruction: %cttz
346 ; AVX2: Found an estimated cost of 18 for instruction: %cttz
347 ; AVX512F: Found an estimated cost of 18 for instruction: %cttz
348 ; AVX512BW: Found an estimated cost of 9 for instruction: %cttz
349 %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 1)
350 ret <64 x i8> %cttz
351 }