llvm.org GIT mirror llvm / 13c0638
[X86][AVX512] Add 512-bit vector ctlz costs + tests
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@303300 91177308-0d34-0410-b5e6-96231b3b80d8
Simon Pilgrim, 3 years ago
2 changed file(s) with 174 addition(s) and 6 deletion(s). Raw diff Collapse all Expand all
13911391 // CTLZ: llvm\test\CodeGen\X86\vector-lzcnt-*.ll
13921392 // CTPOP: llvm\test\CodeGen\X86\vector-popcnt-*.ll
13931393 // CTTZ: llvm\test\CodeGen\X86\vector-tzcnt-*.ll
1394 static const CostTblEntry AVX512CDCostTbl[] = {
1395 { ISD::CTLZ, MVT::v8i64, 1 },
1396 { ISD::CTLZ, MVT::v16i32, 1 },
1397 { ISD::CTLZ, MVT::v32i16, 8 },
1398 { ISD::CTLZ, MVT::v64i8, 20 },
1399 { ISD::CTLZ, MVT::v4i64, 1 },
1400 { ISD::CTLZ, MVT::v8i32, 1 },
1401 { ISD::CTLZ, MVT::v16i16, 4 },
1402 { ISD::CTLZ, MVT::v32i8, 10 },
1403 { ISD::CTLZ, MVT::v2i64, 1 },
1404 { ISD::CTLZ, MVT::v4i32, 1 },
1405 { ISD::CTLZ, MVT::v8i16, 4 },
1406 { ISD::CTLZ, MVT::v16i8, 4 },
1407 };
13941408 static const CostTblEntry AVX512BWCostTbl[] = {
13951409 { ISD::BITREVERSE, MVT::v8i64, 5 },
13961410 { ISD::BITREVERSE, MVT::v16i32, 5 },
13971411 { ISD::BITREVERSE, MVT::v32i16, 5 },
13981412 { ISD::BITREVERSE, MVT::v64i8, 5 },
1413 { ISD::CTLZ, MVT::v8i64, 23 },
1414 { ISD::CTLZ, MVT::v16i32, 22 },
1415 { ISD::CTLZ, MVT::v32i16, 18 },
1416 { ISD::CTLZ, MVT::v64i8, 17 },
13991417 { ISD::CTTZ, MVT::v8i64, 10 },
14001418 { ISD::CTTZ, MVT::v16i32, 14 },
14011419 { ISD::CTTZ, MVT::v32i16, 12 },
14041422 static const CostTblEntry AVX512CostTbl[] = {
14051423 { ISD::BITREVERSE, MVT::v8i64, 36 },
14061424 { ISD::BITREVERSE, MVT::v16i32, 24 },
1425 { ISD::CTLZ, MVT::v8i64, 29 },
1426 { ISD::CTLZ, MVT::v16i32, 35 },
14071427 { ISD::CTTZ, MVT::v8i64, 20 },
14081428 { ISD::CTTZ, MVT::v16i32, 28 },
14091429 };
15651585 MVT MTy = LT.second;
15661586
15671587 // Attempt to lookup cost.
1588 if (ST->hasCDI())
1589 if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy))
1590 return LT.first * Entry->Cost;
1591
15681592 if (ST->hasBWI())
15691593 if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
15701594 return LT.first * Entry->Cost;
0 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 -check-prefix=NOPOPCNT
1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -check-prefix=POPCNT
2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
4 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
5 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
0 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2
1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42
2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1
3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2
4 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1
5 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2
6 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl -mattr=-avx512cd -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512F
7 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx -mattr=-avx512cd -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512BW
8 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx -mattr=+avx512cd -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512CD
69
710 ; Verify the cost of scalar leading zero count instructions.
811
7881 declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
7982 declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
8083 declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)
84
85 declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1)
86 declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1)
87 declare <32 x i16> @llvm.ctlz.v32i16(<32 x i16>, i1)
88 declare <64 x i8> @llvm.ctlz.v64i8(<64 x i8>, i1)
8189
8290 define <2 x i64> @var_ctlz_v2i64(<2 x i64> %a) {
8391 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64':
8492 ; SSE2: Found an estimated cost of 25 for instruction: %ctlz
8593 ; SSE42: Found an estimated cost of 23 for instruction: %ctlz
8694 ; AVX: Found an estimated cost of 23 for instruction: %ctlz
95 ; AVX512: Found an estimated cost of 23 for instruction: %ctlz
96 ; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
8797 %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 0)
8898 ret <2 x i64> %ctlz
8999 }
93103 ; SSE2: Found an estimated cost of 25 for instruction: %ctlz
94104 ; SSE42: Found an estimated cost of 23 for instruction: %ctlz
95105 ; AVX: Found an estimated cost of 23 for instruction: %ctlz
106 ; AVX512: Found an estimated cost of 23 for instruction: %ctlz
107 ; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
96108 %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 1)
97109 ret <2 x i64> %ctlz
98110 }
103115 ; SSE42: Found an estimated cost of 46 for instruction: %ctlz
104116 ; AVX1: Found an estimated cost of 48 for instruction: %ctlz
105117 ; AVX2: Found an estimated cost of 23 for instruction: %ctlz
118 ; AVX512: Found an estimated cost of 23 for instruction: %ctlz
119 ; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
106120 %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 0)
107121 ret <4 x i64> %ctlz
108122 }
113127 ; SSE42: Found an estimated cost of 46 for instruction: %ctlz
114128 ; AVX1: Found an estimated cost of 48 for instruction: %ctlz
115129 ; AVX2: Found an estimated cost of 23 for instruction: %ctlz
130 ; AVX512: Found an estimated cost of 23 for instruction: %ctlz
131 ; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
116132 %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 1)
117133 ret <4 x i64> %ctlz
134 }
135
136 define <8 x i64> @var_ctlz_v8i64(<8 x i64> %a) {
137 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i64':
138 ; SSE2: Found an estimated cost of 100 for instruction: %ctlz
139 ; SSE42: Found an estimated cost of 92 for instruction: %ctlz
140 ; AVX1: Found an estimated cost of 96 for instruction: %ctlz
141 ; AVX2: Found an estimated cost of 46 for instruction: %ctlz
142 ; AVX512F: Found an estimated cost of 29 for instruction: %ctlz
143 ; AVX512BW: Found an estimated cost of 23 for instruction: %ctlz
144 ; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
145 %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 0)
146 ret <8 x i64> %ctlz
147 }
148
149 define <8 x i64> @var_ctlz_v8i64u(<8 x i64> %a) {
150 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i64u':
151 ; SSE2: Found an estimated cost of 100 for instruction: %ctlz
152 ; SSE42: Found an estimated cost of 92 for instruction: %ctlz
153 ; AVX1: Found an estimated cost of 96 for instruction: %ctlz
154 ; AVX2: Found an estimated cost of 46 for instruction: %ctlz
155 ; AVX512F: Found an estimated cost of 29 for instruction: %ctlz
156 ; AVX512BW: Found an estimated cost of 23 for instruction: %ctlz
157 ; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
158 %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 1)
159 ret <8 x i64> %ctlz
118160 }
119161
120162 define <4 x i32> @var_ctlz_v4i32(<4 x i32> %a) {
122164 ; SSE2: Found an estimated cost of 26 for instruction: %ctlz
123165 ; SSE42: Found an estimated cost of 18 for instruction: %ctlz
124166 ; AVX: Found an estimated cost of 18 for instruction: %ctlz
167 ; AVX512: Found an estimated cost of 18 for instruction: %ctlz
168 ; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
125169 %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 0)
126170 ret <4 x i32> %ctlz
127171 }
131175 ; SSE2: Found an estimated cost of 26 for instruction: %ctlz
132176 ; SSE42: Found an estimated cost of 18 for instruction: %ctlz
133177 ; AVX: Found an estimated cost of 18 for instruction: %ctlz
178 ; AVX512: Found an estimated cost of 18 for instruction: %ctlz
179 ; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
134180 %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 1)
135181 ret <4 x i32> %ctlz
136182 }
141187 ; SSE42: Found an estimated cost of 36 for instruction: %ctlz
142188 ; AVX1: Found an estimated cost of 38 for instruction: %ctlz
143189 ; AVX2: Found an estimated cost of 18 for instruction: %ctlz
190 ; AVX512: Found an estimated cost of 18 for instruction: %ctlz
191 ; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
144192 %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 0)
145193 ret <8 x i32> %ctlz
146194 }
151199 ; SSE42: Found an estimated cost of 36 for instruction: %ctlz
152200 ; AVX1: Found an estimated cost of 38 for instruction: %ctlz
153201 ; AVX2: Found an estimated cost of 18 for instruction: %ctlz
202 ; AVX512: Found an estimated cost of 18 for instruction: %ctlz
203 ; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
154204 %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 1)
155205 ret <8 x i32> %ctlz
206 }
207
208 define <16 x i32> @var_ctlz_v16i32(<16 x i32> %a) {
209 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i32':
210 ; SSE2: Found an estimated cost of 104 for instruction: %ctlz
211 ; SSE42: Found an estimated cost of 72 for instruction: %ctlz
212 ; AVX1: Found an estimated cost of 76 for instruction: %ctlz
213 ; AVX2: Found an estimated cost of 36 for instruction: %ctlz
214 ; AVX512F: Found an estimated cost of 35 for instruction: %ctlz
215 ; AVX512BW: Found an estimated cost of 22 for instruction: %ctlz
216 ; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
217 %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 0)
218 ret <16 x i32> %ctlz
219 }
220
221 define <16 x i32> @var_ctlz_v16i32u(<16 x i32> %a) {
222 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i32u':
223 ; SSE2: Found an estimated cost of 104 for instruction: %ctlz
224 ; SSE42: Found an estimated cost of 72 for instruction: %ctlz
225 ; AVX1: Found an estimated cost of 76 for instruction: %ctlz
226 ; AVX2: Found an estimated cost of 36 for instruction: %ctlz
227 ; AVX512F: Found an estimated cost of 35 for instruction: %ctlz
228 ; AVX512BW: Found an estimated cost of 22 for instruction: %ctlz
229 ; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz
230 %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 1)
231 ret <16 x i32> %ctlz
156232 }
157233
158234 define <8 x i16> @var_ctlz_v8i16(<8 x i16> %a) {
160236 ; SSE2: Found an estimated cost of 20 for instruction: %ctlz
161237 ; SSE42: Found an estimated cost of 14 for instruction: %ctlz
162238 ; AVX: Found an estimated cost of 14 for instruction: %ctlz
239 ; AVX512: Found an estimated cost of 14 for instruction: %ctlz
240 ; AVX512CD: Found an estimated cost of 4 for instruction: %ctlz
163241 %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 0)
164242 ret <8 x i16> %ctlz
165243 }
169247 ; SSE2: Found an estimated cost of 20 for instruction: %ctlz
170248 ; SSE42: Found an estimated cost of 14 for instruction: %ctlz
171249 ; AVX: Found an estimated cost of 14 for instruction: %ctlz
250 ; AVX512: Found an estimated cost of 14 for instruction: %ctlz
251 ; AVX512CD: Found an estimated cost of 4 for instruction: %ctlz
172252 %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 1)
173253 ret <8 x i16> %ctlz
174254 }
179259 ; SSE42: Found an estimated cost of 28 for instruction: %ctlz
180260 ; AVX1: Found an estimated cost of 30 for instruction: %ctlz
181261 ; AVX2: Found an estimated cost of 14 for instruction: %ctlz
262 ; AVX512: Found an estimated cost of 14 for instruction: %ctlz
263 ; AVX512CD: Found an estimated cost of 4 for instruction: %ctlz
182264 %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 0)
183265 ret <16 x i16> %ctlz
184266 }
189271 ; SSE42: Found an estimated cost of 28 for instruction: %ctlz
190272 ; AVX1: Found an estimated cost of 30 for instruction: %ctlz
191273 ; AVX2: Found an estimated cost of 14 for instruction: %ctlz
274 ; AVX512: Found an estimated cost of 14 for instruction: %ctlz
275 ; AVX512CD: Found an estimated cost of 4 for instruction: %ctlz
192276 %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 1)
193277 ret <16 x i16> %ctlz
278 }
279
280 define <32 x i16> @var_ctlz_v32i16(<32 x i16> %a) {
281 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i16':
282 ; SSE2: Found an estimated cost of 80 for instruction: %ctlz
283 ; SSE42: Found an estimated cost of 56 for instruction: %ctlz
284 ; AVX1: Found an estimated cost of 60 for instruction: %ctlz
285 ; AVX2: Found an estimated cost of 28 for instruction: %ctlz
286 ; AVX512F: Found an estimated cost of 28 for instruction: %ctlz
287 ; AVX512BW: Found an estimated cost of 18 for instruction: %ctlz
288 ; AVX512CD: Found an estimated cost of 8 for instruction: %ctlz
289 %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 0)
290 ret <32 x i16> %ctlz
291 }
292
293 define <32 x i16> @var_ctlz_v32i16u(<32 x i16> %a) {
294 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i16u':
295 ; SSE2: Found an estimated cost of 80 for instruction: %ctlz
296 ; SSE42: Found an estimated cost of 56 for instruction: %ctlz
297 ; AVX1: Found an estimated cost of 60 for instruction: %ctlz
298 ; AVX2: Found an estimated cost of 28 for instruction: %ctlz
299 ; AVX512F: Found an estimated cost of 28 for instruction: %ctlz
300 ; AVX512BW: Found an estimated cost of 18 for instruction: %ctlz
301 ; AVX512CD: Found an estimated cost of 8 for instruction: %ctlz
302 %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 1)
303 ret <32 x i16> %ctlz
194304 }
195305
196306 define <16 x i8> @var_ctlz_v16i8(<16 x i8> %a) {
198308 ; SSE2: Found an estimated cost of 17 for instruction: %ctlz
199309 ; SSE42: Found an estimated cost of 9 for instruction: %ctlz
200310 ; AVX: Found an estimated cost of 9 for instruction: %ctlz
311 ; AVX512: Found an estimated cost of 9 for instruction: %ctlz
312 ; AVX512CD: Found an estimated cost of 4 for instruction: %ctlz
201313 %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 0)
202314 ret <16 x i8> %ctlz
203315 }
207319 ; SSE2: Found an estimated cost of 17 for instruction: %ctlz
208320 ; SSE42: Found an estimated cost of 9 for instruction: %ctlz
209321 ; AVX: Found an estimated cost of 9 for instruction: %ctlz
322 ; AVX512: Found an estimated cost of 9 for instruction: %ctlz
323 ; AVX512CD: Found an estimated cost of 4 for instruction: %ctlz
210324 %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 1)
211325 ret <16 x i8> %ctlz
212326 }
217331 ; SSE42: Found an estimated cost of 18 for instruction: %ctlz
218332 ; AVX1: Found an estimated cost of 20 for instruction: %ctlz
219333 ; AVX2: Found an estimated cost of 9 for instruction: %ctlz
334 ; AVX512: Found an estimated cost of 9 for instruction: %ctlz
335 ; AVX512CD: Found an estimated cost of 10 for instruction: %ctlz
220336 %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 0)
221337 ret <32 x i8> %ctlz
222338 }
227343 ; SSE42: Found an estimated cost of 18 for instruction: %ctlz
228344 ; AVX1: Found an estimated cost of 20 for instruction: %ctlz
229345 ; AVX2: Found an estimated cost of 9 for instruction: %ctlz
346 ; AVX512: Found an estimated cost of 9 for instruction: %ctlz
347 ; AVX512CD: Found an estimated cost of 10 for instruction: %ctlz
230348 %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 1)
231349 ret <32 x i8> %ctlz
232350 }
351
352 define <64 x i8> @var_ctlz_v64i8(<64 x i8> %a) {
353 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v64i8':
354 ; SSE2: Found an estimated cost of 68 for instruction: %ctlz
355 ; SSE42: Found an estimated cost of 36 for instruction: %ctlz
356 ; AVX1: Found an estimated cost of 40 for instruction: %ctlz
357 ; AVX2: Found an estimated cost of 18 for instruction: %ctlz
358 ; AVX512F: Found an estimated cost of 18 for instruction: %ctlz
359 ; AVX512BW: Found an estimated cost of 17 for instruction: %ctlz
360 ; AVX512CD: Found an estimated cost of 20 for instruction: %ctlz
361 %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 0)
362 ret <64 x i8> %ctlz
363 }
364
365 define <64 x i8> @var_ctlz_v64i8u(<64 x i8> %a) {
366 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v64i8u':
367 ; SSE2: Found an estimated cost of 68 for instruction: %ctlz
368 ; SSE42: Found an estimated cost of 36 for instruction: %ctlz
369 ; AVX1: Found an estimated cost of 40 for instruction: %ctlz
370 ; AVX2: Found an estimated cost of 18 for instruction: %ctlz
371 ; AVX512F: Found an estimated cost of 18 for instruction: %ctlz
372 ; AVX512BW: Found an estimated cost of 17 for instruction: %ctlz
373 ; AVX512CD: Found an estimated cost of 20 for instruction: %ctlz
374 %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 1)
375 ret <64 x i8> %ctlz
376 }