llvm.org GIT mirror llvm / ace0d94
[X86] Split ctpop/ctlz/cttz cost tests. This will make it a lot easier to test all the permutations of avx512. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@303290 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 3 years ago
4 changed file(s) with 599 addition(s) and 587 deletion(s). Raw diff Collapse all Expand all
+0
-587
test/Analysis/CostModel/X86/ctbits-cost.ll less more
0 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 -check-prefix=NOPOPCNT
1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -check-prefix=POPCNT
2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
4 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
5 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
6
7 ; Verify the cost of scalar population count instructions.
8
9 declare i64 @llvm.ctpop.i64(i64)
10 declare i32 @llvm.ctpop.i32(i32)
11 declare i16 @llvm.ctpop.i16(i16)
12 declare i8 @llvm.ctpop.i8(i8)
13
14 define i64 @var_ctpop_i64(i64 %a) { ; cost-model test for scalar i64 llvm.ctpop; expected cost differs between the NOPOPCNT and POPCNT prefixes
15 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i64':
16 ; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop
17 ; POPCNT: Found an estimated cost of 1 for instruction: %ctpop
18 %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
19 ret i64 %ctpop
20 }
21
22 define i32 @var_ctpop_i32(i32 %a) { ; cost-model test for scalar i32 llvm.ctpop; expected cost differs between the NOPOPCNT and POPCNT prefixes
23 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i32':
24 ; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop
25 ; POPCNT: Found an estimated cost of 1 for instruction: %ctpop
26 %ctpop = call i32 @llvm.ctpop.i32(i32 %a)
27 ret i32 %ctpop
28 }
29
30 define i16 @var_ctpop_i16(i16 %a) { ; cost-model test for scalar i16 llvm.ctpop; expected cost differs between the NOPOPCNT and POPCNT prefixes
31 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i16':
32 ; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop
33 ; POPCNT: Found an estimated cost of 1 for instruction: %ctpop
34 %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
35 ret i16 %ctpop
36 }
37
38 define i8 @var_ctpop_i8(i8 %a) { ; cost-model test for scalar i8 llvm.ctpop; expected cost differs between the NOPOPCNT and POPCNT prefixes
39 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i8':
40 ; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop
41 ; POPCNT: Found an estimated cost of 1 for instruction: %ctpop
42 %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
43 ret i8 %ctpop
44 }
45
46 ; Verify the cost of vector population count instructions.
47
48 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
49 declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
50 declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
51 declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
52
53 declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
54 declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
55 declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
56 declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
57
58 define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) { ; cost-model test for <2 x i64> llvm.ctpop; expectations keyed by the SSE2, SSE42 and AVX prefixes
59 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v2i64':
60 ; SSE2: Found an estimated cost of 12 for instruction: %ctpop
61 ; SSE42: Found an estimated cost of 7 for instruction: %ctpop
62 ; AVX: Found an estimated cost of 7 for instruction: %ctpop
63 %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
64 ret <2 x i64> %ctpop
65 }
66
67 define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) { ; cost-model test for <4 x i64> llvm.ctpop; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
68 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v4i64':
69 ; SSE2: Found an estimated cost of 24 for instruction: %ctpop
70 ; SSE42: Found an estimated cost of 14 for instruction: %ctpop
71 ; AVX1: Found an estimated cost of 16 for instruction: %ctpop
72 ; AVX2: Found an estimated cost of 7 for instruction: %ctpop
73 %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
74 ret <4 x i64> %ctpop
75 }
76
77 define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) { ; cost-model test for <4 x i32> llvm.ctpop; expectations keyed by the SSE2, SSE42 and AVX prefixes
78 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v4i32':
79 ; SSE2: Found an estimated cost of 15 for instruction: %ctpop
80 ; SSE42: Found an estimated cost of 11 for instruction: %ctpop
81 ; AVX: Found an estimated cost of 11 for instruction: %ctpop
82 %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
83 ret <4 x i32> %ctpop
84 }
85
86 define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) { ; cost-model test for <8 x i32> llvm.ctpop; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
87 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i32':
88 ; SSE2: Found an estimated cost of 30 for instruction: %ctpop
89 ; SSE42: Found an estimated cost of 22 for instruction: %ctpop
90 ; AVX1: Found an estimated cost of 24 for instruction: %ctpop
91 ; AVX2: Found an estimated cost of 11 for instruction: %ctpop
92 %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
93 ret <8 x i32> %ctpop
94 }
95
96 define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) { ; cost-model test for <8 x i16> llvm.ctpop; expectations keyed by the SSE2, SSE42 and AVX prefixes
97 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i16':
98 ; SSE2: Found an estimated cost of 13 for instruction: %ctpop
99 ; SSE42: Found an estimated cost of 9 for instruction: %ctpop
100 ; AVX: Found an estimated cost of 9 for instruction: %ctpop
101 %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
102 ret <8 x i16> %ctpop
103 }
104
105 define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) { ; cost-model test for <16 x i16> llvm.ctpop; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
106 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i16':
107 ; SSE2: Found an estimated cost of 26 for instruction: %ctpop
108 ; SSE42: Found an estimated cost of 18 for instruction: %ctpop
109 ; AVX1: Found an estimated cost of 20 for instruction: %ctpop
110 ; AVX2: Found an estimated cost of 9 for instruction: %ctpop
111 %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
112 ret <16 x i16> %ctpop
113 }
114
115 define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) { ; cost-model test for <16 x i8> llvm.ctpop; expectations keyed by the SSE2, SSE42 and AVX prefixes
116 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i8':
117 ; SSE2: Found an estimated cost of 10 for instruction: %ctpop
118 ; SSE42: Found an estimated cost of 6 for instruction: %ctpop
119 ; AVX: Found an estimated cost of 6 for instruction: %ctpop
120 %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
121 ret <16 x i8> %ctpop
122 }
123
124 define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) { ; cost-model test for <32 x i8> llvm.ctpop; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
125 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v32i8':
126 ; SSE2: Found an estimated cost of 20 for instruction: %ctpop
127 ; SSE42: Found an estimated cost of 12 for instruction: %ctpop
128 ; AVX1: Found an estimated cost of 14 for instruction: %ctpop
129 ; AVX2: Found an estimated cost of 6 for instruction: %ctpop
130 %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
131 ret <32 x i8> %ctpop
132 }
133
134 ; Verify the cost of scalar leading zero count instructions.
135
136 declare i64 @llvm.ctlz.i64(i64, i1)
137 declare i32 @llvm.ctlz.i32(i32, i1)
138 declare i16 @llvm.ctlz.i16(i16, i1)
139 declare i8 @llvm.ctlz.i8(i8, i1)
140
141 define i64 @var_ctlz_i64(i64 %a) { ; cost-model test for scalar i64 llvm.ctlz; the same cost is expected on every run
142 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i64':
143 ; CHECK: Found an estimated cost of 1 for instruction: %ctlz
144 %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
145 ret i64 %ctlz
146 }
147
148 define i64 @var_ctlz_i64u(i64 %a) { ; cost-model test for scalar i64 llvm.ctlz; the same cost is expected on every run
149 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i64u':
150 ; CHECK: Found an estimated cost of 1 for instruction: %ctlz
151 %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
152 ret i64 %ctlz
153 }
154
155 define i32 @var_ctlz_i32(i32 %a) { ; cost-model test for scalar i32 llvm.ctlz; the same cost is expected on every run
156 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i32':
157 ; CHECK: Found an estimated cost of 1 for instruction: %ctlz
158 %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
159 ret i32 %ctlz
160 }
161
162 define i32 @var_ctlz_i32u(i32 %a) { ; cost-model test for scalar i32 llvm.ctlz; the same cost is expected on every run
163 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i32u':
164 ; CHECK: Found an estimated cost of 1 for instruction: %ctlz
165 %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
166 ret i32 %ctlz
167 }
168
169 define i16 @var_ctlz_i16(i16 %a) { ; cost-model test for scalar i16 llvm.ctlz; the same cost is expected on every run
170 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i16':
171 ; CHECK: Found an estimated cost of 1 for instruction: %ctlz
172 %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
173 ret i16 %ctlz
174 }
175
176 define i16 @var_ctlz_i16u(i16 %a) { ; cost-model test for scalar i16 llvm.ctlz; the same cost is expected on every run
177 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i16u':
178 ; CHECK: Found an estimated cost of 1 for instruction: %ctlz
179 %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
180 ret i16 %ctlz
181 }
182
183 define i8 @var_ctlz_i8(i8 %a) { ; cost-model test for scalar i8 llvm.ctlz; the same cost is expected on every run
184 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i8':
185 ; CHECK: Found an estimated cost of 1 for instruction: %ctlz
186 %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
187 ret i8 %ctlz
188 }
189
190 define i8 @var_ctlz_i8u(i8 %a) { ; cost-model test for scalar i8 llvm.ctlz; the same cost is expected on every run
191 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i8u':
192 ; CHECK: Found an estimated cost of 1 for instruction: %ctlz
193 %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
194 ret i8 %ctlz
195 }
196
197 ; Verify the cost of vector leading zero count instructions.
198
199 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
200 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
201 declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1)
202 declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)
203
204 declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
205 declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
206 declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
207 declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)
208
209 define <2 x i64> @var_ctlz_v2i64(<2 x i64> %a) { ; cost-model test for <2 x i64> llvm.ctlz; expectations keyed by the SSE2, SSE42 and AVX prefixes
210 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64':
211 ; SSE2: Found an estimated cost of 25 for instruction: %ctlz
212 ; SSE42: Found an estimated cost of 23 for instruction: %ctlz
213 ; AVX: Found an estimated cost of 23 for instruction: %ctlz
214 %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
215 ret <2 x i64> %ctlz
216 }
217
218 define <2 x i64> @var_ctlz_v2i64u(<2 x i64> %a) { ; cost-model test for <2 x i64> llvm.ctlz; expectations keyed by the SSE2, SSE42 and AVX prefixes
219 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64u':
220 ; SSE2: Found an estimated cost of 25 for instruction: %ctlz
221 ; SSE42: Found an estimated cost of 23 for instruction: %ctlz
222 ; AVX: Found an estimated cost of 23 for instruction: %ctlz
223 %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
224 ret <2 x i64> %ctlz
225 }
226
227 define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) { ; cost-model test for <4 x i64> llvm.ctlz; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
228 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i64':
229 ; SSE2: Found an estimated cost of 50 for instruction: %ctlz
230 ; SSE42: Found an estimated cost of 46 for instruction: %ctlz
231 ; AVX1: Found an estimated cost of 48 for instruction: %ctlz
232 ; AVX2: Found an estimated cost of 23 for instruction: %ctlz
233 %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
234 ret <4 x i64> %ctlz
235 }
236
237 define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) { ; cost-model test for <4 x i64> llvm.ctlz; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
238 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i64u':
239 ; SSE2: Found an estimated cost of 50 for instruction: %ctlz
240 ; SSE42: Found an estimated cost of 46 for instruction: %ctlz
241 ; AVX1: Found an estimated cost of 48 for instruction: %ctlz
242 ; AVX2: Found an estimated cost of 23 for instruction: %ctlz
243 %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
244 ret <4 x i64> %ctlz
245 }
246
247 define <4 x i32> @var_ctlz_v4i32(<4 x i32> %a) { ; cost-model test for <4 x i32> llvm.ctlz; expectations keyed by the SSE2, SSE42 and AVX prefixes
248 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i32':
249 ; SSE2: Found an estimated cost of 26 for instruction: %ctlz
250 ; SSE42: Found an estimated cost of 18 for instruction: %ctlz
251 ; AVX: Found an estimated cost of 18 for instruction: %ctlz
252 %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
253 ret <4 x i32> %ctlz
254 }
255
256 define <4 x i32> @var_ctlz_v4i32u(<4 x i32> %a) { ; cost-model test for <4 x i32> llvm.ctlz; expectations keyed by the SSE2, SSE42 and AVX prefixes
257 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i32u':
258 ; SSE2: Found an estimated cost of 26 for instruction: %ctlz
259 ; SSE42: Found an estimated cost of 18 for instruction: %ctlz
260 ; AVX: Found an estimated cost of 18 for instruction: %ctlz
261 %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
262 ret <4 x i32> %ctlz
263 }
264
265 define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) { ; cost-model test for <8 x i32> llvm.ctlz; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
266 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i32':
267 ; SSE2: Found an estimated cost of 52 for instruction: %ctlz
268 ; SSE42: Found an estimated cost of 36 for instruction: %ctlz
269 ; AVX1: Found an estimated cost of 38 for instruction: %ctlz
270 ; AVX2: Found an estimated cost of 18 for instruction: %ctlz
271 %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
272 ret <8 x i32> %ctlz
273 }
274
275 define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) { ; cost-model test for <8 x i32> llvm.ctlz; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
276 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i32u':
277 ; SSE2: Found an estimated cost of 52 for instruction: %ctlz
278 ; SSE42: Found an estimated cost of 36 for instruction: %ctlz
279 ; AVX1: Found an estimated cost of 38 for instruction: %ctlz
280 ; AVX2: Found an estimated cost of 18 for instruction: %ctlz
281 %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
282 ret <8 x i32> %ctlz
283 }
284
285 define <8 x i16> @var_ctlz_v8i16(<8 x i16> %a) { ; cost-model test for <8 x i16> llvm.ctlz; expectations keyed by the SSE2, SSE42 and AVX prefixes
286 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i16':
287 ; SSE2: Found an estimated cost of 20 for instruction: %ctlz
288 ; SSE42: Found an estimated cost of 14 for instruction: %ctlz
289 ; AVX: Found an estimated cost of 14 for instruction: %ctlz
290 %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
291 ret <8 x i16> %ctlz
292 }
293
294 define <8 x i16> @var_ctlz_v8i16u(<8 x i16> %a) { ; cost-model test for <8 x i16> llvm.ctlz; expectations keyed by the SSE2, SSE42 and AVX prefixes
295 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i16u':
296 ; SSE2: Found an estimated cost of 20 for instruction: %ctlz
297 ; SSE42: Found an estimated cost of 14 for instruction: %ctlz
298 ; AVX: Found an estimated cost of 14 for instruction: %ctlz
299 %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
300 ret <8 x i16> %ctlz
301 }
302
303 define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) { ; cost-model test for <16 x i16> llvm.ctlz; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
304 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i16':
305 ; SSE2: Found an estimated cost of 40 for instruction: %ctlz
306 ; SSE42: Found an estimated cost of 28 for instruction: %ctlz
307 ; AVX1: Found an estimated cost of 30 for instruction: %ctlz
308 ; AVX2: Found an estimated cost of 14 for instruction: %ctlz
309 %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
310 ret <16 x i16> %ctlz
311 }
312
313 define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) { ; cost-model test for <16 x i16> llvm.ctlz; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
314 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i16u':
315 ; SSE2: Found an estimated cost of 40 for instruction: %ctlz
316 ; SSE42: Found an estimated cost of 28 for instruction: %ctlz
317 ; AVX1: Found an estimated cost of 30 for instruction: %ctlz
318 ; AVX2: Found an estimated cost of 14 for instruction: %ctlz
319 %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
320 ret <16 x i16> %ctlz
321 }
322
323 define <16 x i8> @var_ctlz_v16i8(<16 x i8> %a) { ; cost-model test for <16 x i8> llvm.ctlz; expectations keyed by the SSE2, SSE42 and AVX prefixes
324 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i8':
325 ; SSE2: Found an estimated cost of 17 for instruction: %ctlz
326 ; SSE42: Found an estimated cost of 9 for instruction: %ctlz
327 ; AVX: Found an estimated cost of 9 for instruction: %ctlz
328 %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
329 ret <16 x i8> %ctlz
330 }
331
332 define <16 x i8> @var_ctlz_v16i8u(<16 x i8> %a) { ; cost-model test for <16 x i8> llvm.ctlz; expectations keyed by the SSE2, SSE42 and AVX prefixes
333 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i8u':
334 ; SSE2: Found an estimated cost of 17 for instruction: %ctlz
335 ; SSE42: Found an estimated cost of 9 for instruction: %ctlz
336 ; AVX: Found an estimated cost of 9 for instruction: %ctlz
337 %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
338 ret <16 x i8> %ctlz
339 }
340
341 define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) { ; cost-model test for <32 x i8> llvm.ctlz; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
342 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i8':
343 ; SSE2: Found an estimated cost of 34 for instruction: %ctlz
344 ; SSE42: Found an estimated cost of 18 for instruction: %ctlz
345 ; AVX1: Found an estimated cost of 20 for instruction: %ctlz
346 ; AVX2: Found an estimated cost of 9 for instruction: %ctlz
347 %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
348 ret <32 x i8> %ctlz
349 }
350
351 define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) { ; cost-model test for <32 x i8> llvm.ctlz; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
352 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i8u':
353 ; SSE2: Found an estimated cost of 34 for instruction: %ctlz
354 ; SSE42: Found an estimated cost of 18 for instruction: %ctlz
355 ; AVX1: Found an estimated cost of 20 for instruction: %ctlz
356 ; AVX2: Found an estimated cost of 9 for instruction: %ctlz
357 %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
358 ret <32 x i8> %ctlz
359 }
360
361 ; Verify the cost of scalar trailing zero count instructions.
362
363 declare i64 @llvm.cttz.i64(i64, i1)
364 declare i32 @llvm.cttz.i32(i32, i1)
365 declare i16 @llvm.cttz.i16(i16, i1)
366 declare i8 @llvm.cttz.i8(i8, i1)
367
368 define i64 @var_cttz_i64(i64 %a) { ; cost-model test for scalar i64 llvm.cttz; the same cost is expected on every run
369 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_i64':
370 ; CHECK: Found an estimated cost of 1 for instruction: %cttz
371 %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
372 ret i64 %cttz
373 }
374
375 define i64 @var_cttz_i64u(i64 %a) { ; cost-model test for scalar i64 llvm.cttz; the same cost is expected on every run
376 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_i64u':
377 ; CHECK: Found an estimated cost of 1 for instruction: %cttz
378 %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
379 ret i64 %cttz
380 }
381
382 define i32 @var_cttz_i32(i32 %a) { ; cost-model test for scalar i32 llvm.cttz; the same cost is expected on every run
383 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_i32':
384 ; CHECK: Found an estimated cost of 1 for instruction: %cttz
385 %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
386 ret i32 %cttz
387 }
388
389 define i32 @var_cttz_i32u(i32 %a) { ; cost-model test for scalar i32 llvm.cttz; the same cost is expected on every run
390 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_i32u':
391 ; CHECK: Found an estimated cost of 1 for instruction: %cttz
392 %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
393 ret i32 %cttz
394 }
395
396 define i16 @var_cttz_i16(i16 %a) { ; cost-model test for scalar i16 llvm.cttz; the same cost is expected on every run
397 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_i16':
398 ; CHECK: Found an estimated cost of 1 for instruction: %cttz
399 %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
400 ret i16 %cttz
401 }
402
403 define i16 @var_cttz_i16u(i16 %a) { ; cost-model test for scalar i16 llvm.cttz; the same cost is expected on every run
404 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_i16u':
405 ; CHECK: Found an estimated cost of 1 for instruction: %cttz
406 %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
407 ret i16 %cttz
408 }
409
410 define i8 @var_cttz_i8(i8 %a) { ; cost-model test for scalar i8 llvm.cttz; the same cost is expected on every run
411 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_i8':
412 ; CHECK: Found an estimated cost of 1 for instruction: %cttz
413 %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
414 ret i8 %cttz
415 }
416
417 define i8 @var_cttz_i8u(i8 %a) { ; cost-model test for scalar i8 llvm.cttz; the same cost is expected on every run
418 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_i8u':
419 ; CHECK: Found an estimated cost of 1 for instruction: %cttz
420 %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
421 ret i8 %cttz
422 }
423
424 ; Verify the cost of vector trailing zero count instructions.
425
426 declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
427 declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
428 declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
429 declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)
430
431 declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1)
432 declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
433 declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)
434 declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)
435
436 define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) { ; cost-model test for <2 x i64> llvm.cttz; expectations keyed by the SSE2, SSE42 and AVX prefixes
437 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64':
438 ; SSE2: Found an estimated cost of 14 for instruction: %cttz
439 ; SSE42: Found an estimated cost of 10 for instruction: %cttz
440 ; AVX: Found an estimated cost of 10 for instruction: %cttz
441 %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
442 ret <2 x i64> %cttz
443 }
444
445 define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) { ; cost-model test for <2 x i64> llvm.cttz; expectations keyed by the SSE2, SSE42 and AVX prefixes
446 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64u':
447 ; SSE2: Found an estimated cost of 14 for instruction: %cttz
448 ; SSE42: Found an estimated cost of 10 for instruction: %cttz
449 ; AVX: Found an estimated cost of 10 for instruction: %cttz
450 %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
451 ret <2 x i64> %cttz
452 }
453
454 define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) { ; cost-model test for <4 x i64> llvm.cttz; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
455 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i64':
456 ; SSE2: Found an estimated cost of 28 for instruction: %cttz
457 ; SSE42: Found an estimated cost of 20 for instruction: %cttz
458 ; AVX1: Found an estimated cost of 22 for instruction: %cttz
459 ; AVX2: Found an estimated cost of 10 for instruction: %cttz
460 %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
461 ret <4 x i64> %cttz
462 }
463
464 define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) { ; cost-model test for <4 x i64> llvm.cttz; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
465 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i64u':
466 ; SSE2: Found an estimated cost of 28 for instruction: %cttz
467 ; SSE42: Found an estimated cost of 20 for instruction: %cttz
468 ; AVX1: Found an estimated cost of 22 for instruction: %cttz
469 ; AVX2: Found an estimated cost of 10 for instruction: %cttz
470 %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
471 ret <4 x i64> %cttz
472 }
473
474 define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) { ; cost-model test for <4 x i32> llvm.cttz; expectations keyed by the SSE2, SSE42 and AVX prefixes
475 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i32':
476 ; SSE2: Found an estimated cost of 18 for instruction: %cttz
477 ; SSE42: Found an estimated cost of 14 for instruction: %cttz
478 ; AVX: Found an estimated cost of 14 for instruction: %cttz
479 %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
480 ret <4 x i32> %cttz
481 }
482
483 define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) { ; cost-model test for <4 x i32> llvm.cttz; expectations keyed by the SSE2, SSE42 and AVX prefixes
484 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i32u':
485 ; SSE2: Found an estimated cost of 18 for instruction: %cttz
486 ; SSE42: Found an estimated cost of 14 for instruction: %cttz
487 ; AVX: Found an estimated cost of 14 for instruction: %cttz
488 %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
489 ret <4 x i32> %cttz
490 }
491
492 define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) { ; cost-model test for <8 x i32> llvm.cttz; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
493 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i32':
494 ; SSE2: Found an estimated cost of 36 for instruction: %cttz
495 ; SSE42: Found an estimated cost of 28 for instruction: %cttz
496 ; AVX1: Found an estimated cost of 30 for instruction: %cttz
497 ; AVX2: Found an estimated cost of 14 for instruction: %cttz
498 %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
499 ret <8 x i32> %cttz
500 }
501
502 define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) { ; cost-model test for <8 x i32> llvm.cttz; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
503 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i32u':
504 ; SSE2: Found an estimated cost of 36 for instruction: %cttz
505 ; SSE42: Found an estimated cost of 28 for instruction: %cttz
506 ; AVX1: Found an estimated cost of 30 for instruction: %cttz
507 ; AVX2: Found an estimated cost of 14 for instruction: %cttz
508 %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
509 ret <8 x i32> %cttz
510 }
511
512 define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) { ; cost-model test for <8 x i16> llvm.cttz; expectations keyed by the SSE2, SSE42 and AVX prefixes
513 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i16':
514 ; SSE2: Found an estimated cost of 16 for instruction: %cttz
515 ; SSE42: Found an estimated cost of 12 for instruction: %cttz
516 ; AVX: Found an estimated cost of 12 for instruction: %cttz
517 %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
518 ret <8 x i16> %cttz
519 }
520
521 define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) { ; cost-model test for <8 x i16> llvm.cttz; expectations keyed by the SSE2, SSE42 and AVX prefixes
522 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i16u':
523 ; SSE2: Found an estimated cost of 16 for instruction: %cttz
524 ; SSE42: Found an estimated cost of 12 for instruction: %cttz
525 ; AVX: Found an estimated cost of 12 for instruction: %cttz
526 %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
527 ret <8 x i16> %cttz
528 }
529
530 define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) { ; cost-model test for <16 x i16> llvm.cttz; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
531 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i16':
532 ; SSE2: Found an estimated cost of 32 for instruction: %cttz
533 ; SSE42: Found an estimated cost of 24 for instruction: %cttz
534 ; AVX1: Found an estimated cost of 26 for instruction: %cttz
535 ; AVX2: Found an estimated cost of 12 for instruction: %cttz
536 %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
537 ret <16 x i16> %cttz
538 }
539
540 define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) { ; cost-model test for <16 x i16> llvm.cttz; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
541 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i16u':
542 ; SSE2: Found an estimated cost of 32 for instruction: %cttz
543 ; SSE42: Found an estimated cost of 24 for instruction: %cttz
544 ; AVX1: Found an estimated cost of 26 for instruction: %cttz
545 ; AVX2: Found an estimated cost of 12 for instruction: %cttz
546 %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
547 ret <16 x i16> %cttz
548 }
549
550 define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) { ; cost-model test for <16 x i8> llvm.cttz; expectations keyed by the SSE2, SSE42 and AVX prefixes
551 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i8':
552 ; SSE2: Found an estimated cost of 13 for instruction: %cttz
553 ; SSE42: Found an estimated cost of 9 for instruction: %cttz
554 ; AVX: Found an estimated cost of 9 for instruction: %cttz
555 %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
556 ret <16 x i8> %cttz
557 }
558
559 define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) { ; cost-model test for <16 x i8> llvm.cttz; expectations keyed by the SSE2, SSE42 and AVX prefixes
560 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i8u':
561 ; SSE2: Found an estimated cost of 13 for instruction: %cttz
562 ; SSE42: Found an estimated cost of 9 for instruction: %cttz
563 ; AVX: Found an estimated cost of 9 for instruction: %cttz
564 %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
565 ret <16 x i8> %cttz
566 }
567
568 define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) { ; cost-model test for <32 x i8> llvm.cttz; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
569 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i8':
570 ; SSE2: Found an estimated cost of 26 for instruction: %cttz
571 ; SSE42: Found an estimated cost of 18 for instruction: %cttz
572 ; AVX1: Found an estimated cost of 20 for instruction: %cttz
573 ; AVX2: Found an estimated cost of 9 for instruction: %cttz
574 %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
575 ret <32 x i8> %cttz
576 }
577
578 define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) { ; cost-model test for <32 x i8> llvm.cttz; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
579 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i8u':
580 ; SSE2: Found an estimated cost of 26 for instruction: %cttz
581 ; SSE42: Found an estimated cost of 18 for instruction: %cttz
582 ; AVX1: Found an estimated cost of 20 for instruction: %cttz
583 ; AVX2: Found an estimated cost of 9 for instruction: %cttz
584 %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
585 ret <32 x i8> %cttz
586 }
0 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 -check-prefix=NOPOPCNT
1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -check-prefix=POPCNT
2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
4 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
5 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
6
7 ; Verify the cost of scalar leading zero count instructions.
8
9 declare i64 @llvm.ctlz.i64(i64, i1)
10 declare i32 @llvm.ctlz.i32(i32, i1)
11 declare i16 @llvm.ctlz.i16(i16, i1)
12 declare i8 @llvm.ctlz.i8(i8, i1)
13
14 define i64 @var_ctlz_i64(i64 %a) { ; cost-model test for scalar i64 llvm.ctlz; the same cost is expected on every run
15 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i64':
16 ; CHECK: Found an estimated cost of 1 for instruction: %ctlz
17 %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
18 ret i64 %ctlz
19 }
20
21 define i64 @var_ctlz_i64u(i64 %a) { ; cost-model test for scalar i64 llvm.ctlz; the same cost is expected on every run
22 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i64u':
23 ; CHECK: Found an estimated cost of 1 for instruction: %ctlz
24 %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
25 ret i64 %ctlz
26 }
27
28 define i32 @var_ctlz_i32(i32 %a) { ; cost-model test for scalar i32 llvm.ctlz; the same cost is expected on every run
29 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i32':
30 ; CHECK: Found an estimated cost of 1 for instruction: %ctlz
31 %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
32 ret i32 %ctlz
33 }
34
35 define i32 @var_ctlz_i32u(i32 %a) { ; cost-model test for scalar i32 llvm.ctlz; the same cost is expected on every run
36 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i32u':
37 ; CHECK: Found an estimated cost of 1 for instruction: %ctlz
38 %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
39 ret i32 %ctlz
40 }
41
42 define i16 @var_ctlz_i16(i16 %a) { ; cost-model test for scalar i16 llvm.ctlz; the same cost is expected on every run
43 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i16':
44 ; CHECK: Found an estimated cost of 1 for instruction: %ctlz
45 %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
46 ret i16 %ctlz
47 }
48
49 define i16 @var_ctlz_i16u(i16 %a) { ; cost-model test for scalar i16 llvm.ctlz; the same cost is expected on every run
50 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i16u':
51 ; CHECK: Found an estimated cost of 1 for instruction: %ctlz
52 %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
53 ret i16 %ctlz
54 }
55
56 define i8 @var_ctlz_i8(i8 %a) { ; cost-model test for scalar i8 llvm.ctlz; the same cost is expected on every run
57 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i8':
58 ; CHECK: Found an estimated cost of 1 for instruction: %ctlz
59 %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
60 ret i8 %ctlz
61 }
62
63 define i8 @var_ctlz_i8u(i8 %a) { ; cost-model test for scalar i8 llvm.ctlz; the same cost is expected on every run
64 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i8u':
65 ; CHECK: Found an estimated cost of 1 for instruction: %ctlz
66 %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
67 ret i8 %ctlz
68 }
69
70 ; Verify the cost of vector leading zero count instructions.
71
72 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
73 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
74 declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1)
75 declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)
76
77 declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
78 declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
79 declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
80 declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)
81
82 define <2 x i64> @var_ctlz_v2i64(<2 x i64> %a) { ; cost-model test for <2 x i64> llvm.ctlz; expectations keyed by the SSE2, SSE42 and AVX prefixes
83 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64':
84 ; SSE2: Found an estimated cost of 25 for instruction: %ctlz
85 ; SSE42: Found an estimated cost of 23 for instruction: %ctlz
86 ; AVX: Found an estimated cost of 23 for instruction: %ctlz
87 %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
88 ret <2 x i64> %ctlz
89 }
90
91 define <2 x i64> @var_ctlz_v2i64u(<2 x i64> %a) { ; cost-model test for <2 x i64> llvm.ctlz; expectations keyed by the SSE2, SSE42 and AVX prefixes
92 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64u':
93 ; SSE2: Found an estimated cost of 25 for instruction: %ctlz
94 ; SSE42: Found an estimated cost of 23 for instruction: %ctlz
95 ; AVX: Found an estimated cost of 23 for instruction: %ctlz
96 %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
97 ret <2 x i64> %ctlz
98 }
99
100 define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) { ; cost-model test for <4 x i64> llvm.ctlz; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
101 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i64':
102 ; SSE2: Found an estimated cost of 50 for instruction: %ctlz
103 ; SSE42: Found an estimated cost of 46 for instruction: %ctlz
104 ; AVX1: Found an estimated cost of 48 for instruction: %ctlz
105 ; AVX2: Found an estimated cost of 23 for instruction: %ctlz
106 %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 0) ; i1 0 - a zero input must give a defined result (LangRef is_zero_undef off)
107 ret <4 x i64> %ctlz
108 }
109
110 define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) { ; cost-model test for <4 x i64> llvm.ctlz; expectations keyed by the SSE2, SSE42, AVX1 and AVX2 prefixes
111 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i64u':
112 ; SSE2: Found an estimated cost of 50 for instruction: %ctlz
113 ; SSE42: Found an estimated cost of 46 for instruction: %ctlz
114 ; AVX1: Found an estimated cost of 48 for instruction: %ctlz
115 ; AVX2: Found an estimated cost of 23 for instruction: %ctlz
116 %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 1) ; i1 1 - result for a zero input is undefined (LangRef is_zero_undef on)
117 ret <4 x i64> %ctlz
118 }
119
; Cost-model test for llvm.ctlz on <4 x i32> with the second operand i1 0
; (per the LLVM LangRef the result for a zero input is then defined).
120 define <4 x i32> @var_ctlz_v4i32(<4 x i32> %a) {
121 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i32':
122 ; SSE2: Found an estimated cost of 26 for instruction: %ctlz
123 ; SSE42: Found an estimated cost of 18 for instruction: %ctlz
124 ; AVX: Found an estimated cost of 18 for instruction: %ctlz
125 %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 0)
126 ret <4 x i32> %ctlz
127 }
128
; Same <4 x i32> ctlz query with the second operand i1 1 (zero input is undefined
; per the LLVM LangRef); the expected costs equal those of var_ctlz_v4i32.
129 define <4 x i32> @var_ctlz_v4i32u(<4 x i32> %a) {
130 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i32u':
131 ; SSE2: Found an estimated cost of 26 for instruction: %ctlz
132 ; SSE42: Found an estimated cost of 18 for instruction: %ctlz
133 ; AVX: Found an estimated cost of 18 for instruction: %ctlz
134 %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 1)
135 ret <4 x i32> %ctlz
136 }
137
; Cost-model test for llvm.ctlz on the 256-bit <8 x i32> type (second operand i1 0).
; Expected SSE2/SSE42 costs are 2x the <4 x i32> ones, AVX1 is twice the
; 128-bit AVX cost plus 2, and AVX2 equals the 128-bit AVX cost.
138 define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) {
139 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i32':
140 ; SSE2: Found an estimated cost of 52 for instruction: %ctlz
141 ; SSE42: Found an estimated cost of 36 for instruction: %ctlz
142 ; AVX1: Found an estimated cost of 38 for instruction: %ctlz
143 ; AVX2: Found an estimated cost of 18 for instruction: %ctlz
144 %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 0)
145 ret <8 x i32> %ctlz
146 }
147
; Same <8 x i32> ctlz query with the second operand i1 1 (zero input is undefined
; per the LLVM LangRef); the expected costs equal those of var_ctlz_v8i32.
148 define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) {
149 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i32u':
150 ; SSE2: Found an estimated cost of 52 for instruction: %ctlz
151 ; SSE42: Found an estimated cost of 36 for instruction: %ctlz
152 ; AVX1: Found an estimated cost of 38 for instruction: %ctlz
153 ; AVX2: Found an estimated cost of 18 for instruction: %ctlz
154 %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 1)
155 ret <8 x i32> %ctlz
156 }
157
; Cost-model test for llvm.ctlz on <8 x i16> with the second operand i1 0
; (per the LLVM LangRef the result for a zero input is then defined).
158 define <8 x i16> @var_ctlz_v8i16(<8 x i16> %a) {
159 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i16':
160 ; SSE2: Found an estimated cost of 20 for instruction: %ctlz
161 ; SSE42: Found an estimated cost of 14 for instruction: %ctlz
162 ; AVX: Found an estimated cost of 14 for instruction: %ctlz
163 %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 0)
164 ret <8 x i16> %ctlz
165 }
166
; Same <8 x i16> ctlz query with the second operand i1 1 (zero input is undefined
; per the LLVM LangRef); the expected costs equal those of var_ctlz_v8i16.
167 define <8 x i16> @var_ctlz_v8i16u(<8 x i16> %a) {
168 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i16u':
169 ; SSE2: Found an estimated cost of 20 for instruction: %ctlz
170 ; SSE42: Found an estimated cost of 14 for instruction: %ctlz
171 ; AVX: Found an estimated cost of 14 for instruction: %ctlz
172 %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 1)
173 ret <8 x i16> %ctlz
174 }
175
; Cost-model test for llvm.ctlz on the 256-bit <16 x i16> type (second operand i1 0).
; Expected SSE2/SSE42 costs are 2x the <8 x i16> ones, AVX1 is twice the
; 128-bit AVX cost plus 2, and AVX2 equals the 128-bit AVX cost.
176 define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) {
177 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i16':
178 ; SSE2: Found an estimated cost of 40 for instruction: %ctlz
179 ; SSE42: Found an estimated cost of 28 for instruction: %ctlz
180 ; AVX1: Found an estimated cost of 30 for instruction: %ctlz
181 ; AVX2: Found an estimated cost of 14 for instruction: %ctlz
182 %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 0)
183 ret <16 x i16> %ctlz
184 }
185
; Same <16 x i16> ctlz query with the second operand i1 1 (zero input is undefined
; per the LLVM LangRef); the expected costs equal those of var_ctlz_v16i16.
186 define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) {
187 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i16u':
188 ; SSE2: Found an estimated cost of 40 for instruction: %ctlz
189 ; SSE42: Found an estimated cost of 28 for instruction: %ctlz
190 ; AVX1: Found an estimated cost of 30 for instruction: %ctlz
191 ; AVX2: Found an estimated cost of 14 for instruction: %ctlz
192 %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 1)
193 ret <16 x i16> %ctlz
194 }
195
; Cost-model test for llvm.ctlz on <16 x i8> with the second operand i1 0
; (per the LLVM LangRef the result for a zero input is then defined).
196 define <16 x i8> @var_ctlz_v16i8(<16 x i8> %a) {
197 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i8':
198 ; SSE2: Found an estimated cost of 17 for instruction: %ctlz
199 ; SSE42: Found an estimated cost of 9 for instruction: %ctlz
200 ; AVX: Found an estimated cost of 9 for instruction: %ctlz
201 %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 0)
202 ret <16 x i8> %ctlz
203 }
204
; Same <16 x i8> ctlz query with the second operand i1 1 (zero input is undefined
; per the LLVM LangRef); the expected costs equal those of var_ctlz_v16i8.
205 define <16 x i8> @var_ctlz_v16i8u(<16 x i8> %a) {
206 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i8u':
207 ; SSE2: Found an estimated cost of 17 for instruction: %ctlz
208 ; SSE42: Found an estimated cost of 9 for instruction: %ctlz
209 ; AVX: Found an estimated cost of 9 for instruction: %ctlz
210 %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 1)
211 ret <16 x i8> %ctlz
212 }
213
; Cost-model test for llvm.ctlz on the 256-bit <32 x i8> type (second operand i1 0).
; Expected SSE2/SSE42 costs are 2x the <16 x i8> ones, AVX1 is twice the
; 128-bit AVX cost plus 2, and AVX2 equals the 128-bit AVX cost.
214 define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) {
215 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i8':
216 ; SSE2: Found an estimated cost of 34 for instruction: %ctlz
217 ; SSE42: Found an estimated cost of 18 for instruction: %ctlz
218 ; AVX1: Found an estimated cost of 20 for instruction: %ctlz
219 ; AVX2: Found an estimated cost of 9 for instruction: %ctlz
220 %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 0)
221 ret <32 x i8> %ctlz
222 }
223
; Same <32 x i8> ctlz query with the second operand i1 1 (zero input is undefined
; per the LLVM LangRef); the expected costs equal those of var_ctlz_v32i8.
224 define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) {
225 ; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i8u':
226 ; SSE2: Found an estimated cost of 34 for instruction: %ctlz
227 ; SSE42: Found an estimated cost of 18 for instruction: %ctlz
228 ; AVX1: Found an estimated cost of 20 for instruction: %ctlz
229 ; AVX2: Found an estimated cost of 9 for instruction: %ctlz
230 %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 1)
231 ret <32 x i8> %ctlz
232 }
0 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 -check-prefix=NOPOPCNT
1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -check-prefix=POPCNT
2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
4 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
5 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
6
7 ; Verify the cost of scalar population count instructions.
8
9 declare i64 @llvm.ctpop.i64(i64)
10 declare i32 @llvm.ctpop.i32(i32)
11 declare i16 @llvm.ctpop.i16(i16)
12 declare i8 @llvm.ctpop.i8(i8)
13
; Scalar i64 ctpop cost. A cost of 4 is expected under the NOPOPCNT prefix (only the
; pentium4 RUN line passes it) and 1 under POPCNT (every other RUN line).
14 define i64 @var_ctpop_i64(i64 %a) {
15 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i64':
16 ; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop
17 ; POPCNT: Found an estimated cost of 1 for instruction: %ctpop
18 %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
19 ret i64 %ctpop
20 }
21
; Scalar i32 ctpop cost. A cost of 4 is expected under the NOPOPCNT prefix (only the
; pentium4 RUN line passes it) and 1 under POPCNT (every other RUN line).
22 define i32 @var_ctpop_i32(i32 %a) {
23 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i32':
24 ; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop
25 ; POPCNT: Found an estimated cost of 1 for instruction: %ctpop
26 %ctpop = call i32 @llvm.ctpop.i32(i32 %a)
27 ret i32 %ctpop
28 }
29
; Scalar i16 ctpop cost. A cost of 4 is expected under the NOPOPCNT prefix (only the
; pentium4 RUN line passes it) and 1 under POPCNT (every other RUN line).
30 define i16 @var_ctpop_i16(i16 %a) {
31 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i16':
32 ; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop
33 ; POPCNT: Found an estimated cost of 1 for instruction: %ctpop
34 %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
35 ret i16 %ctpop
36 }
37
; Scalar i8 ctpop cost. A cost of 4 is expected under the NOPOPCNT prefix (only the
; pentium4 RUN line passes it) and 1 under POPCNT (every other RUN line).
38 define i8 @var_ctpop_i8(i8 %a) {
39 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i8':
40 ; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop
41 ; POPCNT: Found an estimated cost of 1 for instruction: %ctpop
42 %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
43 ret i8 %ctpop
44 }
45
46 ; Verify the cost of vector population count instructions.
47
48 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
49 declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
50 declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
51 declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
52
53 declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
54 declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
55 declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
56 declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
57
; Cost-model test for llvm.ctpop on the 128-bit <2 x i64> type; the shared AVX
; prefix covers both the AVX1 and AVX2 RUN lines.
58 define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
59 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v2i64':
60 ; SSE2: Found an estimated cost of 12 for instruction: %ctpop
61 ; SSE42: Found an estimated cost of 7 for instruction: %ctpop
62 ; AVX: Found an estimated cost of 7 for instruction: %ctpop
63 %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
64 ret <2 x i64> %ctpop
65 }
66
; Cost-model test for llvm.ctpop on the 256-bit <4 x i64> type.
; Expected SSE2/SSE42 costs are 2x the <2 x i64> ones, AVX1 is twice the
; 128-bit AVX cost plus 2, and AVX2 equals the 128-bit AVX cost.
67 define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
68 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v4i64':
69 ; SSE2: Found an estimated cost of 24 for instruction: %ctpop
70 ; SSE42: Found an estimated cost of 14 for instruction: %ctpop
71 ; AVX1: Found an estimated cost of 16 for instruction: %ctpop
72 ; AVX2: Found an estimated cost of 7 for instruction: %ctpop
73 %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
74 ret <4 x i64> %ctpop
75 }
76
; Cost-model test for llvm.ctpop on the 128-bit <4 x i32> type; the shared AVX
; prefix covers both the AVX1 and AVX2 RUN lines.
77 define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
78 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v4i32':
79 ; SSE2: Found an estimated cost of 15 for instruction: %ctpop
80 ; SSE42: Found an estimated cost of 11 for instruction: %ctpop
81 ; AVX: Found an estimated cost of 11 for instruction: %ctpop
82 %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
83 ret <4 x i32> %ctpop
84 }
85
; Cost-model test for llvm.ctpop on the 256-bit <8 x i32> type.
; Expected SSE2/SSE42 costs are 2x the <4 x i32> ones, AVX1 is twice the
; 128-bit AVX cost plus 2, and AVX2 equals the 128-bit AVX cost.
86 define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
87 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i32':
88 ; SSE2: Found an estimated cost of 30 for instruction: %ctpop
89 ; SSE42: Found an estimated cost of 22 for instruction: %ctpop
90 ; AVX1: Found an estimated cost of 24 for instruction: %ctpop
91 ; AVX2: Found an estimated cost of 11 for instruction: %ctpop
92 %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
93 ret <8 x i32> %ctpop
94 }
95
; Cost-model test for llvm.ctpop on the 128-bit <8 x i16> type; the shared AVX
; prefix covers both the AVX1 and AVX2 RUN lines.
96 define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
97 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i16':
98 ; SSE2: Found an estimated cost of 13 for instruction: %ctpop
99 ; SSE42: Found an estimated cost of 9 for instruction: %ctpop
100 ; AVX: Found an estimated cost of 9 for instruction: %ctpop
101 %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
102 ret <8 x i16> %ctpop
103 }
104
; Cost-model test for llvm.ctpop on the 256-bit <16 x i16> type.
; Expected SSE2/SSE42 costs are 2x the <8 x i16> ones, AVX1 is twice the
; 128-bit AVX cost plus 2, and AVX2 equals the 128-bit AVX cost.
105 define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
106 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i16':
107 ; SSE2: Found an estimated cost of 26 for instruction: %ctpop
108 ; SSE42: Found an estimated cost of 18 for instruction: %ctpop
109 ; AVX1: Found an estimated cost of 20 for instruction: %ctpop
110 ; AVX2: Found an estimated cost of 9 for instruction: %ctpop
111 %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
112 ret <16 x i16> %ctpop
113 }
114
; Cost-model test for llvm.ctpop on the 128-bit <16 x i8> type; the shared AVX
; prefix covers both the AVX1 and AVX2 RUN lines.
115 define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
116 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i8':
117 ; SSE2: Found an estimated cost of 10 for instruction: %ctpop
118 ; SSE42: Found an estimated cost of 6 for instruction: %ctpop
119 ; AVX: Found an estimated cost of 6 for instruction: %ctpop
120 %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
121 ret <16 x i8> %ctpop
122 }
123
; Cost-model test for llvm.ctpop on the 256-bit <32 x i8> type.
; Expected SSE2/SSE42 costs are 2x the <16 x i8> ones, AVX1 is twice the
; 128-bit AVX cost plus 2, and AVX2 equals the 128-bit AVX cost.
124 define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
125 ; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v32i8':
126 ; SSE2: Found an estimated cost of 20 for instruction: %ctpop
127 ; SSE42: Found an estimated cost of 12 for instruction: %ctpop
128 ; AVX1: Found an estimated cost of 14 for instruction: %ctpop
129 ; AVX2: Found an estimated cost of 6 for instruction: %ctpop
130 %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
131 ret <32 x i8> %ctpop
132 }
0 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 -check-prefix=NOPOPCNT
1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -check-prefix=POPCNT
2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
4 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT
5 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT
6
7 ; Verify the cost of scalar trailing zero count instructions.
8
9 declare i64 @llvm.cttz.i64(i64, i1)
10 declare i32 @llvm.cttz.i32(i32, i1)
11 declare i16 @llvm.cttz.i16(i16, i1)
12 declare i8 @llvm.cttz.i8(i8, i1)
13
; Scalar i64 cttz with the second operand i1 0. The cost of 1 is matched under the
; common CHECK prefix, so it is expected for every RUN line.
14 define i64 @var_cttz_i64(i64 %a) {
15 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_i64':
16 ; CHECK: Found an estimated cost of 1 for instruction: %cttz
17 %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 0)
18 ret i64 %cttz
19 }
20
; Scalar i64 cttz with the second operand i1 1 (zero input is undefined per the
; LLVM LangRef). The cost of 1 is expected for every RUN line, as in var_cttz_i64.
21 define i64 @var_cttz_i64u(i64 %a) {
22 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_i64u':
23 ; CHECK: Found an estimated cost of 1 for instruction: %cttz
24 %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 1)
25 ret i64 %cttz
26 }
27
; Scalar i32 cttz with the second operand i1 0. The cost of 1 is matched under the
; common CHECK prefix, so it is expected for every RUN line.
28 define i32 @var_cttz_i32(i32 %a) {
29 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_i32':
30 ; CHECK: Found an estimated cost of 1 for instruction: %cttz
31 %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 0)
32 ret i32 %cttz
33 }
34
; Scalar i32 cttz with the second operand i1 1 (zero input is undefined per the
; LLVM LangRef). The cost of 1 is expected for every RUN line, as in var_cttz_i32.
35 define i32 @var_cttz_i32u(i32 %a) {
36 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_i32u':
37 ; CHECK: Found an estimated cost of 1 for instruction: %cttz
38 %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 1)
39 ret i32 %cttz
40 }
41
; Scalar i16 cttz with the second operand i1 0. The cost of 1 is matched under the
; common CHECK prefix, so it is expected for every RUN line.
42 define i16 @var_cttz_i16(i16 %a) {
43 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_i16':
44 ; CHECK: Found an estimated cost of 1 for instruction: %cttz
45 %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 0)
46 ret i16 %cttz
47 }
48
; Scalar i16 cttz with the second operand i1 1 (zero input is undefined per the
; LLVM LangRef). The cost of 1 is expected for every RUN line, as in var_cttz_i16.
49 define i16 @var_cttz_i16u(i16 %a) {
50 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_i16u':
51 ; CHECK: Found an estimated cost of 1 for instruction: %cttz
52 %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 1)
53 ret i16 %cttz
54 }
55
; Scalar i8 cttz with the second operand i1 0. The cost of 1 is matched under the
; common CHECK prefix, so it is expected for every RUN line.
56 define i8 @var_cttz_i8(i8 %a) {
57 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_i8':
58 ; CHECK: Found an estimated cost of 1 for instruction: %cttz
59 %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 0)
60 ret i8 %cttz
61 }
62
; Scalar i8 cttz with the second operand i1 1 (zero input is undefined per the
; LLVM LangRef). The cost of 1 is expected for every RUN line, as in var_cttz_i8.
63 define i8 @var_cttz_i8u(i8 %a) {
64 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_i8u':
65 ; CHECK: Found an estimated cost of 1 for instruction: %cttz
66 %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 1)
67 ret i8 %cttz
68 }
69
70 ; Verify the cost of vector trailing zero count instructions.
71
72 declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
73 declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
74 declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
75 declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)
76
77 declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1)
78 declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
79 declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)
80 declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)
81
; Cost-model test for llvm.cttz on <2 x i64> with the second operand i1 0
; (per the LLVM LangRef the result for a zero input is then defined).
82 define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
83 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64':
84 ; SSE2: Found an estimated cost of 14 for instruction: %cttz
85 ; SSE42: Found an estimated cost of 10 for instruction: %cttz
86 ; AVX: Found an estimated cost of 10 for instruction: %cttz
87 %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0)
88 ret <2 x i64> %cttz
89 }
90
; Same <2 x i64> cttz query with the second operand i1 1 (zero input is undefined
; per the LLVM LangRef); the expected costs equal those of var_cttz_v2i64.
91 define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
92 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64u':
93 ; SSE2: Found an estimated cost of 14 for instruction: %cttz
94 ; SSE42: Found an estimated cost of 10 for instruction: %cttz
95 ; AVX: Found an estimated cost of 10 for instruction: %cttz
96 %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1)
97 ret <2 x i64> %cttz
98 }
99
; Cost-model test for llvm.cttz on the 256-bit <4 x i64> type (second operand i1 0).
; Expected SSE2/SSE42 costs are 2x the <2 x i64> ones, AVX1 is twice the
; 128-bit AVX cost plus 2, and AVX2 equals the 128-bit AVX cost.
100 define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
101 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i64':
102 ; SSE2: Found an estimated cost of 28 for instruction: %cttz
103 ; SSE42: Found an estimated cost of 20 for instruction: %cttz
104 ; AVX1: Found an estimated cost of 22 for instruction: %cttz
105 ; AVX2: Found an estimated cost of 10 for instruction: %cttz
106 %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0)
107 ret <4 x i64> %cttz
108 }
109
; Same <4 x i64> cttz query with the second operand i1 1 (zero input is undefined
; per the LLVM LangRef); the expected costs equal those of var_cttz_v4i64.
110 define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
111 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i64u':
112 ; SSE2: Found an estimated cost of 28 for instruction: %cttz
113 ; SSE42: Found an estimated cost of 20 for instruction: %cttz
114 ; AVX1: Found an estimated cost of 22 for instruction: %cttz
115 ; AVX2: Found an estimated cost of 10 for instruction: %cttz
116 %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1)
117 ret <4 x i64> %cttz
118 }
119
; Cost-model test for llvm.cttz on <4 x i32> with the second operand i1 0
; (per the LLVM LangRef the result for a zero input is then defined).
120 define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
121 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i32':
122 ; SSE2: Found an estimated cost of 18 for instruction: %cttz
123 ; SSE42: Found an estimated cost of 14 for instruction: %cttz
124 ; AVX: Found an estimated cost of 14 for instruction: %cttz
125 %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0)
126 ret <4 x i32> %cttz
127 }
128
; Same <4 x i32> cttz query with the second operand i1 1 (zero input is undefined
; per the LLVM LangRef); the expected costs equal those of var_cttz_v4i32.
129 define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
130 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i32u':
131 ; SSE2: Found an estimated cost of 18 for instruction: %cttz
132 ; SSE42: Found an estimated cost of 14 for instruction: %cttz
133 ; AVX: Found an estimated cost of 14 for instruction: %cttz
134 %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1)
135 ret <4 x i32> %cttz
136 }
137
; Cost-model test for llvm.cttz on the 256-bit <8 x i32> type (second operand i1 0).
; Expected SSE2/SSE42 costs are 2x the <4 x i32> ones, AVX1 is twice the
; 128-bit AVX cost plus 2, and AVX2 equals the 128-bit AVX cost.
138 define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
139 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i32':
140 ; SSE2: Found an estimated cost of 36 for instruction: %cttz
141 ; SSE42: Found an estimated cost of 28 for instruction: %cttz
142 ; AVX1: Found an estimated cost of 30 for instruction: %cttz
143 ; AVX2: Found an estimated cost of 14 for instruction: %cttz
144 %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0)
145 ret <8 x i32> %cttz
146 }
147
; Same <8 x i32> cttz query with the second operand i1 1 (zero input is undefined
; per the LLVM LangRef); the expected costs equal those of var_cttz_v8i32.
148 define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
149 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i32u':
150 ; SSE2: Found an estimated cost of 36 for instruction: %cttz
151 ; SSE42: Found an estimated cost of 28 for instruction: %cttz
152 ; AVX1: Found an estimated cost of 30 for instruction: %cttz
153 ; AVX2: Found an estimated cost of 14 for instruction: %cttz
154 %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1)
155 ret <8 x i32> %cttz
156 }
157
; Cost-model test for llvm.cttz on <8 x i16> with the second operand i1 0
; (per the LLVM LangRef the result for a zero input is then defined).
158 define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
159 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i16':
160 ; SSE2: Found an estimated cost of 16 for instruction: %cttz
161 ; SSE42: Found an estimated cost of 12 for instruction: %cttz
162 ; AVX: Found an estimated cost of 12 for instruction: %cttz
163 %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0)
164 ret <8 x i16> %cttz
165 }
166
; Same <8 x i16> cttz query with the second operand i1 1 (zero input is undefined
; per the LLVM LangRef); the expected costs equal those of var_cttz_v8i16.
167 define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
168 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i16u':
169 ; SSE2: Found an estimated cost of 16 for instruction: %cttz
170 ; SSE42: Found an estimated cost of 12 for instruction: %cttz
171 ; AVX: Found an estimated cost of 12 for instruction: %cttz
172 %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1)
173 ret <8 x i16> %cttz
174 }
175
; Cost-model test for llvm.cttz on the 256-bit <16 x i16> type (second operand i1 0).
; Expected SSE2/SSE42 costs are 2x the <8 x i16> ones, AVX1 is twice the
; 128-bit AVX cost plus 2, and AVX2 equals the 128-bit AVX cost.
176 define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
177 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i16':
178 ; SSE2: Found an estimated cost of 32 for instruction: %cttz
179 ; SSE42: Found an estimated cost of 24 for instruction: %cttz
180 ; AVX1: Found an estimated cost of 26 for instruction: %cttz
181 ; AVX2: Found an estimated cost of 12 for instruction: %cttz
182 %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0)
183 ret <16 x i16> %cttz
184 }
185
; Same <16 x i16> cttz query with the second operand i1 1 (zero input is undefined
; per the LLVM LangRef); the expected costs equal those of var_cttz_v16i16.
186 define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
187 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i16u':
188 ; SSE2: Found an estimated cost of 32 for instruction: %cttz
189 ; SSE42: Found an estimated cost of 24 for instruction: %cttz
190 ; AVX1: Found an estimated cost of 26 for instruction: %cttz
191 ; AVX2: Found an estimated cost of 12 for instruction: %cttz
192 %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1)
193 ret <16 x i16> %cttz
194 }
195
; Cost-model test for llvm.cttz on <16 x i8> with the second operand i1 0
; (per the LLVM LangRef the result for a zero input is then defined).
196 define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
197 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i8':
198 ; SSE2: Found an estimated cost of 13 for instruction: %cttz
199 ; SSE42: Found an estimated cost of 9 for instruction: %cttz
200 ; AVX: Found an estimated cost of 9 for instruction: %cttz
201 %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0)
202 ret <16 x i8> %cttz
203 }
204
; Same <16 x i8> cttz query with the second operand i1 1 (zero input is undefined
; per the LLVM LangRef); the expected costs equal those of var_cttz_v16i8.
205 define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
206 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i8u':
207 ; SSE2: Found an estimated cost of 13 for instruction: %cttz
208 ; SSE42: Found an estimated cost of 9 for instruction: %cttz
209 ; AVX: Found an estimated cost of 9 for instruction: %cttz
210 %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1)
211 ret <16 x i8> %cttz
212 }
213
; Cost-model test for llvm.cttz on the 256-bit <32 x i8> type (second operand i1 0).
; Expected SSE2/SSE42 costs are 2x the <16 x i8> ones, AVX1 is twice the
; 128-bit AVX cost plus 2, and AVX2 equals the 128-bit AVX cost.
214 define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
215 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i8':
216 ; SSE2: Found an estimated cost of 26 for instruction: %cttz
217 ; SSE42: Found an estimated cost of 18 for instruction: %cttz
218 ; AVX1: Found an estimated cost of 20 for instruction: %cttz
219 ; AVX2: Found an estimated cost of 9 for instruction: %cttz
220 %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0)
221 ret <32 x i8> %cttz
222 }
223
; Same <32 x i8> cttz query with the second operand i1 1 (zero input is undefined
; per the LLVM LangRef); the expected costs equal those of var_cttz_v32i8.
224 define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
225 ; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i8u':
226 ; SSE2: Found an estimated cost of 26 for instruction: %cttz
227 ; SSE42: Found an estimated cost of 18 for instruction: %cttz
228 ; AVX1: Found an estimated cost of 20 for instruction: %cttz
229 ; AVX2: Found an estimated cost of 9 for instruction: %cttz
230 %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1)
231 ret <32 x i8> %cttz
232 }