llvm.org GIT mirror llvm / 0aff5d2
[X86] Regenerated ctlz/cttz scalar tests for 32/64-bit targets with/without LZCNT/TZCNT support. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275710 91177308-0d34-0410-b5e6-96231b3b80d8 (Simon Pilgrim, 3 years ago)
1 changed file(s) with 640 addition(s) and 170 deletion(s).
0 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=X32
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=X64
3 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=X32-CLZ
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=X64-CLZ
15
26 declare i8 @llvm.cttz.i8(i8, i1)
37 declare i16 @llvm.cttz.i16(i16, i1)
48 declare i32 @llvm.cttz.i32(i32, i1)
59 declare i64 @llvm.cttz.i64(i64, i1)
10
611 declare i8 @llvm.ctlz.i8(i8, i1)
712 declare i16 @llvm.ctlz.i16(i16, i1)
813 declare i32 @llvm.ctlz.i32(i32, i1)
914 declare i64 @llvm.ctlz.i64(i64, i1)
1015
1116 define i8 @cttz_i8(i8 %x) {
12 ; CHECK-LABEL: cttz_i8:
13 ; CHECK: # BB#0:
14 ; CHECK-NEXT: movzbl %dil, %eax
15 ; CHECK-NEXT: bsfl %eax, %eax
16 ; CHECK-NEXT: # kill
17 ; CHECK-NEXT: retq
17 ; X32-LABEL: cttz_i8:
18 ; X32: # BB#0:
19 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
20 ; X32-NEXT: bsfl %eax, %eax
21 ; X32-NEXT: # kill: %AL %AL %EAX
22 ; X32-NEXT: retl
23 ;
24 ; X64-LABEL: cttz_i8:
25 ; X64: # BB#0:
26 ; X64-NEXT: movzbl %dil, %eax
27 ; X64-NEXT: bsfl %eax, %eax
28 ; X64-NEXT: # kill: %AL %AL %EAX
29 ; X64-NEXT: retq
30 ;
31 ; X32-CLZ-LABEL: cttz_i8:
32 ; X32-CLZ: # BB#0:
33 ; X32-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
34 ; X32-CLZ-NEXT: tzcntl %eax, %eax
35 ; X32-CLZ-NEXT: # kill: %AL %AL %EAX
36 ; X32-CLZ-NEXT: retl
37 ;
38 ; X64-CLZ-LABEL: cttz_i8:
39 ; X64-CLZ: # BB#0:
40 ; X64-CLZ-NEXT: movzbl %dil, %eax
41 ; X64-CLZ-NEXT: tzcntl %eax, %eax
42 ; X64-CLZ-NEXT: # kill: %AL %AL %EAX
43 ; X64-CLZ-NEXT: retq
1844 %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
1945 ret i8 %tmp
2046 }
2147
2248 define i16 @cttz_i16(i16 %x) {
23 ; CHECK-LABEL: cttz_i16:
24 ; CHECK: # BB#0:
25 ; CHECK-NEXT: bsfw %di, %ax
26 ; CHECK-NEXT: retq
49 ; X32-LABEL: cttz_i16:
50 ; X32: # BB#0:
51 ; X32-NEXT: bsfw {{[0-9]+}}(%esp), %ax
52 ; X32-NEXT: retl
53 ;
54 ; X64-LABEL: cttz_i16:
55 ; X64: # BB#0:
56 ; X64-NEXT: bsfw %di, %ax
57 ; X64-NEXT: retq
58 ;
59 ; X32-CLZ-LABEL: cttz_i16:
60 ; X32-CLZ: # BB#0:
61 ; X32-CLZ-NEXT: tzcntw {{[0-9]+}}(%esp), %ax
62 ; X32-CLZ-NEXT: retl
63 ;
64 ; X64-CLZ-LABEL: cttz_i16:
65 ; X64-CLZ: # BB#0:
66 ; X64-CLZ-NEXT: tzcntw %di, %ax
67 ; X64-CLZ-NEXT: retq
2768 %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
2869 ret i16 %tmp
2970 }
3071
3172 define i32 @cttz_i32(i32 %x) {
32 ; CHECK-LABEL: cttz_i32:
33 ; CHECK: # BB#0:
34 ; CHECK-NEXT: bsfl %edi, %eax
35 ; CHECK-NEXT: retq
73 ; X32-LABEL: cttz_i32:
74 ; X32: # BB#0:
75 ; X32-NEXT: bsfl {{[0-9]+}}(%esp), %eax
76 ; X32-NEXT: retl
77 ;
78 ; X64-LABEL: cttz_i32:
79 ; X64: # BB#0:
80 ; X64-NEXT: bsfl %edi, %eax
81 ; X64-NEXT: retq
82 ;
83 ; X32-CLZ-LABEL: cttz_i32:
84 ; X32-CLZ: # BB#0:
85 ; X32-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
86 ; X32-CLZ-NEXT: retl
87 ;
88 ; X64-CLZ-LABEL: cttz_i32:
89 ; X64-CLZ: # BB#0:
90 ; X64-CLZ-NEXT: tzcntl %edi, %eax
91 ; X64-CLZ-NEXT: retq
3692 %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
3793 ret i32 %tmp
3894 }
3995
4096 define i64 @cttz_i64(i64 %x) {
41 ; CHECK-LABEL: cttz_i64:
42 ; CHECK: # BB#0:
43 ; CHECK-NEXT: bsfq %rdi, %rax
44 ; CHECK-NEXT: retq
97 ; X32-LABEL: cttz_i64:
98 ; X32: # BB#0:
99 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
100 ; X32-NEXT: testl %eax, %eax
101 ; X32-NEXT: jne .LBB3_1
102 ; X32-NEXT: # BB#2:
103 ; X32-NEXT: bsfl {{[0-9]+}}(%esp), %eax
104 ; X32-NEXT: addl $32, %eax
105 ; X32-NEXT: xorl %edx, %edx
106 ; X32-NEXT: retl
107 ; X32-NEXT: .LBB3_1:
108 ; X32-NEXT: bsfl %eax, %eax
109 ; X32-NEXT: xorl %edx, %edx
110 ; X32-NEXT: retl
111 ;
112 ; X64-LABEL: cttz_i64:
113 ; X64: # BB#0:
114 ; X64-NEXT: bsfq %rdi, %rax
115 ; X64-NEXT: retq
116 ;
117 ; X32-CLZ-LABEL: cttz_i64:
118 ; X32-CLZ: # BB#0:
119 ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
120 ; X32-CLZ-NEXT: testl %eax, %eax
121 ; X32-CLZ-NEXT: jne .LBB3_1
122 ; X32-CLZ-NEXT: # BB#2:
123 ; X32-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
124 ; X32-CLZ-NEXT: addl $32, %eax
125 ; X32-CLZ-NEXT: xorl %edx, %edx
126 ; X32-CLZ-NEXT: retl
127 ; X32-CLZ-NEXT: .LBB3_1:
128 ; X32-CLZ-NEXT: tzcntl %eax, %eax
129 ; X32-CLZ-NEXT: xorl %edx, %edx
130 ; X32-CLZ-NEXT: retl
131 ;
132 ; X64-CLZ-LABEL: cttz_i64:
133 ; X64-CLZ: # BB#0:
134 ; X64-CLZ-NEXT: tzcntq %rdi, %rax
135 ; X64-CLZ-NEXT: retq
45136 %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
46137 ret i64 %tmp
47138 }
48139
49140 define i8 @ctlz_i8(i8 %x) {
50 ; CHECK-LABEL: ctlz_i8:
51 ; CHECK: # BB#0:
52 ; CHECK-NEXT: movzbl %dil, %eax
53 ; CHECK-NEXT: bsrl %eax, %eax
54 ; CHECK-NEXT: xorl $7, %eax
55 ; CHECK-NEXT: # kill
56 ; CHECK-NEXT: retq
141 ; X32-LABEL: ctlz_i8:
142 ; X32: # BB#0:
143 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
144 ; X32-NEXT: bsrl %eax, %eax
145 ; X32-NEXT: xorl $7, %eax
146 ; X32-NEXT: # kill: %AL %AL %EAX
147 ; X32-NEXT: retl
148 ;
149 ; X64-LABEL: ctlz_i8:
150 ; X64: # BB#0:
151 ; X64-NEXT: movzbl %dil, %eax
152 ; X64-NEXT: bsrl %eax, %eax
153 ; X64-NEXT: xorl $7, %eax
154 ; X64-NEXT: # kill: %AL %AL %EAX
155 ; X64-NEXT: retq
156 ;
157 ; X32-CLZ-LABEL: ctlz_i8:
158 ; X32-CLZ: # BB#0:
159 ; X32-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
160 ; X32-CLZ-NEXT: lzcntl %eax, %eax
161 ; X32-CLZ-NEXT: addl $-24, %eax
162 ; X32-CLZ-NEXT: # kill: %AL %AL %EAX
163 ; X32-CLZ-NEXT: retl
164 ;
165 ; X64-CLZ-LABEL: ctlz_i8:
166 ; X64-CLZ: # BB#0:
167 ; X64-CLZ-NEXT: movzbl %dil, %eax
168 ; X64-CLZ-NEXT: lzcntl %eax, %eax
169 ; X64-CLZ-NEXT: addl $-24, %eax
170 ; X64-CLZ-NEXT: # kill: %AL %AL %EAX
171 ; X64-CLZ-NEXT: retq
57172 %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
58173 ret i8 %tmp2
59174 }
60175
61176 define i16 @ctlz_i16(i16 %x) {
62 ; CHECK-LABEL: ctlz_i16:
63 ; CHECK: # BB#0:
64 ; CHECK-NEXT: bsrw %di, %ax
65 ; CHECK-NEXT: xorl $15, %eax
66 ; CHECK-NEXT: # kill
67 ; CHECK-NEXT: retq
177 ; X32-LABEL: ctlz_i16:
178 ; X32: # BB#0:
179 ; X32-NEXT: bsrw {{[0-9]+}}(%esp), %ax
180 ; X32-NEXT: xorl $15, %eax
181 ; X32-NEXT: # kill: %AX %AX %EAX
182 ; X32-NEXT: retl
183 ;
184 ; X64-LABEL: ctlz_i16:
185 ; X64: # BB#0:
186 ; X64-NEXT: bsrw %di, %ax
187 ; X64-NEXT: xorl $15, %eax
188 ; X64-NEXT: # kill: %AX %AX %EAX
189 ; X64-NEXT: retq
190 ;
191 ; X32-CLZ-LABEL: ctlz_i16:
192 ; X32-CLZ: # BB#0:
193 ; X32-CLZ-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
194 ; X32-CLZ-NEXT: retl
195 ;
196 ; X64-CLZ-LABEL: ctlz_i16:
197 ; X64-CLZ: # BB#0:
198 ; X64-CLZ-NEXT: lzcntw %di, %ax
199 ; X64-CLZ-NEXT: retq
68200 %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
69201 ret i16 %tmp2
70202 }
71203
72204 define i32 @ctlz_i32(i32 %x) {
73 ; CHECK-LABEL: ctlz_i32:
74 ; CHECK: # BB#0:
75 ; CHECK-NEXT: bsrl %edi, %eax
76 ; CHECK-NEXT: xorl $31, %eax
77 ; CHECK-NEXT: retq
205 ; X32-LABEL: ctlz_i32:
206 ; X32: # BB#0:
207 ; X32-NEXT: bsrl {{[0-9]+}}(%esp), %eax
208 ; X32-NEXT: xorl $31, %eax
209 ; X32-NEXT: retl
210 ;
211 ; X64-LABEL: ctlz_i32:
212 ; X64: # BB#0:
213 ; X64-NEXT: bsrl %edi, %eax
214 ; X64-NEXT: xorl $31, %eax
215 ; X64-NEXT: retq
216 ;
217 ; X32-CLZ-LABEL: ctlz_i32:
218 ; X32-CLZ: # BB#0:
219 ; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
220 ; X32-CLZ-NEXT: retl
221 ;
222 ; X64-CLZ-LABEL: ctlz_i32:
223 ; X64-CLZ: # BB#0:
224 ; X64-CLZ-NEXT: lzcntl %edi, %eax
225 ; X64-CLZ-NEXT: retq
78226 %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
79227 ret i32 %tmp
80228 }
81229
82230 define i64 @ctlz_i64(i64 %x) {
83 ; CHECK-LABEL: ctlz_i64:
84 ; CHECK: # BB#0:
85 ; CHECK-NEXT: bsrq %rdi, %rax
86 ; CHECK-NEXT: xorq $63, %rax
87 ; CHECK-NEXT: retq
231 ; X32-LABEL: ctlz_i64:
232 ; X32: # BB#0:
233 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
234 ; X32-NEXT: testl %eax, %eax
235 ; X32-NEXT: jne .LBB7_1
236 ; X32-NEXT: # BB#2:
237 ; X32-NEXT: bsrl {{[0-9]+}}(%esp), %eax
238 ; X32-NEXT: xorl $31, %eax
239 ; X32-NEXT: addl $32, %eax
240 ; X32-NEXT: xorl %edx, %edx
241 ; X32-NEXT: retl
242 ; X32-NEXT: .LBB7_1:
243 ; X32-NEXT: bsrl %eax, %eax
244 ; X32-NEXT: xorl $31, %eax
245 ; X32-NEXT: xorl %edx, %edx
246 ; X32-NEXT: retl
247 ;
248 ; X64-LABEL: ctlz_i64:
249 ; X64: # BB#0:
250 ; X64-NEXT: bsrq %rdi, %rax
251 ; X64-NEXT: xorq $63, %rax
252 ; X64-NEXT: retq
253 ;
254 ; X32-CLZ-LABEL: ctlz_i64:
255 ; X32-CLZ: # BB#0:
256 ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
257 ; X32-CLZ-NEXT: testl %eax, %eax
258 ; X32-CLZ-NEXT: jne .LBB7_1
259 ; X32-CLZ-NEXT: # BB#2:
260 ; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
261 ; X32-CLZ-NEXT: addl $32, %eax
262 ; X32-CLZ-NEXT: xorl %edx, %edx
263 ; X32-CLZ-NEXT: retl
264 ; X32-CLZ-NEXT: .LBB7_1:
265 ; X32-CLZ-NEXT: lzcntl %eax, %eax
266 ; X32-CLZ-NEXT: xorl %edx, %edx
267 ; X32-CLZ-NEXT: retl
268 ;
269 ; X64-CLZ-LABEL: ctlz_i64:
270 ; X64-CLZ: # BB#0:
271 ; X64-CLZ-NEXT: lzcntq %rdi, %rax
272 ; X64-CLZ-NEXT: retq
88273 %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
89274 ret i64 %tmp
90275 }
91276
277 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
92278 define i8 @ctlz_i8_zero_test(i8 %n) {
93 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
94
95 ; CHECK-LABEL: ctlz_i8_zero_test:
96 ; CHECK: # BB#0:
97 ; CHECK-NEXT: movb $8, %al
98 ; CHECK-NEXT: testb %dil, %dil
99 ; CHECK-NEXT: je .LBB8_2
100 ; CHECK-NEXT: # BB#1: # %cond.false
101 ; CHECK-NEXT: movzbl %dil, %eax
102 ; CHECK-NEXT: bsrl %eax, %eax
103 ; CHECK-NEXT: xorl $7, %eax
104 ; CHECK-NEXT: .LBB8_2: # %cond.end
105 ; CHECK-NEXT: # kill
106 ; CHECK-NEXT: retq
279 ; X32-LABEL: ctlz_i8_zero_test:
280 ; X32: # BB#0:
281 ; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
282 ; X32-NEXT: movb $8, %al
283 ; X32-NEXT: testb %cl, %cl
284 ; X32-NEXT: je .LBB8_2
285 ; X32-NEXT: # BB#1: # %cond.false
286 ; X32-NEXT: movzbl %cl, %eax
287 ; X32-NEXT: bsrl %eax, %eax
288 ; X32-NEXT: xorl $7, %eax
289 ; X32-NEXT: .LBB8_2: # %cond.end
290 ; X32-NEXT: # kill: %AL %AL %EAX
291 ; X32-NEXT: retl
292 ;
293 ; X64-LABEL: ctlz_i8_zero_test:
294 ; X64: # BB#0:
295 ; X64-NEXT: movb $8, %al
296 ; X64-NEXT: testb %dil, %dil
297 ; X64-NEXT: je .LBB8_2
298 ; X64-NEXT: # BB#1: # %cond.false
299 ; X64-NEXT: movzbl %dil, %eax
300 ; X64-NEXT: bsrl %eax, %eax
301 ; X64-NEXT: xorl $7, %eax
302 ; X64-NEXT: .LBB8_2: # %cond.end
303 ; X64-NEXT: # kill: %AL %AL %EAX
304 ; X64-NEXT: retq
305 ;
306 ; X32-CLZ-LABEL: ctlz_i8_zero_test:
307 ; X32-CLZ: # BB#0:
308 ; X32-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
309 ; X32-CLZ-NEXT: lzcntl %eax, %eax
310 ; X32-CLZ-NEXT: addl $-24, %eax
311 ; X32-CLZ-NEXT: # kill: %AL %AL %EAX
312 ; X32-CLZ-NEXT: retl
313 ;
314 ; X64-CLZ-LABEL: ctlz_i8_zero_test:
315 ; X64-CLZ: # BB#0:
316 ; X64-CLZ-NEXT: movzbl %dil, %eax
317 ; X64-CLZ-NEXT: lzcntl %eax, %eax
318 ; X64-CLZ-NEXT: addl $-24, %eax
319 ; X64-CLZ-NEXT: # kill: %AL %AL %EAX
320 ; X64-CLZ-NEXT: retq
107321 %tmp1 = call i8 @llvm.ctlz.i8(i8 %n, i1 false)
108322 ret i8 %tmp1
109323 }
110324
325 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
111326 define i16 @ctlz_i16_zero_test(i16 %n) {
112 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
113
114 ; CHECK-LABEL: ctlz_i16_zero_test:
115 ; CHECK: # BB#0:
116 ; CHECK-NEXT: movw $16, %ax
117 ; CHECK-NEXT: testw %di, %di
118 ; CHECK-NEXT: je .LBB9_2
119 ; CHECK-NEXT: # BB#1: # %cond.false
120 ; CHECK-NEXT: bsrw %di, %ax
121 ; CHECK-NEXT: xorl $15, %eax
122 ; CHECK-NEXT: .LBB9_2: # %cond.end
123 ; CHECK-NEXT: # kill
124 ; CHECK-NEXT: retq
327 ; X32-LABEL: ctlz_i16_zero_test:
328 ; X32: # BB#0:
329 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
330 ; X32-NEXT: movw $16, %ax
331 ; X32-NEXT: testw %cx, %cx
332 ; X32-NEXT: je .LBB9_2
333 ; X32-NEXT: # BB#1: # %cond.false
334 ; X32-NEXT: bsrw %cx, %ax
335 ; X32-NEXT: xorl $15, %eax
336 ; X32-NEXT: .LBB9_2: # %cond.end
337 ; X32-NEXT: # kill: %AX %AX %EAX
338 ; X32-NEXT: retl
339 ;
340 ; X64-LABEL: ctlz_i16_zero_test:
341 ; X64: # BB#0:
342 ; X64-NEXT: movw $16, %ax
343 ; X64-NEXT: testw %di, %di
344 ; X64-NEXT: je .LBB9_2
345 ; X64-NEXT: # BB#1: # %cond.false
346 ; X64-NEXT: bsrw %di, %ax
347 ; X64-NEXT: xorl $15, %eax
348 ; X64-NEXT: .LBB9_2: # %cond.end
349 ; X64-NEXT: # kill: %AX %AX %EAX
350 ; X64-NEXT: retq
351 ;
352 ; X32-CLZ-LABEL: ctlz_i16_zero_test:
353 ; X32-CLZ: # BB#0:
354 ; X32-CLZ-NEXT: lzcntw {{[0-9]+}}(%esp), %ax
355 ; X32-CLZ-NEXT: retl
356 ;
357 ; X64-CLZ-LABEL: ctlz_i16_zero_test:
358 ; X64-CLZ: # BB#0:
359 ; X64-CLZ-NEXT: lzcntw %di, %ax
360 ; X64-CLZ-NEXT: retq
125361 %tmp1 = call i16 @llvm.ctlz.i16(i16 %n, i1 false)
126362 ret i16 %tmp1
127363 }
128364
365 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
129366 define i32 @ctlz_i32_zero_test(i32 %n) {
130 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
131
132 ; CHECK-LABEL: ctlz_i32_zero_test:
133 ; CHECK: # BB#0:
134 ; CHECK-NEXT: movl $32, %eax
135 ; CHECK-NEXT: testl %edi, %edi
136 ; CHECK-NEXT: je .LBB10_2
137 ; CHECK-NEXT: # BB#1: # %cond.false
138 ; CHECK-NEXT: bsrl %edi, %eax
139 ; CHECK-NEXT: xorl $31, %eax
140 ; CHECK-NEXT: .LBB10_2: # %cond.end
141 ; CHECK-NEXT: retq
367 ; X32-LABEL: ctlz_i32_zero_test:
368 ; X32: # BB#0:
369 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
370 ; X32-NEXT: movl $32, %eax
371 ; X32-NEXT: testl %ecx, %ecx
372 ; X32-NEXT: je .LBB10_2
373 ; X32-NEXT: # BB#1: # %cond.false
374 ; X32-NEXT: bsrl %ecx, %eax
375 ; X32-NEXT: xorl $31, %eax
376 ; X32-NEXT: .LBB10_2: # %cond.end
377 ; X32-NEXT: retl
378 ;
379 ; X64-LABEL: ctlz_i32_zero_test:
380 ; X64: # BB#0:
381 ; X64-NEXT: movl $32, %eax
382 ; X64-NEXT: testl %edi, %edi
383 ; X64-NEXT: je .LBB10_2
384 ; X64-NEXT: # BB#1: # %cond.false
385 ; X64-NEXT: bsrl %edi, %eax
386 ; X64-NEXT: xorl $31, %eax
387 ; X64-NEXT: .LBB10_2: # %cond.end
388 ; X64-NEXT: retq
389 ;
390 ; X32-CLZ-LABEL: ctlz_i32_zero_test:
391 ; X32-CLZ: # BB#0:
392 ; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
393 ; X32-CLZ-NEXT: retl
394 ;
395 ; X64-CLZ-LABEL: ctlz_i32_zero_test:
396 ; X64-CLZ: # BB#0:
397 ; X64-CLZ-NEXT: lzcntl %edi, %eax
398 ; X64-CLZ-NEXT: retq
142399 %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
143400 ret i32 %tmp1
144401 }
145402
403 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
146404 define i64 @ctlz_i64_zero_test(i64 %n) {
147 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
148
149 ; CHECK-LABEL: ctlz_i64_zero_test:
150 ; CHECK: # BB#0:
151 ; CHECK-NEXT: movl $64, %eax
152 ; CHECK-NEXT: testq %rdi, %rdi
153 ; CHECK-NEXT: je .LBB11_2
154 ; CHECK-NEXT: # BB#1: # %cond.false
155 ; CHECK-NEXT: bsrq %rdi, %rax
156 ; CHECK-NEXT: xorq $63, %rax
157 ; CHECK-NEXT: .LBB11_2: # %cond.end
158 ; CHECK-NEXT: retq
405 ; X32-LABEL: ctlz_i64_zero_test:
406 ; X32: # BB#0:
407 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
408 ; X32-NEXT: bsrl {{[0-9]+}}(%esp), %edx
409 ; X32-NEXT: movl $63, %eax
410 ; X32-NEXT: je .LBB11_2
411 ; X32-NEXT: # BB#1:
412 ; X32-NEXT: movl %edx, %eax
413 ; X32-NEXT: .LBB11_2:
414 ; X32-NEXT: testl %ecx, %ecx
415 ; X32-NEXT: jne .LBB11_3
416 ; X32-NEXT: # BB#4:
417 ; X32-NEXT: xorl $31, %eax
418 ; X32-NEXT: addl $32, %eax
419 ; X32-NEXT: xorl %edx, %edx
420 ; X32-NEXT: retl
421 ; X32-NEXT: .LBB11_3:
422 ; X32-NEXT: bsrl %ecx, %eax
423 ; X32-NEXT: xorl $31, %eax
424 ; X32-NEXT: xorl %edx, %edx
425 ; X32-NEXT: retl
426 ;
427 ; X64-LABEL: ctlz_i64_zero_test:
428 ; X64: # BB#0:
429 ; X64-NEXT: movl $64, %eax
430 ; X64-NEXT: testq %rdi, %rdi
431 ; X64-NEXT: je .LBB11_2
432 ; X64-NEXT: # BB#1: # %cond.false
433 ; X64-NEXT: bsrq %rdi, %rax
434 ; X64-NEXT: xorq $63, %rax
435 ; X64-NEXT: .LBB11_2: # %cond.end
436 ; X64-NEXT: retq
437 ;
438 ; X32-CLZ-LABEL: ctlz_i64_zero_test:
439 ; X32-CLZ: # BB#0:
440 ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
441 ; X32-CLZ-NEXT: testl %eax, %eax
442 ; X32-CLZ-NEXT: jne .LBB11_1
443 ; X32-CLZ-NEXT: # BB#2:
444 ; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
445 ; X32-CLZ-NEXT: addl $32, %eax
446 ; X32-CLZ-NEXT: xorl %edx, %edx
447 ; X32-CLZ-NEXT: retl
448 ; X32-CLZ-NEXT: .LBB11_1:
449 ; X32-CLZ-NEXT: lzcntl %eax, %eax
450 ; X32-CLZ-NEXT: xorl %edx, %edx
451 ; X32-CLZ-NEXT: retl
452 ;
453 ; X64-CLZ-LABEL: ctlz_i64_zero_test:
454 ; X64-CLZ: # BB#0:
455 ; X64-CLZ-NEXT: lzcntq %rdi, %rax
456 ; X64-CLZ-NEXT: retq
159457 %tmp1 = call i64 @llvm.ctlz.i64(i64 %n, i1 false)
160458 ret i64 %tmp1
161459 }
162460
461 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
163462 define i8 @cttz_i8_zero_test(i8 %n) {
164 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
165
166 ; CHECK-LABEL: cttz_i8_zero_test:
167 ; CHECK: # BB#0:
168 ; CHECK-NEXT: movb $8, %al
169 ; CHECK-NEXT: testb %dil, %dil
170 ; CHECK-NEXT: je .LBB12_2
171 ; CHECK-NEXT: # BB#1: # %cond.false
172 ; CHECK-NEXT: movzbl %dil, %eax
173 ; CHECK-NEXT: bsfl %eax, %eax
174 ; CHECK-NEXT: .LBB12_2: # %cond.end
175 ; CHECK-NEXT: # kill
176 ; CHECK-NEXT: retq
463 ; X32-LABEL: cttz_i8_zero_test:
464 ; X32: # BB#0:
465 ; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
466 ; X32-NEXT: movb $8, %al
467 ; X32-NEXT: testb %cl, %cl
468 ; X32-NEXT: je .LBB12_2
469 ; X32-NEXT: # BB#1: # %cond.false
470 ; X32-NEXT: movzbl %cl, %eax
471 ; X32-NEXT: bsfl %eax, %eax
472 ; X32-NEXT: .LBB12_2: # %cond.end
473 ; X32-NEXT: # kill: %AL %AL %EAX
474 ; X32-NEXT: retl
475 ;
476 ; X64-LABEL: cttz_i8_zero_test:
477 ; X64: # BB#0:
478 ; X64-NEXT: movb $8, %al
479 ; X64-NEXT: testb %dil, %dil
480 ; X64-NEXT: je .LBB12_2
481 ; X64-NEXT: # BB#1: # %cond.false
482 ; X64-NEXT: movzbl %dil, %eax
483 ; X64-NEXT: bsfl %eax, %eax
484 ; X64-NEXT: .LBB12_2: # %cond.end
485 ; X64-NEXT: # kill: %AL %AL %EAX
486 ; X64-NEXT: retq
487 ;
488 ; X32-CLZ-LABEL: cttz_i8_zero_test:
489 ; X32-CLZ: # BB#0:
490 ; X32-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
491 ; X32-CLZ-NEXT: orl $256, %eax # imm = 0x100
492 ; X32-CLZ-NEXT: tzcntl %eax, %eax
493 ; X32-CLZ-NEXT: # kill: %AL %AL %EAX
494 ; X32-CLZ-NEXT: retl
495 ;
496 ; X64-CLZ-LABEL: cttz_i8_zero_test:
497 ; X64-CLZ: # BB#0:
498 ; X64-CLZ-NEXT: movzbl %dil, %eax
499 ; X64-CLZ-NEXT: orl $256, %eax # imm = 0x100
500 ; X64-CLZ-NEXT: tzcntl %eax, %eax
501 ; X64-CLZ-NEXT: # kill: %AL %AL %EAX
502 ; X64-CLZ-NEXT: retq
177503 %tmp1 = call i8 @llvm.cttz.i8(i8 %n, i1 false)
178504 ret i8 %tmp1
179505 }
180506
507 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
181508 define i16 @cttz_i16_zero_test(i16 %n) {
182 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
183
184 ; CHECK-LABEL: cttz_i16_zero_test:
185 ; CHECK: # BB#0:
186 ; CHECK-NEXT: movw $16, %ax
187 ; CHECK-NEXT: testw %di, %di
188 ; CHECK-NEXT: je .LBB13_2
189 ; CHECK-NEXT: # BB#1: # %cond.false
190 ; CHECK-NEXT: bsfw %di, %ax
191 ; CHECK-NEXT: .LBB13_2: # %cond.end
192 ; CHECK-NEXT: retq
509 ; X32-LABEL: cttz_i16_zero_test:
510 ; X32: # BB#0:
511 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
512 ; X32-NEXT: movw $16, %ax
513 ; X32-NEXT: testw %cx, %cx
514 ; X32-NEXT: je .LBB13_2
515 ; X32-NEXT: # BB#1: # %cond.false
516 ; X32-NEXT: bsfw %cx, %ax
517 ; X32-NEXT: .LBB13_2: # %cond.end
518 ; X32-NEXT: retl
519 ;
520 ; X64-LABEL: cttz_i16_zero_test:
521 ; X64: # BB#0:
522 ; X64-NEXT: movw $16, %ax
523 ; X64-NEXT: testw %di, %di
524 ; X64-NEXT: je .LBB13_2
525 ; X64-NEXT: # BB#1: # %cond.false
526 ; X64-NEXT: bsfw %di, %ax
527 ; X64-NEXT: .LBB13_2: # %cond.end
528 ; X64-NEXT: retq
529 ;
530 ; X32-CLZ-LABEL: cttz_i16_zero_test:
531 ; X32-CLZ: # BB#0:
532 ; X32-CLZ-NEXT: tzcntw {{[0-9]+}}(%esp), %ax
533 ; X32-CLZ-NEXT: retl
534 ;
535 ; X64-CLZ-LABEL: cttz_i16_zero_test:
536 ; X64-CLZ: # BB#0:
537 ; X64-CLZ-NEXT: tzcntw %di, %ax
538 ; X64-CLZ-NEXT: retq
193539 %tmp1 = call i16 @llvm.cttz.i16(i16 %n, i1 false)
194540 ret i16 %tmp1
195541 }
196542
543 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
197544 define i32 @cttz_i32_zero_test(i32 %n) {
198 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
199
200 ; CHECK-LABEL: cttz_i32_zero_test:
201 ; CHECK: # BB#0:
202 ; CHECK-NEXT: movl $32, %eax
203 ; CHECK-NEXT: testl %edi, %edi
204 ; CHECK-NEXT: je .LBB14_2
205 ; CHECK-NEXT: # BB#1: # %cond.false
206 ; CHECK-NEXT: bsfl %edi, %eax
207 ; CHECK-NEXT: .LBB14_2: # %cond.end
208 ; CHECK-NEXT: retq
545 ; X32-LABEL: cttz_i32_zero_test:
546 ; X32: # BB#0:
547 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
548 ; X32-NEXT: movl $32, %eax
549 ; X32-NEXT: testl %ecx, %ecx
550 ; X32-NEXT: je .LBB14_2
551 ; X32-NEXT: # BB#1: # %cond.false
552 ; X32-NEXT: bsfl %ecx, %eax
553 ; X32-NEXT: .LBB14_2: # %cond.end
554 ; X32-NEXT: retl
555 ;
556 ; X64-LABEL: cttz_i32_zero_test:
557 ; X64: # BB#0:
558 ; X64-NEXT: movl $32, %eax
559 ; X64-NEXT: testl %edi, %edi
560 ; X64-NEXT: je .LBB14_2
561 ; X64-NEXT: # BB#1: # %cond.false
562 ; X64-NEXT: bsfl %edi, %eax
563 ; X64-NEXT: .LBB14_2: # %cond.end
564 ; X64-NEXT: retq
565 ;
566 ; X32-CLZ-LABEL: cttz_i32_zero_test:
567 ; X32-CLZ: # BB#0:
568 ; X32-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
569 ; X32-CLZ-NEXT: retl
570 ;
571 ; X64-CLZ-LABEL: cttz_i32_zero_test:
572 ; X64-CLZ: # BB#0:
573 ; X64-CLZ-NEXT: tzcntl %edi, %eax
574 ; X64-CLZ-NEXT: retq
209575 %tmp1 = call i32 @llvm.cttz.i32(i32 %n, i1 false)
210576 ret i32 %tmp1
211577 }
212578
579 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
213580 define i64 @cttz_i64_zero_test(i64 %n) {
214 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
215
216 ; CHECK-LABEL: cttz_i64_zero_test:
217 ; CHECK: # BB#0:
218 ; CHECK-NEXT: movl $64, %eax
219 ; CHECK-NEXT: testq %rdi, %rdi
220 ; CHECK-NEXT: je .LBB15_2
221 ; CHECK-NEXT: # BB#1: # %cond.false
222 ; CHECK-NEXT: bsfq %rdi, %rax
223 ; CHECK-NEXT: .LBB15_2: # %cond.end
224 ; CHECK-NEXT: retq
581 ; X32-LABEL: cttz_i64_zero_test:
582 ; X32: # BB#0:
583 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
584 ; X32-NEXT: bsfl {{[0-9]+}}(%esp), %edx
585 ; X32-NEXT: movl $32, %eax
586 ; X32-NEXT: je .LBB15_2
587 ; X32-NEXT: # BB#1:
588 ; X32-NEXT: movl %edx, %eax
589 ; X32-NEXT: .LBB15_2:
590 ; X32-NEXT: testl %ecx, %ecx
591 ; X32-NEXT: jne .LBB15_3
592 ; X32-NEXT: # BB#4:
593 ; X32-NEXT: addl $32, %eax
594 ; X32-NEXT: xorl %edx, %edx
595 ; X32-NEXT: retl
596 ; X32-NEXT: .LBB15_3:
597 ; X32-NEXT: bsfl %ecx, %eax
598 ; X32-NEXT: xorl %edx, %edx
599 ; X32-NEXT: retl
600 ;
601 ; X64-LABEL: cttz_i64_zero_test:
602 ; X64: # BB#0:
603 ; X64-NEXT: movl $64, %eax
604 ; X64-NEXT: testq %rdi, %rdi
605 ; X64-NEXT: je .LBB15_2
606 ; X64-NEXT: # BB#1: # %cond.false
607 ; X64-NEXT: bsfq %rdi, %rax
608 ; X64-NEXT: .LBB15_2: # %cond.end
609 ; X64-NEXT: retq
610 ;
611 ; X32-CLZ-LABEL: cttz_i64_zero_test:
612 ; X32-CLZ: # BB#0:
613 ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
614 ; X32-CLZ-NEXT: testl %eax, %eax
615 ; X32-CLZ-NEXT: jne .LBB15_1
616 ; X32-CLZ-NEXT: # BB#2:
617 ; X32-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
618 ; X32-CLZ-NEXT: addl $32, %eax
619 ; X32-CLZ-NEXT: xorl %edx, %edx
620 ; X32-CLZ-NEXT: retl
621 ; X32-CLZ-NEXT: .LBB15_1:
622 ; X32-CLZ-NEXT: tzcntl %eax, %eax
623 ; X32-CLZ-NEXT: xorl %edx, %edx
624 ; X32-CLZ-NEXT: retl
625 ;
626 ; X64-CLZ-LABEL: cttz_i64_zero_test:
627 ; X64-CLZ: # BB#0:
628 ; X64-CLZ-NEXT: tzcntq %rdi, %rax
629 ; X64-CLZ-NEXT: retq
225630 %tmp1 = call i64 @llvm.cttz.i64(i64 %n, i1 false)
226631 ret i64 %tmp1
227632 }
228633
229 define i32 @ctlz_i32_fold_cmov(i32 %n) {
230634 ; Don't generate the cmovne when the source is known non-zero (and bsr would
231635 ; not set ZF).
232636 ; rdar://9490949
233637 ; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
234638 ; codegen doesn't know how to delete the movl and je.
235
236 ; CHECK-LABEL: ctlz_i32_fold_cmov:
237 ; CHECK: # BB#0:
238 ; CHECK-NEXT: orl $1, %edi
239 ; CHECK-NEXT: movl $32, %eax
240 ; CHECK-NEXT: je .LBB16_2
241 ; CHECK-NEXT: # BB#1: # %cond.false
242 ; CHECK-NEXT: bsrl %edi, %eax
243 ; CHECK-NEXT: xorl $31, %eax
244 ; CHECK-NEXT: .LBB16_2: # %cond.end
245 ; CHECK-NEXT: retq
639 define i32 @ctlz_i32_fold_cmov(i32 %n) {
640 ; X32-LABEL: ctlz_i32_fold_cmov:
641 ; X32: # BB#0:
642 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
643 ; X32-NEXT: orl $1, %ecx
644 ; X32-NEXT: movl $32, %eax
645 ; X32-NEXT: je .LBB16_2
646 ; X32-NEXT: # BB#1: # %cond.false
647 ; X32-NEXT: bsrl %ecx, %eax
648 ; X32-NEXT: xorl $31, %eax
649 ; X32-NEXT: .LBB16_2: # %cond.end
650 ; X32-NEXT: retl
651 ;
652 ; X64-LABEL: ctlz_i32_fold_cmov:
653 ; X64: # BB#0:
654 ; X64-NEXT: orl $1, %edi
655 ; X64-NEXT: movl $32, %eax
656 ; X64-NEXT: je .LBB16_2
657 ; X64-NEXT: # BB#1: # %cond.false
658 ; X64-NEXT: bsrl %edi, %eax
659 ; X64-NEXT: xorl $31, %eax
660 ; X64-NEXT: .LBB16_2: # %cond.end
661 ; X64-NEXT: retq
662 ;
663 ; X32-CLZ-LABEL: ctlz_i32_fold_cmov:
664 ; X32-CLZ: # BB#0:
665 ; X32-CLZ-NEXT: movl {{[0-9]+}}(%esp), %eax
666 ; X32-CLZ-NEXT: orl $1, %eax
667 ; X32-CLZ-NEXT: lzcntl %eax, %eax
668 ; X32-CLZ-NEXT: retl
669 ;
670 ; X64-CLZ-LABEL: ctlz_i32_fold_cmov:
671 ; X64-CLZ: # BB#0:
672 ; X64-CLZ-NEXT: orl $1, %edi
673 ; X64-CLZ-NEXT: lzcntl %edi, %eax
674 ; X64-CLZ-NEXT: retq
246675 %or = or i32 %n, 1
247676 %tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
248677 ret i32 %tmp1
249678 }
250679
251 define i32 @ctlz_bsr(i32 %n) {
252680 ; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
253681 ; the most significant bit, which is what 'bsr' does natively.
254
255 ; CHECK-LABEL: ctlz_bsr:
256 ; CHECK: # BB#0:
257 ; CHECK-NEXT: bsrl %edi, %eax
258 ; CHECK-NEXT: retq
682 ; FIXME: We should probably select BSR instead of LZCNT in these circumstances.
683 define i32 @ctlz_bsr(i32 %n) {
684 ; X32-LABEL: ctlz_bsr:
685 ; X32: # BB#0:
686 ; X32-NEXT: bsrl {{[0-9]+}}(%esp), %eax
687 ; X32-NEXT: retl
688 ;
689 ; X64-LABEL: ctlz_bsr:
690 ; X64: # BB#0:
691 ; X64-NEXT: bsrl %edi, %eax
692 ; X64-NEXT: retq
693 ;
694 ; X32-CLZ-LABEL: ctlz_bsr:
695 ; X32-CLZ: # BB#0:
696 ; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
697 ; X32-CLZ-NEXT: xorl $31, %eax
698 ; X32-CLZ-NEXT: retl
699 ;
700 ; X64-CLZ-LABEL: ctlz_bsr:
701 ; X64-CLZ: # BB#0:
702 ; X64-CLZ-NEXT: lzcntl %edi, %eax
703 ; X64-CLZ-NEXT: xorl $31, %eax
704 ; X64-CLZ-NEXT: retq
259705 %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
260706 %bsr = xor i32 %ctlz, 31
261707 ret i32 %bsr
262708 }
263709
264 define i32 @ctlz_bsr_zero_test(i32 %n) {
265710 ; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
266711 ; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
267712 ; codegen doesn't know how to combine the $32 and $31 into $63.
268
269 ; CHECK-LABEL: ctlz_bsr_zero_test:
270 ; CHECK: # BB#0:
271 ; CHECK-NEXT: movl $32, %eax
272 ; CHECK-NEXT: testl %edi, %edi
273 ; CHECK-NEXT: je .LBB18_2
274 ; CHECK-NEXT: # BB#1: # %cond.false
275 ; CHECK-NEXT: bsrl %edi, %eax
276 ; CHECK-NEXT: xorl $31, %eax
277 ; CHECK-NEXT: .LBB18_2: # %cond.end
278 ; CHECK-NEXT: xorl $31, %eax
279 ; CHECK-NEXT: retq
713 define i32 @ctlz_bsr_zero_test(i32 %n) {
714 ; X32-LABEL: ctlz_bsr_zero_test:
715 ; X32: # BB#0:
716 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
717 ; X32-NEXT: movl $32, %eax
718 ; X32-NEXT: testl %ecx, %ecx
719 ; X32-NEXT: je .LBB18_2
720 ; X32-NEXT: # BB#1: # %cond.false
721 ; X32-NEXT: bsrl %ecx, %eax
722 ; X32-NEXT: xorl $31, %eax
723 ; X32-NEXT: .LBB18_2: # %cond.end
724 ; X32-NEXT: xorl $31, %eax
725 ; X32-NEXT: retl
726 ;
727 ; X64-LABEL: ctlz_bsr_zero_test:
728 ; X64: # BB#0:
729 ; X64-NEXT: movl $32, %eax
730 ; X64-NEXT: testl %edi, %edi
731 ; X64-NEXT: je .LBB18_2
732 ; X64-NEXT: # BB#1: # %cond.false
733 ; X64-NEXT: bsrl %edi, %eax
734 ; X64-NEXT: xorl $31, %eax
735 ; X64-NEXT: .LBB18_2: # %cond.end
736 ; X64-NEXT: xorl $31, %eax
737 ; X64-NEXT: retq
738 ;
739 ; X32-CLZ-LABEL: ctlz_bsr_zero_test:
740 ; X32-CLZ: # BB#0:
741 ; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax
742 ; X32-CLZ-NEXT: xorl $31, %eax
743 ; X32-CLZ-NEXT: retl
744 ;
745 ; X64-CLZ-LABEL: ctlz_bsr_zero_test:
746 ; X64-CLZ: # BB#0:
747 ; X64-CLZ-NEXT: lzcntl %edi, %eax
748 ; X64-CLZ-NEXT: xorl $31, %eax
749 ; X64-CLZ-NEXT: retq
280750 %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
281751 %bsr = xor i32 %ctlz, 31
282752 ret i32 %bsr