llvm.org GIT mirror llvm / c1307b1
[X86] Added extra widening tests for and/xor/or bit operations

To make sure we're dealing with both cases of legal/illegal number of vector elements and legal/illegal vector element types.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265929 91177308-0d34-0410-b5e6-96231b3b80d8

Simon Pilgrim, 3 years ago
1 changed file with 733 additions and 0 deletions.
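The tests cover all four pairings named in the commit message: i24 reinterpreted as <3 x i8> (illegal element count, legal element type), i24 as <8 x i3> (legal element count, illegal element type), and the reverse bitcasts of <3 x i8> and <8 x i3> back to i24. Every function follows the same bitcast, bitwise op, bitcast shape. As a sketch of that shape (inferred from the visible v8i3 tests, with the autogenerated CHECK lines omitted), the and-on-v3i8 variant looks like:

define i24 @and_i24_as_v3i8(i24 %a, i24 %b) nounwind {
  %1 = bitcast i24 %a to <3 x i8>
  %2 = bitcast i24 %b to <3 x i8>
  %3 = and <3 x i8> %1, %2
  %4 = bitcast <3 x i8> %3 to i24
  ret i24 %4
}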
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X32-SSE --check-prefix=X32-SSE42
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE --check-prefix=X64-SSE42

;
; AND/XOR/OR i24 as v3i8
;
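; NOTE: v3i8 pairs a legal element type (i8) with an illegal element count, so
; the legalizer has to widen the vector before the bitwise op. The
; autogenerated CHECK bodies for this section are collapsed in this diff view.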

define i24 @and_i24_as_v3i8(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: and_i24_as_v3i8:
  ret i24 %4
}

;
; AND/XOR/OR i24 as v8i3
;

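; NOTE: v8i3 pairs a legal element count with an illegal element type, so each
; 3-bit lane is promoted to an i16 vector lane; hence the scalar shift/mask
; sequences feeding pinsrw, and the pextrw extraction, in the checks below.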
define i24 @and_i24_as_v8i3(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: and_i24_as_v8i3:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: pushl %ebp
; X32-SSE-NEXT: movl %esp, %ebp
; X32-SSE-NEXT: andl $-8, %esp
; X32-SSE-NEXT: subl $24, %esp
; X32-SSE-NEXT: movl 12(%ebp), %eax
; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: shrl $16, %eax
; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movl 8(%ebp), %eax
; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: shrl $16, %eax
; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $3, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: movl %eax, %edx
; X32-SSE-NEXT: andl $7, %edx
; X32-SSE-NEXT: movd %edx, %xmm1
; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $6, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $9, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $12, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
; X32-SSE-NEXT: shrl $15, %eax
; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1
; X32-SSE-NEXT: pxor %xmm2, %xmm2
; X32-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $3, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: movl %eax, %edx
; X32-SSE-NEXT: andl $7, %edx
; X32-SSE-NEXT: movd %edx, %xmm0
; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $6, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $9, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $12, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
; X32-SSE-NEXT: shrl $15, %eax
; X32-SSE-NEXT: pinsrw $5, %eax, %xmm0
; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm2[6,7]
; X32-SSE-NEXT: pand %xmm1, %xmm0
; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: movd %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT: shll $16, %ecx
; X32-SSE-NEXT: movzwl (%esp), %eax
; X32-SSE-NEXT: orl %ecx, %eax
; X32-SSE-NEXT: movl %ebp, %esp
; X32-SSE-NEXT: popl %ebp
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: and_i24_as_v8i3:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: movw %si, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: shrl $16, %esi
; X64-SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: shrl $16, %edi
; X64-SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $3, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: movl %eax, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: movd %edx, %xmm0
; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $6, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $9, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $12, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
; X64-SSE-NEXT: shrl $15, %eax
; X64-SSE-NEXT: movzwl %ax, %eax
; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
; X64-SSE-NEXT: xorl %eax, %eax
; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $3, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: movl %ecx, %esi
; X64-SSE-NEXT: andl $7, %esi
; X64-SSE-NEXT: movd %esi, %xmm1
; X64-SSE-NEXT: pinsrw $1, %edx, %xmm1
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $6, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: pinsrw $2, %edx, %xmm1
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $9, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: pinsrw $3, %edx, %xmm1
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $12, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: pinsrw $4, %edx, %xmm1
; X64-SSE-NEXT: shrl $15, %ecx
; X64-SSE-NEXT: movzwl %cx, %ecx
; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm1
; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1
; X64-SSE-NEXT: pinsrw $7, %eax, %xmm1
; X64-SSE-NEXT: pand %xmm0, %xmm1
; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movd %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE-NEXT: shll $16, %ecx
; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: orl %ecx, %eax
; X64-SSE-NEXT: retq
  %1 = bitcast i24 %a to <8 x i3>
  %2 = bitcast i24 %b to <8 x i3>
  %3 = and <8 x i3> %1, %2
  %4 = bitcast <8 x i3> %3 to i24
  ret i24 %4
}

define i24 @xor_i24_as_v8i3(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: xor_i24_as_v8i3:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: pushl %ebp
; X32-SSE-NEXT: movl %esp, %ebp
; X32-SSE-NEXT: andl $-8, %esp
; X32-SSE-NEXT: subl $24, %esp
; X32-SSE-NEXT: movl 12(%ebp), %eax
; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: shrl $16, %eax
; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movl 8(%ebp), %eax
; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: shrl $16, %eax
; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $3, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: movl %eax, %edx
; X32-SSE-NEXT: andl $7, %edx
; X32-SSE-NEXT: movd %edx, %xmm1
; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $6, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $9, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $12, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
; X32-SSE-NEXT: shrl $15, %eax
; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1
; X32-SSE-NEXT: pxor %xmm2, %xmm2
; X32-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $3, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: movl %eax, %edx
; X32-SSE-NEXT: andl $7, %edx
; X32-SSE-NEXT: movd %edx, %xmm0
; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $6, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $9, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $12, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
; X32-SSE-NEXT: shrl $15, %eax
; X32-SSE-NEXT: pinsrw $5, %eax, %xmm0
; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm2[6,7]
; X32-SSE-NEXT: pxor %xmm1, %xmm0
; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: movd %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT: shll $16, %ecx
; X32-SSE-NEXT: movzwl (%esp), %eax
; X32-SSE-NEXT: orl %ecx, %eax
; X32-SSE-NEXT: movl %ebp, %esp
; X32-SSE-NEXT: popl %ebp
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: xor_i24_as_v8i3:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: movw %si, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: shrl $16, %esi
; X64-SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: shrl $16, %edi
; X64-SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $3, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: movl %eax, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: movd %edx, %xmm0
; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $6, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $9, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $12, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
; X64-SSE-NEXT: shrl $15, %eax
; X64-SSE-NEXT: movzwl %ax, %eax
; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
; X64-SSE-NEXT: xorl %eax, %eax
; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $3, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: movl %ecx, %esi
; X64-SSE-NEXT: andl $7, %esi
; X64-SSE-NEXT: movd %esi, %xmm1
; X64-SSE-NEXT: pinsrw $1, %edx, %xmm1
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $6, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: pinsrw $2, %edx, %xmm1
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $9, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: pinsrw $3, %edx, %xmm1
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $12, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: pinsrw $4, %edx, %xmm1
; X64-SSE-NEXT: shrl $15, %ecx
; X64-SSE-NEXT: movzwl %cx, %ecx
; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm1
; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1
; X64-SSE-NEXT: pinsrw $7, %eax, %xmm1
; X64-SSE-NEXT: pxor %xmm0, %xmm1
; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movd %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE-NEXT: shll $16, %ecx
; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: orl %ecx, %eax
; X64-SSE-NEXT: retq
  %1 = bitcast i24 %a to <8 x i3>
  %2 = bitcast i24 %b to <8 x i3>
  %3 = xor <8 x i3> %1, %2
  %4 = bitcast <8 x i3> %3 to i24
  ret i24 %4
}

define i24 @or_i24_as_v8i3(i24 %a, i24 %b) nounwind {
; X32-SSE-LABEL: or_i24_as_v8i3:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: pushl %ebp
; X32-SSE-NEXT: movl %esp, %ebp
; X32-SSE-NEXT: andl $-8, %esp
; X32-SSE-NEXT: subl $24, %esp
; X32-SSE-NEXT: movl 12(%ebp), %eax
; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: shrl $16, %eax
; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movl 8(%ebp), %eax
; X32-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: shrl $16, %eax
; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $3, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: movl %eax, %edx
; X32-SSE-NEXT: andl $7, %edx
; X32-SSE-NEXT: movd %edx, %xmm1
; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $6, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $9, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $12, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
; X32-SSE-NEXT: shrl $15, %eax
; X32-SSE-NEXT: pinsrw $5, %eax, %xmm1
; X32-SSE-NEXT: pxor %xmm2, %xmm2
; X32-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
; X32-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $3, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: movl %eax, %edx
; X32-SSE-NEXT: andl $7, %edx
; X32-SSE-NEXT: movd %edx, %xmm0
; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $6, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $9, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
; X32-SSE-NEXT: movl %eax, %ecx
; X32-SSE-NEXT: shrl $12, %ecx
; X32-SSE-NEXT: andl $7, %ecx
; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
; X32-SSE-NEXT: shrl $15, %eax
; X32-SSE-NEXT: pinsrw $5, %eax, %xmm0
; X32-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm2[6,7]
; X32-SSE-NEXT: por %xmm1, %xmm0
; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: movd %xmm0, %eax
; X32-SSE-NEXT: andl $15, %eax
; X32-SSE-NEXT: movb %al, (%esp)
; X32-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT: shll $16, %ecx
; X32-SSE-NEXT: movzwl (%esp), %eax
; X32-SSE-NEXT: orl %ecx, %eax
; X32-SSE-NEXT: movl %ebp, %esp
; X32-SSE-NEXT: popl %ebp
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: or_i24_as_v8i3:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: movw %si, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: shrl $16, %esi
; X64-SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: shrl $16, %edi
; X64-SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $3, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: movl %eax, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: movd %edx, %xmm0
; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $6, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $9, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
; X64-SSE-NEXT: movl %eax, %ecx
; X64-SSE-NEXT: shrl $12, %ecx
; X64-SSE-NEXT: andl $7, %ecx
; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
; X64-SSE-NEXT: shrl $15, %eax
; X64-SSE-NEXT: movzwl %ax, %eax
; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
; X64-SSE-NEXT: xorl %eax, %eax
; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $3, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: movl %ecx, %esi
; X64-SSE-NEXT: andl $7, %esi
; X64-SSE-NEXT: movd %esi, %xmm1
; X64-SSE-NEXT: pinsrw $1, %edx, %xmm1
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $6, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: pinsrw $2, %edx, %xmm1
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $9, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: pinsrw $3, %edx, %xmm1
; X64-SSE-NEXT: movl %ecx, %edx
; X64-SSE-NEXT: shrl $12, %edx
; X64-SSE-NEXT: andl $7, %edx
; X64-SSE-NEXT: pinsrw $4, %edx, %xmm1
; X64-SSE-NEXT: shrl $15, %ecx
; X64-SSE-NEXT: movzwl %cx, %ecx
; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm1
; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1
; X64-SSE-NEXT: pinsrw $7, %eax, %xmm1
; X64-SSE-NEXT: por %xmm0, %xmm1
; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movd %xmm1, %eax
; X64-SSE-NEXT: andl $15, %eax
; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; X64-SSE-NEXT: shll $16, %ecx
; X64-SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-SSE-NEXT: orl %ecx, %eax
; X64-SSE-NEXT: retq
  %1 = bitcast i24 %a to <8 x i3>
  %2 = bitcast i24 %b to <8 x i3>
  %3 = or <8 x i3> %1, %2
  %4 = bitcast <8 x i3> %3 to i24
  ret i24 %4
}

;
; AND/XOR/OR v3i8 as i24
;

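; NOTE: here the vector operands are bitcast to a scalar i24 first: the three
; i8 lanes are packed into a GPR, the bitwise op runs as a single scalar
; instruction, and the result lanes are unpacked with pextrb.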
define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: and_v3i8_as_i24:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: subl $12, %esp
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: movd %xmm0, %eax
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: movd %xmm0, %ecx
; X32-SSE-NEXT: andl %eax, %ecx
; X32-SSE-NEXT: movd %ecx, %xmm0
; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X32-SSE-NEXT: pextrb $0, %xmm0, %eax
; X32-SSE-NEXT: pextrb $4, %xmm0, %edx
; X32-SSE-NEXT: pextrb $8, %xmm0, %ecx
; X32-SSE-NEXT: addl $12, %esp
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: and_v3i8_as_i24:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pinsrd $1, %r8d, %xmm0
; X64-SSE-NEXT: pinsrd $2, %r9d, %xmm0
; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,4,8,128,u,u,u,u,u,u,u,u,u,u,u,u>
; X64-SSE-NEXT: pshufb %xmm1, %xmm0
; X64-SSE-NEXT: movd %xmm0, %eax
; X64-SSE-NEXT: movd %edi, %xmm0
; X64-SSE-NEXT: pinsrd $1, %esi, %xmm0
; X64-SSE-NEXT: pinsrd $2, %edx, %xmm0
; X64-SSE-NEXT: pshufb %xmm1, %xmm0
; X64-SSE-NEXT: movd %xmm0, %ecx
; X64-SSE-NEXT: andl %eax, %ecx
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-SSE-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE-NEXT: pextrb $4, %xmm0, %edx
; X64-SSE-NEXT: pextrb $8, %xmm0, %ecx
; X64-SSE-NEXT: retq
  %1 = bitcast <3 x i8> %a to i24
  %2 = bitcast <3 x i8> %b to i24
  %3 = and i24 %1, %2
  %4 = bitcast i24 %3 to <3 x i8>
  ret <3 x i8> %4
}

define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: xor_v3i8_as_i24:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: subl $12, %esp
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: movd %xmm0, %eax
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: movd %xmm0, %ecx
; X32-SSE-NEXT: xorl %eax, %ecx
; X32-SSE-NEXT: movd %ecx, %xmm0
; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X32-SSE-NEXT: pextrb $0, %xmm0, %eax
; X32-SSE-NEXT: pextrb $4, %xmm0, %edx
; X32-SSE-NEXT: pextrb $8, %xmm0, %ecx
; X32-SSE-NEXT: addl $12, %esp
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: xor_v3i8_as_i24:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pinsrd $1, %r8d, %xmm0
; X64-SSE-NEXT: pinsrd $2, %r9d, %xmm0
; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,4,8,128,u,u,u,u,u,u,u,u,u,u,u,u>
; X64-SSE-NEXT: pshufb %xmm1, %xmm0
; X64-SSE-NEXT: movd %xmm0, %eax
; X64-SSE-NEXT: movd %edi, %xmm0
; X64-SSE-NEXT: pinsrd $1, %esi, %xmm0
; X64-SSE-NEXT: pinsrd $2, %edx, %xmm0
; X64-SSE-NEXT: pshufb %xmm1, %xmm0
; X64-SSE-NEXT: movd %xmm0, %ecx
; X64-SSE-NEXT: xorl %eax, %ecx
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-SSE-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE-NEXT: pextrb $4, %xmm0, %edx
; X64-SSE-NEXT: pextrb $8, %xmm0, %ecx
; X64-SSE-NEXT: retq
  %1 = bitcast <3 x i8> %a to i24
  %2 = bitcast <3 x i8> %b to i24
  %3 = xor i24 %1, %2
  %4 = bitcast i24 %3 to <3 x i8>
  ret <3 x i8> %4
}

define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-LABEL: or_v3i8_as_i24:
; X32-SSE: # BB#0:
; X32-SSE-NEXT: subl $12, %esp
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: movd %xmm0, %eax
; X32-SSE-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
; X32-SSE-NEXT: movd %xmm0, %ecx
; X32-SSE-NEXT: orl %eax, %ecx
; X32-SSE-NEXT: movd %ecx, %xmm0
; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X32-SSE-NEXT: pextrb $0, %xmm0, %eax
; X32-SSE-NEXT: pextrb $4, %xmm0, %edx
; X32-SSE-NEXT: pextrb $8, %xmm0, %ecx
; X32-SSE-NEXT: addl $12, %esp
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: or_v3i8_as_i24:
; X64-SSE: # BB#0:
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pinsrd $1, %r8d, %xmm0
; X64-SSE-NEXT: pinsrd $2, %r9d, %xmm0
; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,4,8,128,u,u,u,u,u,u,u,u,u,u,u,u>
; X64-SSE-NEXT: pshufb %xmm1, %xmm0
; X64-SSE-NEXT: movd %xmm0, %eax
; X64-SSE-NEXT: movd %edi, %xmm0
; X64-SSE-NEXT: pinsrd $1, %esi, %xmm0
; X64-SSE-NEXT: pinsrd $2, %edx, %xmm0
; X64-SSE-NEXT: pshufb %xmm1, %xmm0
; X64-SSE-NEXT: movd %xmm0, %ecx
; X64-SSE-NEXT: orl %eax, %ecx
; X64-SSE-NEXT: movd %ecx, %xmm0
; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-SSE-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE-NEXT: pextrb $4, %xmm0, %edx
; X64-SSE-NEXT: pextrb $8, %xmm0, %ecx
; X64-SSE-NEXT: retq
  %1 = bitcast <3 x i8> %a to i24
  %2 = bitcast <3 x i8> %b to i24
  %3 = or i24 %1, %2
  %4 = bitcast i24 %3 to <3 x i8>
  ret <3 x i8> %4
}

;
; AND/XOR/OR v8i3 as i24
;

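; NOTE: same pattern with the illegal element type: the eight 3-bit lanes are
; bitcast to a scalar i24 before the bitwise op, so the lanes must again be
; promoted and repacked around a single scalar instruction.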
define <8 x i3> @and_v8i3_as_i24(<8 x i3> %a, <8 x i3> %b) nounwind {
; X32-SSE-LABEL: and_v8i3_as_i24:
; X32-SSE: # BB#0: