llvm.org GIT mirror llvm / 942c2d3
[X86] Added extra widening tests for and/xor/or bit operations Add tests for bitcasting an illegal vector to/from a legal scalar Additional tests requested for D18944 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265930 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 3 years ago
1 changed file with 1315 additions and 0 deletions. Raw diff Collapse all Expand all
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X32-SSE --check-prefix=X32-SSE42
2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE --check-prefix=X64-SSE42
3
4 ;
5 ; AND/XOR/OR i32 as v4i8
6 ;
7
; Bitwise AND of an i32 performed through an illegal <4 x i8> vector type.
; The vector is widened by the legalizer: each byte is zero-extended to a
; dword lane (pmovzxbd), the lanes are ANDed, and the low bytes are packed
; back into a scalar (pshufb + movd). Assertions below are autogenerated llc
; output and must not be edited by hand.
8 define i32 @and_i32_as_v4i8(i32 %a, i32 %b) nounwind {
9 ; X32-SSE-LABEL: and_i32_as_v4i8:
10 ; X32-SSE: # BB#0:
11 ; X32-SSE-NEXT: pushl %eax
12 ; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
13 ; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
14 ; X32-SSE-NEXT: pand %xmm0, %xmm1
15 ; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
16 ; X32-SSE-NEXT: movd %xmm1, %eax
17 ; X32-SSE-NEXT: popl %ecx
18 ; X32-SSE-NEXT: retl
19 ;
20 ; X64-SSE-LABEL: and_i32_as_v4i8:
21 ; X64-SSE: # BB#0:
22 ; X64-SSE-NEXT: movd %esi, %xmm0
23 ; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
24 ; X64-SSE-NEXT: movd %edi, %xmm1
25 ; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
26 ; X64-SSE-NEXT: pand %xmm0, %xmm1
27 ; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
28 ; X64-SSE-NEXT: movd %xmm1, %eax
29 ; X64-SSE-NEXT: retq
30 %1 = bitcast i32 %a to <4 x i8>
31 %2 = bitcast i32 %b to <4 x i8>
32 %3 = and <4 x i8> %1, %2
33 %4 = bitcast <4 x i8> %3 to i32
34 ret i32 %4
35 }
36
; Bitwise XOR of an i32 performed through an illegal <4 x i8> vector type.
; Same widening pattern as the AND case: pmovzxbd to widen each byte to a
; dword lane, pxor on the lanes, pshufb + movd to repack the scalar.
; Assertions below are autogenerated llc output and must not be hand-edited.
37 define i32 @xor_i32_as_v4i8(i32 %a, i32 %b) nounwind {
38 ; X32-SSE-LABEL: xor_i32_as_v4i8:
39 ; X32-SSE: # BB#0:
40 ; X32-SSE-NEXT: pushl %eax
41 ; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
42 ; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
43 ; X32-SSE-NEXT: pxor %xmm0, %xmm1
44 ; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
45 ; X32-SSE-NEXT: movd %xmm1, %eax
46 ; X32-SSE-NEXT: popl %ecx
47 ; X32-SSE-NEXT: retl
48 ;
49 ; X64-SSE-LABEL: xor_i32_as_v4i8:
50 ; X64-SSE: # BB#0:
51 ; X64-SSE-NEXT: movd %esi, %xmm0
52 ; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
53 ; X64-SSE-NEXT: movd %edi, %xmm1
54 ; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
55 ; X64-SSE-NEXT: pxor %xmm0, %xmm1
56 ; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
57 ; X64-SSE-NEXT: movd %xmm1, %eax
58 ; X64-SSE-NEXT: retq
59 %1 = bitcast i32 %a to <4 x i8>
60 %2 = bitcast i32 %b to <4 x i8>
61 %3 = xor <4 x i8> %1, %2
62 %4 = bitcast <4 x i8> %3 to i32
63 ret i32 %4
64 }
65
; Bitwise OR of an i32 performed through an illegal <4 x i8> vector type.
; Same widening pattern as the AND/XOR cases, with por as the vector op.
; Assertions below are autogenerated llc output and must not be hand-edited.
66 define i32 @or_i32_as_v4i8(i32 %a, i32 %b) nounwind {
67 ; X32-SSE-LABEL: or_i32_as_v4i8:
68 ; X32-SSE: # BB#0:
69 ; X32-SSE-NEXT: pushl %eax
70 ; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
71 ; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
72 ; X32-SSE-NEXT: por %xmm0, %xmm1
73 ; X32-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
74 ; X32-SSE-NEXT: movd %xmm1, %eax
75 ; X32-SSE-NEXT: popl %ecx
76 ; X32-SSE-NEXT: retl
77 ;
78 ; X64-SSE-LABEL: or_i32_as_v4i8:
79 ; X64-SSE: # BB#0:
80 ; X64-SSE-NEXT: movd %esi, %xmm0
81 ; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
82 ; X64-SSE-NEXT: movd %edi, %xmm1
83 ; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
84 ; X64-SSE-NEXT: por %xmm0, %xmm1
85 ; X64-SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
86 ; X64-SSE-NEXT: movd %xmm1, %eax
87 ; X64-SSE-NEXT: retq
88 %1 = bitcast i32 %a to <4 x i8>
89 %2 = bitcast i32 %b to <4 x i8>
90 %3 = or <4 x i8> %1, %2
91 %4 = bitcast <4 x i8> %3 to i32
92 ret i32 %4
93 }
94
95 ;
96 ; AND/XOR/OR i32 as v8i4
97 ;
98
; Bitwise AND of an i32 performed through an illegal <8 x i4> vector type.
; i4 is not a legal element type, so codegen extracts each nibble with
; shift/mask (shrl + andl $15), inserts it into a v8i16 lane (pinsrw),
; performs the vector pand, then extracts each lane (pextrw), masks it back
; to 4 bits, and spills bytes to the stack to rebuild the i32 result — a very
; expensive expansion this test documents. Assertions are autogenerated llc
; output and must not be edited by hand.
99 define i32 @and_i32_as_v8i4(i32 %a, i32 %b) nounwind {
100 ; X32-SSE-LABEL: and_i32_as_v8i4:
101 ; X32-SSE: # BB#0:
102 ; X32-SSE-NEXT: pushl %ebp
103 ; X32-SSE-NEXT: movl %esp, %ebp
104 ; X32-SSE-NEXT: andl $-8, %esp
105 ; X32-SSE-NEXT: subl $24, %esp
106 ; X32-SSE-NEXT: movl 12(%ebp), %eax
107 ; X32-SSE-NEXT: movl %eax, %ecx
108 ; X32-SSE-NEXT: shrl $4, %ecx
109 ; X32-SSE-NEXT: andl $15, %ecx
110 ; X32-SSE-NEXT: movl %eax, %edx
111 ; X32-SSE-NEXT: andl $15, %edx
112 ; X32-SSE-NEXT: movd %edx, %xmm0
113 ; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
114 ; X32-SSE-NEXT: movl %eax, %ecx
115 ; X32-SSE-NEXT: shrl $8, %ecx
116 ; X32-SSE-NEXT: andl $15, %ecx
117 ; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
118 ; X32-SSE-NEXT: movl %eax, %ecx
119 ; X32-SSE-NEXT: shrl $12, %ecx
120 ; X32-SSE-NEXT: andl $15, %ecx
121 ; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
122 ; X32-SSE-NEXT: movl %eax, %ecx
123 ; X32-SSE-NEXT: shrl $16, %ecx
124 ; X32-SSE-NEXT: andl $15, %ecx
125 ; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
126 ; X32-SSE-NEXT: movl %eax, %ecx
127 ; X32-SSE-NEXT: shrl $20, %ecx
128 ; X32-SSE-NEXT: andl $15, %ecx
129 ; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm0
130 ; X32-SSE-NEXT: movl %eax, %ecx
131 ; X32-SSE-NEXT: shrl $24, %ecx
132 ; X32-SSE-NEXT: andl $15, %ecx
133 ; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm0
134 ; X32-SSE-NEXT: shrl $28, %eax
135 ; X32-SSE-NEXT: pinsrw $7, %eax, %xmm0
136 ; X32-SSE-NEXT: movl 8(%ebp), %eax
137 ; X32-SSE-NEXT: movl %eax, %ecx
138 ; X32-SSE-NEXT: shrl $4, %ecx
139 ; X32-SSE-NEXT: andl $15, %ecx
140 ; X32-SSE-NEXT: movl %eax, %edx
141 ; X32-SSE-NEXT: andl $15, %edx
142 ; X32-SSE-NEXT: movd %edx, %xmm1
143 ; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
144 ; X32-SSE-NEXT: movl %eax, %ecx
145 ; X32-SSE-NEXT: shrl $8, %ecx
146 ; X32-SSE-NEXT: andl $15, %ecx
147 ; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
148 ; X32-SSE-NEXT: movl %eax, %ecx
149 ; X32-SSE-NEXT: shrl $12, %ecx
150 ; X32-SSE-NEXT: andl $15, %ecx
151 ; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
152 ; X32-SSE-NEXT: movl %eax, %ecx
153 ; X32-SSE-NEXT: shrl $16, %ecx
154 ; X32-SSE-NEXT: andl $15, %ecx
155 ; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
156 ; X32-SSE-NEXT: movl %eax, %ecx
157 ; X32-SSE-NEXT: shrl $20, %ecx
158 ; X32-SSE-NEXT: andl $15, %ecx
159 ; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm1
160 ; X32-SSE-NEXT: movl %eax, %ecx
161 ; X32-SSE-NEXT: shrl $24, %ecx
162 ; X32-SSE-NEXT: andl $15, %ecx
163 ; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm1
164 ; X32-SSE-NEXT: shrl $28, %eax
165 ; X32-SSE-NEXT: pinsrw $7, %eax, %xmm1
166 ; X32-SSE-NEXT: pand %xmm0, %xmm1
167 ; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
168 ; X32-SSE-NEXT: andl $15, %eax
169 ; X32-SSE-NEXT: movb %al, (%esp)
170 ; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
171 ; X32-SSE-NEXT: andl $15, %eax
172 ; X32-SSE-NEXT: movb %al, (%esp)
173 ; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
174 ; X32-SSE-NEXT: andl $15, %eax
175 ; X32-SSE-NEXT: movb %al, (%esp)
176 ; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
177 ; X32-SSE-NEXT: andl $15, %eax
178 ; X32-SSE-NEXT: movb %al, (%esp)
179 ; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
180 ; X32-SSE-NEXT: andl $15, %eax
181 ; X32-SSE-NEXT: movb %al, (%esp)
182 ; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
183 ; X32-SSE-NEXT: andl $15, %eax
184 ; X32-SSE-NEXT: movb %al, (%esp)
185 ; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
186 ; X32-SSE-NEXT: andl $15, %eax
187 ; X32-SSE-NEXT: movb %al, (%esp)
188 ; X32-SSE-NEXT: movd %xmm1, %eax
189 ; X32-SSE-NEXT: andl $15, %eax
190 ; X32-SSE-NEXT: movb %al, (%esp)
191 ; X32-SSE-NEXT: movl (%esp), %eax
192 ; X32-SSE-NEXT: movl %ebp, %esp
193 ; X32-SSE-NEXT: popl %ebp
194 ; X32-SSE-NEXT: retl
195 ;
196 ; X64-SSE-LABEL: and_i32_as_v8i4:
197 ; X64-SSE: # BB#0:
198 ; X64-SSE-NEXT: movl %esi, %eax
199 ; X64-SSE-NEXT: shrl $4, %eax
200 ; X64-SSE-NEXT: andl $15, %eax
201 ; X64-SSE-NEXT: movl %esi, %ecx
202 ; X64-SSE-NEXT: andl $15, %ecx
203 ; X64-SSE-NEXT: movd %ecx, %xmm0
204 ; X64-SSE-NEXT: pinsrw $1, %eax, %xmm0
205 ; X64-SSE-NEXT: movl %esi, %eax
206 ; X64-SSE-NEXT: shrl $8, %eax
207 ; X64-SSE-NEXT: andl $15, %eax
208 ; X64-SSE-NEXT: pinsrw $2, %eax, %xmm0
209 ; X64-SSE-NEXT: movl %esi, %eax
210 ; X64-SSE-NEXT: shrl $12, %eax
211 ; X64-SSE-NEXT: andl $15, %eax
212 ; X64-SSE-NEXT: pinsrw $3, %eax, %xmm0
213 ; X64-SSE-NEXT: movl %esi, %eax
214 ; X64-SSE-NEXT: shrl $16, %eax
215 ; X64-SSE-NEXT: andl $15, %eax
216 ; X64-SSE-NEXT: pinsrw $4, %eax, %xmm0
217 ; X64-SSE-NEXT: movl %esi, %eax
218 ; X64-SSE-NEXT: shrl $20, %eax
219 ; X64-SSE-NEXT: andl $15, %eax
220 ; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
221 ; X64-SSE-NEXT: movl %esi, %eax
222 ; X64-SSE-NEXT: shrl $24, %eax
223 ; X64-SSE-NEXT: andl $15, %eax
224 ; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
225 ; X64-SSE-NEXT: shrl $28, %esi
226 ; X64-SSE-NEXT: pinsrw $7, %esi, %xmm0
227 ; X64-SSE-NEXT: movl %edi, %eax
228 ; X64-SSE-NEXT: shrl $4, %eax
229 ; X64-SSE-NEXT: andl $15, %eax
230 ; X64-SSE-NEXT: movl %edi, %ecx
231 ; X64-SSE-NEXT: andl $15, %ecx
232 ; X64-SSE-NEXT: movd %ecx, %xmm1
233 ; X64-SSE-NEXT: pinsrw $1, %eax, %xmm1
234 ; X64-SSE-NEXT: movl %edi, %eax
235 ; X64-SSE-NEXT: shrl $8, %eax
236 ; X64-SSE-NEXT: andl $15, %eax
237 ; X64-SSE-NEXT: pinsrw $2, %eax, %xmm1
238 ; X64-SSE-NEXT: movl %edi, %eax
239 ; X64-SSE-NEXT: shrl $12, %eax
240 ; X64-SSE-NEXT: andl $15, %eax
241 ; X64-SSE-NEXT: pinsrw $3, %eax, %xmm1
242 ; X64-SSE-NEXT: movl %edi, %eax
243 ; X64-SSE-NEXT: shrl $16, %eax
244 ; X64-SSE-NEXT: andl $15, %eax
245 ; X64-SSE-NEXT: pinsrw $4, %eax, %xmm1
246 ; X64-SSE-NEXT: movl %edi, %eax
247 ; X64-SSE-NEXT: shrl $20, %eax
248 ; X64-SSE-NEXT: andl $15, %eax
249 ; X64-SSE-NEXT: pinsrw $5, %eax, %xmm1
250 ; X64-SSE-NEXT: movl %edi, %eax
251 ; X64-SSE-NEXT: shrl $24, %eax
252 ; X64-SSE-NEXT: andl $15, %eax
253 ; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1
254 ; X64-SSE-NEXT: shrl $28, %edi
255 ; X64-SSE-NEXT: pinsrw $7, %edi, %xmm1
256 ; X64-SSE-NEXT: pand %xmm0, %xmm1
257 ; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
258 ; X64-SSE-NEXT: andl $15, %eax
259 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
260 ; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
261 ; X64-SSE-NEXT: andl $15, %eax
262 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
263 ; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
264 ; X64-SSE-NEXT: andl $15, %eax
265 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
266 ; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
267 ; X64-SSE-NEXT: andl $15, %eax
268 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
269 ; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
270 ; X64-SSE-NEXT: andl $15, %eax
271 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
272 ; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
273 ; X64-SSE-NEXT: andl $15, %eax
274 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
275 ; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
276 ; X64-SSE-NEXT: andl $15, %eax
277 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
278 ; X64-SSE-NEXT: movd %xmm1, %eax
279 ; X64-SSE-NEXT: andl $15, %eax
280 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
281 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
282 ; X64-SSE-NEXT: retq
283 %1 = bitcast i32 %a to <8 x i4>
284 %2 = bitcast i32 %b to <8 x i4>
285 %3 = and <8 x i4> %1, %2
286 %4 = bitcast <8 x i4> %3 to i32
287 ret i32 %4
288 }
289
; Bitwise XOR of an i32 performed through an illegal <8 x i4> vector type.
; Identical nibble-by-nibble expansion to the AND case (shift/mask extract,
; pinsrw insert, vector pxor, pextrw/mask/stack-spill repack). Assertions
; are autogenerated llc output and must not be edited by hand.
290 define i32 @xor_i32_as_v8i4(i32 %a, i32 %b) nounwind {
291 ; X32-SSE-LABEL: xor_i32_as_v8i4:
292 ; X32-SSE: # BB#0:
293 ; X32-SSE-NEXT: pushl %ebp
294 ; X32-SSE-NEXT: movl %esp, %ebp
295 ; X32-SSE-NEXT: andl $-8, %esp
296 ; X32-SSE-NEXT: subl $24, %esp
297 ; X32-SSE-NEXT: movl 12(%ebp), %eax
298 ; X32-SSE-NEXT: movl %eax, %ecx
299 ; X32-SSE-NEXT: shrl $4, %ecx
300 ; X32-SSE-NEXT: andl $15, %ecx
301 ; X32-SSE-NEXT: movl %eax, %edx
302 ; X32-SSE-NEXT: andl $15, %edx
303 ; X32-SSE-NEXT: movd %edx, %xmm0
304 ; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
305 ; X32-SSE-NEXT: movl %eax, %ecx
306 ; X32-SSE-NEXT: shrl $8, %ecx
307 ; X32-SSE-NEXT: andl $15, %ecx
308 ; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
309 ; X32-SSE-NEXT: movl %eax, %ecx
310 ; X32-SSE-NEXT: shrl $12, %ecx
311 ; X32-SSE-NEXT: andl $15, %ecx
312 ; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
313 ; X32-SSE-NEXT: movl %eax, %ecx
314 ; X32-SSE-NEXT: shrl $16, %ecx
315 ; X32-SSE-NEXT: andl $15, %ecx
316 ; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
317 ; X32-SSE-NEXT: movl %eax, %ecx
318 ; X32-SSE-NEXT: shrl $20, %ecx
319 ; X32-SSE-NEXT: andl $15, %ecx
320 ; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm0
321 ; X32-SSE-NEXT: movl %eax, %ecx
322 ; X32-SSE-NEXT: shrl $24, %ecx
323 ; X32-SSE-NEXT: andl $15, %ecx
324 ; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm0
325 ; X32-SSE-NEXT: shrl $28, %eax
326 ; X32-SSE-NEXT: pinsrw $7, %eax, %xmm0
327 ; X32-SSE-NEXT: movl 8(%ebp), %eax
328 ; X32-SSE-NEXT: movl %eax, %ecx
329 ; X32-SSE-NEXT: shrl $4, %ecx
330 ; X32-SSE-NEXT: andl $15, %ecx
331 ; X32-SSE-NEXT: movl %eax, %edx
332 ; X32-SSE-NEXT: andl $15, %edx
333 ; X32-SSE-NEXT: movd %edx, %xmm1
334 ; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
335 ; X32-SSE-NEXT: movl %eax, %ecx
336 ; X32-SSE-NEXT: shrl $8, %ecx
337 ; X32-SSE-NEXT: andl $15, %ecx
338 ; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
339 ; X32-SSE-NEXT: movl %eax, %ecx
340 ; X32-SSE-NEXT: shrl $12, %ecx
341 ; X32-SSE-NEXT: andl $15, %ecx
342 ; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
343 ; X32-SSE-NEXT: movl %eax, %ecx
344 ; X32-SSE-NEXT: shrl $16, %ecx
345 ; X32-SSE-NEXT: andl $15, %ecx
346 ; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
347 ; X32-SSE-NEXT: movl %eax, %ecx
348 ; X32-SSE-NEXT: shrl $20, %ecx
349 ; X32-SSE-NEXT: andl $15, %ecx
350 ; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm1
351 ; X32-SSE-NEXT: movl %eax, %ecx
352 ; X32-SSE-NEXT: shrl $24, %ecx
353 ; X32-SSE-NEXT: andl $15, %ecx
354 ; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm1
355 ; X32-SSE-NEXT: shrl $28, %eax
356 ; X32-SSE-NEXT: pinsrw $7, %eax, %xmm1
357 ; X32-SSE-NEXT: pxor %xmm0, %xmm1
358 ; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
359 ; X32-SSE-NEXT: andl $15, %eax
360 ; X32-SSE-NEXT: movb %al, (%esp)
361 ; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
362 ; X32-SSE-NEXT: andl $15, %eax
363 ; X32-SSE-NEXT: movb %al, (%esp)
364 ; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
365 ; X32-SSE-NEXT: andl $15, %eax
366 ; X32-SSE-NEXT: movb %al, (%esp)
367 ; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
368 ; X32-SSE-NEXT: andl $15, %eax
369 ; X32-SSE-NEXT: movb %al, (%esp)
370 ; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
371 ; X32-SSE-NEXT: andl $15, %eax
372 ; X32-SSE-NEXT: movb %al, (%esp)
373 ; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
374 ; X32-SSE-NEXT: andl $15, %eax
375 ; X32-SSE-NEXT: movb %al, (%esp)
376 ; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
377 ; X32-SSE-NEXT: andl $15, %eax
378 ; X32-SSE-NEXT: movb %al, (%esp)
379 ; X32-SSE-NEXT: movd %xmm1, %eax
380 ; X32-SSE-NEXT: andl $15, %eax
381 ; X32-SSE-NEXT: movb %al, (%esp)
382 ; X32-SSE-NEXT: movl (%esp), %eax
383 ; X32-SSE-NEXT: movl %ebp, %esp
384 ; X32-SSE-NEXT: popl %ebp
385 ; X32-SSE-NEXT: retl
386 ;
387 ; X64-SSE-LABEL: xor_i32_as_v8i4:
388 ; X64-SSE: # BB#0:
389 ; X64-SSE-NEXT: movl %esi, %eax
390 ; X64-SSE-NEXT: shrl $4, %eax
391 ; X64-SSE-NEXT: andl $15, %eax
392 ; X64-SSE-NEXT: movl %esi, %ecx
393 ; X64-SSE-NEXT: andl $15, %ecx
394 ; X64-SSE-NEXT: movd %ecx, %xmm0
395 ; X64-SSE-NEXT: pinsrw $1, %eax, %xmm0
396 ; X64-SSE-NEXT: movl %esi, %eax
397 ; X64-SSE-NEXT: shrl $8, %eax
398 ; X64-SSE-NEXT: andl $15, %eax
399 ; X64-SSE-NEXT: pinsrw $2, %eax, %xmm0
400 ; X64-SSE-NEXT: movl %esi, %eax
401 ; X64-SSE-NEXT: shrl $12, %eax
402 ; X64-SSE-NEXT: andl $15, %eax
403 ; X64-SSE-NEXT: pinsrw $3, %eax, %xmm0
404 ; X64-SSE-NEXT: movl %esi, %eax
405 ; X64-SSE-NEXT: shrl $16, %eax
406 ; X64-SSE-NEXT: andl $15, %eax
407 ; X64-SSE-NEXT: pinsrw $4, %eax, %xmm0
408 ; X64-SSE-NEXT: movl %esi, %eax
409 ; X64-SSE-NEXT: shrl $20, %eax
410 ; X64-SSE-NEXT: andl $15, %eax
411 ; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
412 ; X64-SSE-NEXT: movl %esi, %eax
413 ; X64-SSE-NEXT: shrl $24, %eax
414 ; X64-SSE-NEXT: andl $15, %eax
415 ; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
416 ; X64-SSE-NEXT: shrl $28, %esi
417 ; X64-SSE-NEXT: pinsrw $7, %esi, %xmm0
418 ; X64-SSE-NEXT: movl %edi, %eax
419 ; X64-SSE-NEXT: shrl $4, %eax
420 ; X64-SSE-NEXT: andl $15, %eax
421 ; X64-SSE-NEXT: movl %edi, %ecx
422 ; X64-SSE-NEXT: andl $15, %ecx
423 ; X64-SSE-NEXT: movd %ecx, %xmm1
424 ; X64-SSE-NEXT: pinsrw $1, %eax, %xmm1
425 ; X64-SSE-NEXT: movl %edi, %eax
426 ; X64-SSE-NEXT: shrl $8, %eax
427 ; X64-SSE-NEXT: andl $15, %eax
428 ; X64-SSE-NEXT: pinsrw $2, %eax, %xmm1
429 ; X64-SSE-NEXT: movl %edi, %eax
430 ; X64-SSE-NEXT: shrl $12, %eax
431 ; X64-SSE-NEXT: andl $15, %eax
432 ; X64-SSE-NEXT: pinsrw $3, %eax, %xmm1
433 ; X64-SSE-NEXT: movl %edi, %eax
434 ; X64-SSE-NEXT: shrl $16, %eax
435 ; X64-SSE-NEXT: andl $15, %eax
436 ; X64-SSE-NEXT: pinsrw $4, %eax, %xmm1
437 ; X64-SSE-NEXT: movl %edi, %eax
438 ; X64-SSE-NEXT: shrl $20, %eax
439 ; X64-SSE-NEXT: andl $15, %eax
440 ; X64-SSE-NEXT: pinsrw $5, %eax, %xmm1
441 ; X64-SSE-NEXT: movl %edi, %eax
442 ; X64-SSE-NEXT: shrl $24, %eax
443 ; X64-SSE-NEXT: andl $15, %eax
444 ; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1
445 ; X64-SSE-NEXT: shrl $28, %edi
446 ; X64-SSE-NEXT: pinsrw $7, %edi, %xmm1
447 ; X64-SSE-NEXT: pxor %xmm0, %xmm1
448 ; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
449 ; X64-SSE-NEXT: andl $15, %eax
450 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
451 ; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
452 ; X64-SSE-NEXT: andl $15, %eax
453 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
454 ; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
455 ; X64-SSE-NEXT: andl $15, %eax
456 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
457 ; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
458 ; X64-SSE-NEXT: andl $15, %eax
459 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
460 ; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
461 ; X64-SSE-NEXT: andl $15, %eax
462 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
463 ; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
464 ; X64-SSE-NEXT: andl $15, %eax
465 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
466 ; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
467 ; X64-SSE-NEXT: andl $15, %eax
468 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
469 ; X64-SSE-NEXT: movd %xmm1, %eax
470 ; X64-SSE-NEXT: andl $15, %eax
471 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
472 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
473 ; X64-SSE-NEXT: retq
474 %1 = bitcast i32 %a to <8 x i4>
475 %2 = bitcast i32 %b to <8 x i4>
476 %3 = xor <8 x i4> %1, %2
477 %4 = bitcast <8 x i4> %3 to i32
478 ret i32 %4
479 }
480
; Bitwise OR of an i32 performed through an illegal <8 x i4> vector type.
; Identical nibble-by-nibble expansion to the AND/XOR cases, with por as
; the vector op. Assertions are autogenerated llc output and must not be
; edited by hand.
481 define i32 @or_i32_as_v8i4(i32 %a, i32 %b) nounwind {
482 ; X32-SSE-LABEL: or_i32_as_v8i4:
483 ; X32-SSE: # BB#0:
484 ; X32-SSE-NEXT: pushl %ebp
485 ; X32-SSE-NEXT: movl %esp, %ebp
486 ; X32-SSE-NEXT: andl $-8, %esp
487 ; X32-SSE-NEXT: subl $24, %esp
488 ; X32-SSE-NEXT: movl 12(%ebp), %eax
489 ; X32-SSE-NEXT: movl %eax, %ecx
490 ; X32-SSE-NEXT: shrl $4, %ecx
491 ; X32-SSE-NEXT: andl $15, %ecx
492 ; X32-SSE-NEXT: movl %eax, %edx
493 ; X32-SSE-NEXT: andl $15, %edx
494 ; X32-SSE-NEXT: movd %edx, %xmm0
495 ; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
496 ; X32-SSE-NEXT: movl %eax, %ecx
497 ; X32-SSE-NEXT: shrl $8, %ecx
498 ; X32-SSE-NEXT: andl $15, %ecx
499 ; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
500 ; X32-SSE-NEXT: movl %eax, %ecx
501 ; X32-SSE-NEXT: shrl $12, %ecx
502 ; X32-SSE-NEXT: andl $15, %ecx
503 ; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
504 ; X32-SSE-NEXT: movl %eax, %ecx
505 ; X32-SSE-NEXT: shrl $16, %ecx
506 ; X32-SSE-NEXT: andl $15, %ecx
507 ; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
508 ; X32-SSE-NEXT: movl %eax, %ecx
509 ; X32-SSE-NEXT: shrl $20, %ecx
510 ; X32-SSE-NEXT: andl $15, %ecx
511 ; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm0
512 ; X32-SSE-NEXT: movl %eax, %ecx
513 ; X32-SSE-NEXT: shrl $24, %ecx
514 ; X32-SSE-NEXT: andl $15, %ecx
515 ; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm0
516 ; X32-SSE-NEXT: shrl $28, %eax
517 ; X32-SSE-NEXT: pinsrw $7, %eax, %xmm0
518 ; X32-SSE-NEXT: movl 8(%ebp), %eax
519 ; X32-SSE-NEXT: movl %eax, %ecx
520 ; X32-SSE-NEXT: shrl $4, %ecx
521 ; X32-SSE-NEXT: andl $15, %ecx
522 ; X32-SSE-NEXT: movl %eax, %edx
523 ; X32-SSE-NEXT: andl $15, %edx
524 ; X32-SSE-NEXT: movd %edx, %xmm1
525 ; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm1
526 ; X32-SSE-NEXT: movl %eax, %ecx
527 ; X32-SSE-NEXT: shrl $8, %ecx
528 ; X32-SSE-NEXT: andl $15, %ecx
529 ; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm1
530 ; X32-SSE-NEXT: movl %eax, %ecx
531 ; X32-SSE-NEXT: shrl $12, %ecx
532 ; X32-SSE-NEXT: andl $15, %ecx
533 ; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm1
534 ; X32-SSE-NEXT: movl %eax, %ecx
535 ; X32-SSE-NEXT: shrl $16, %ecx
536 ; X32-SSE-NEXT: andl $15, %ecx
537 ; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm1
538 ; X32-SSE-NEXT: movl %eax, %ecx
539 ; X32-SSE-NEXT: shrl $20, %ecx
540 ; X32-SSE-NEXT: andl $15, %ecx
541 ; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm1
542 ; X32-SSE-NEXT: movl %eax, %ecx
543 ; X32-SSE-NEXT: shrl $24, %ecx
544 ; X32-SSE-NEXT: andl $15, %ecx
545 ; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm1
546 ; X32-SSE-NEXT: shrl $28, %eax
547 ; X32-SSE-NEXT: pinsrw $7, %eax, %xmm1
548 ; X32-SSE-NEXT: por %xmm0, %xmm1
549 ; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
550 ; X32-SSE-NEXT: andl $15, %eax
551 ; X32-SSE-NEXT: movb %al, (%esp)
552 ; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
553 ; X32-SSE-NEXT: andl $15, %eax
554 ; X32-SSE-NEXT: movb %al, (%esp)
555 ; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
556 ; X32-SSE-NEXT: andl $15, %eax
557 ; X32-SSE-NEXT: movb %al, (%esp)
558 ; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
559 ; X32-SSE-NEXT: andl $15, %eax
560 ; X32-SSE-NEXT: movb %al, (%esp)
561 ; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
562 ; X32-SSE-NEXT: andl $15, %eax
563 ; X32-SSE-NEXT: movb %al, (%esp)
564 ; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
565 ; X32-SSE-NEXT: andl $15, %eax
566 ; X32-SSE-NEXT: movb %al, (%esp)
567 ; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
568 ; X32-SSE-NEXT: andl $15, %eax
569 ; X32-SSE-NEXT: movb %al, (%esp)
570 ; X32-SSE-NEXT: movd %xmm1, %eax
571 ; X32-SSE-NEXT: andl $15, %eax
572 ; X32-SSE-NEXT: movb %al, (%esp)
573 ; X32-SSE-NEXT: movl (%esp), %eax
574 ; X32-SSE-NEXT: movl %ebp, %esp
575 ; X32-SSE-NEXT: popl %ebp
576 ; X32-SSE-NEXT: retl
577 ;
578 ; X64-SSE-LABEL: or_i32_as_v8i4:
579 ; X64-SSE: # BB#0:
580 ; X64-SSE-NEXT: movl %esi, %eax
581 ; X64-SSE-NEXT: shrl $4, %eax
582 ; X64-SSE-NEXT: andl $15, %eax
583 ; X64-SSE-NEXT: movl %esi, %ecx
584 ; X64-SSE-NEXT: andl $15, %ecx
585 ; X64-SSE-NEXT: movd %ecx, %xmm0
586 ; X64-SSE-NEXT: pinsrw $1, %eax, %xmm0
587 ; X64-SSE-NEXT: movl %esi, %eax
588 ; X64-SSE-NEXT: shrl $8, %eax
589 ; X64-SSE-NEXT: andl $15, %eax
590 ; X64-SSE-NEXT: pinsrw $2, %eax, %xmm0
591 ; X64-SSE-NEXT: movl %esi, %eax
592 ; X64-SSE-NEXT: shrl $12, %eax
593 ; X64-SSE-NEXT: andl $15, %eax
594 ; X64-SSE-NEXT: pinsrw $3, %eax, %xmm0
595 ; X64-SSE-NEXT: movl %esi, %eax
596 ; X64-SSE-NEXT: shrl $16, %eax
597 ; X64-SSE-NEXT: andl $15, %eax
598 ; X64-SSE-NEXT: pinsrw $4, %eax, %xmm0
599 ; X64-SSE-NEXT: movl %esi, %eax
600 ; X64-SSE-NEXT: shrl $20, %eax
601 ; X64-SSE-NEXT: andl $15, %eax
602 ; X64-SSE-NEXT: pinsrw $5, %eax, %xmm0
603 ; X64-SSE-NEXT: movl %esi, %eax
604 ; X64-SSE-NEXT: shrl $24, %eax
605 ; X64-SSE-NEXT: andl $15, %eax
606 ; X64-SSE-NEXT: pinsrw $6, %eax, %xmm0
607 ; X64-SSE-NEXT: shrl $28, %esi
608 ; X64-SSE-NEXT: pinsrw $7, %esi, %xmm0
609 ; X64-SSE-NEXT: movl %edi, %eax
610 ; X64-SSE-NEXT: shrl $4, %eax
611 ; X64-SSE-NEXT: andl $15, %eax
612 ; X64-SSE-NEXT: movl %edi, %ecx
613 ; X64-SSE-NEXT: andl $15, %ecx
614 ; X64-SSE-NEXT: movd %ecx, %xmm1
615 ; X64-SSE-NEXT: pinsrw $1, %eax, %xmm1
616 ; X64-SSE-NEXT: movl %edi, %eax
617 ; X64-SSE-NEXT: shrl $8, %eax
618 ; X64-SSE-NEXT: andl $15, %eax
619 ; X64-SSE-NEXT: pinsrw $2, %eax, %xmm1
620 ; X64-SSE-NEXT: movl %edi, %eax
621 ; X64-SSE-NEXT: shrl $12, %eax
622 ; X64-SSE-NEXT: andl $15, %eax
623 ; X64-SSE-NEXT: pinsrw $3, %eax, %xmm1
624 ; X64-SSE-NEXT: movl %edi, %eax
625 ; X64-SSE-NEXT: shrl $16, %eax
626 ; X64-SSE-NEXT: andl $15, %eax
627 ; X64-SSE-NEXT: pinsrw $4, %eax, %xmm1
628 ; X64-SSE-NEXT: movl %edi, %eax
629 ; X64-SSE-NEXT: shrl $20, %eax
630 ; X64-SSE-NEXT: andl $15, %eax
631 ; X64-SSE-NEXT: pinsrw $5, %eax, %xmm1
632 ; X64-SSE-NEXT: movl %edi, %eax
633 ; X64-SSE-NEXT: shrl $24, %eax
634 ; X64-SSE-NEXT: andl $15, %eax
635 ; X64-SSE-NEXT: pinsrw $6, %eax, %xmm1
636 ; X64-SSE-NEXT: shrl $28, %edi
637 ; X64-SSE-NEXT: pinsrw $7, %edi, %xmm1
638 ; X64-SSE-NEXT: por %xmm0, %xmm1
639 ; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
640 ; X64-SSE-NEXT: andl $15, %eax
641 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
642 ; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
643 ; X64-SSE-NEXT: andl $15, %eax
644 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
645 ; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
646 ; X64-SSE-NEXT: andl $15, %eax
647 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
648 ; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
649 ; X64-SSE-NEXT: andl $15, %eax
650 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
651 ; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
652 ; X64-SSE-NEXT: andl $15, %eax
653 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
654 ; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
655 ; X64-SSE-NEXT: andl $15, %eax
656 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
657 ; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
658 ; X64-SSE-NEXT: andl $15, %eax
659 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
660 ; X64-SSE-NEXT: movd %xmm1, %eax
661 ; X64-SSE-NEXT: andl $15, %eax
662 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
663 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
664 ; X64-SSE-NEXT: retq
665 %1 = bitcast i32 %a to <8 x i4>
666 %2 = bitcast i32 %b to <8 x i4>
667 %3 = or <8 x i4> %1, %2
668 %4 = bitcast <8 x i4> %3 to i32
669 ret i32 %4
670 }
671
672 ;
673 ; AND/XOR/OR v4i8 as i32
674 ;
675
; The reverse direction: a <4 x i8> AND performed through a legal i32 scalar.
; Each (widened) vector operand is packed down to a scalar (pshufb to gather
; the low bytes, then movd to a GPR), ANDed with the scalar andl, and the
; result is widened back to the vector form (pmovzxbd). Assertions are
; autogenerated llc output and must not be edited by hand.
676 define <4 x i8> @and_v4i8_as_i32(<4 x i8> %a, <4 x i8> %b) nounwind {
677 ; X32-SSE-LABEL: and_v4i8_as_i32:
678 ; X32-SSE: # BB#0:
679 ; X32-SSE-NEXT: subl $12, %esp
680 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
681 ; X32-SSE-NEXT: pshufb %xmm2, %xmm1
682 ; X32-SSE-NEXT: movd %xmm1, %eax
683 ; X32-SSE-NEXT: pshufb %xmm2, %xmm0
684 ; X32-SSE-NEXT: movd %xmm0, %ecx
685 ; X32-SSE-NEXT: andl %eax, %ecx
686 ; X32-SSE-NEXT: movd %ecx, %xmm0
687 ; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
688 ; X32-SSE-NEXT: addl $12, %esp
689 ; X32-SSE-NEXT: retl
690 ;
691 ; X64-SSE-LABEL: and_v4i8_as_i32:
692 ; X64-SSE: # BB#0:
693 ; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
694 ; X64-SSE-NEXT: pshufb %xmm2, %xmm1
695 ; X64-SSE-NEXT: movd %xmm1, %eax
696 ; X64-SSE-NEXT: pshufb %xmm2, %xmm0
697 ; X64-SSE-NEXT: movd %xmm0, %ecx
698 ; X64-SSE-NEXT: andl %eax, %ecx
699 ; X64-SSE-NEXT: movd %ecx, %xmm0
700 ; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
701 ; X64-SSE-NEXT: retq
702 %1 = bitcast <4 x i8> %a to i32
703 %2 = bitcast <4 x i8> %b to i32
704 %3 = and i32 %1, %2
705 %4 = bitcast i32 %3 to <4 x i8>
706 ret <4 x i8> %4
707 }
708
; <4 x i8> XOR performed through a legal i32 scalar. Same pack/scalar-op/
; unpack pattern as the AND case, with xorl as the scalar op. Assertions
; are autogenerated llc output and must not be edited by hand.
709 define <4 x i8> @xor_v4i8_as_i32(<4 x i8> %a, <4 x i8> %b) nounwind {
710 ; X32-SSE-LABEL: xor_v4i8_as_i32:
711 ; X32-SSE: # BB#0:
712 ; X32-SSE-NEXT: subl $12, %esp
713 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
714 ; X32-SSE-NEXT: pshufb %xmm2, %xmm1
715 ; X32-SSE-NEXT: movd %xmm1, %eax
716 ; X32-SSE-NEXT: pshufb %xmm2, %xmm0
717 ; X32-SSE-NEXT: movd %xmm0, %ecx
718 ; X32-SSE-NEXT: xorl %eax, %ecx
719 ; X32-SSE-NEXT: movd %ecx, %xmm0
720 ; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
721 ; X32-SSE-NEXT: addl $12, %esp
722 ; X32-SSE-NEXT: retl
723 ;
724 ; X64-SSE-LABEL: xor_v4i8_as_i32:
725 ; X64-SSE: # BB#0:
726 ; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
727 ; X64-SSE-NEXT: pshufb %xmm2, %xmm1
728 ; X64-SSE-NEXT: movd %xmm1, %eax
729 ; X64-SSE-NEXT: pshufb %xmm2, %xmm0
730 ; X64-SSE-NEXT: movd %xmm0, %ecx
731 ; X64-SSE-NEXT: xorl %eax, %ecx
732 ; X64-SSE-NEXT: movd %ecx, %xmm0
733 ; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
734 ; X64-SSE-NEXT: retq
735 %1 = bitcast <4 x i8> %a to i32
736 %2 = bitcast <4 x i8> %b to i32
737 %3 = xor i32 %1, %2
738 %4 = bitcast i32 %3 to <4 x i8>
739 ret <4 x i8> %4
740 }
741
; <4 x i8> OR performed through a legal i32 scalar. Same pack/scalar-op/
; unpack pattern as the AND/XOR cases, with orl as the scalar op.
; Assertions are autogenerated llc output and must not be edited by hand.
742 define <4 x i8> @or_v4i8_as_i32(<4 x i8> %a, <4 x i8> %b) nounwind {
743 ; X32-SSE-LABEL: or_v4i8_as_i32:
744 ; X32-SSE: # BB#0:
745 ; X32-SSE-NEXT: subl $12, %esp
746 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
747 ; X32-SSE-NEXT: pshufb %xmm2, %xmm1
748 ; X32-SSE-NEXT: movd %xmm1, %eax
749 ; X32-SSE-NEXT: pshufb %xmm2, %xmm0
750 ; X32-SSE-NEXT: movd %xmm0, %ecx
751 ; X32-SSE-NEXT: orl %eax, %ecx
752 ; X32-SSE-NEXT: movd %ecx, %xmm0
753 ; X32-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
754 ; X32-SSE-NEXT: addl $12, %esp
755 ; X32-SSE-NEXT: retl
756 ;
757 ; X64-SSE-LABEL: or_v4i8_as_i32:
758 ; X64-SSE: # BB#0:
759 ; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
760 ; X64-SSE-NEXT: pshufb %xmm2, %xmm1
761 ; X64-SSE-NEXT: movd %xmm1, %eax
762 ; X64-SSE-NEXT: pshufb %xmm2, %xmm0
763 ; X64-SSE-NEXT: movd %xmm0, %ecx
764 ; X64-SSE-NEXT: orl %eax, %ecx
765 ; X64-SSE-NEXT: movd %ecx, %xmm0
766 ; X64-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
767 ; X64-SSE-NEXT: retq
768 %1 = bitcast <4 x i8> %a to i32
769 %2 = bitcast <4 x i8> %b to i32
770 %3 = or i32 %1, %2
771 %4 = bitcast i32 %3 to <4 x i8>
772 ret <4 x i8> %4
773 }
774
775 ;
776 ; AND/XOR/OR v8i4 as i32
777 ;
778
; Bitcast both illegal <8 x i4> operands to a legal i32 scalar, AND them, and
; bitcast the result back.  The autogenerated checks below pin the current
; codegen: every nibble is extracted (pextrw/movd), masked with andl $15 and
; spilled to the stack a byte at a time, the two packed i32 values are
; reloaded and andl'ed, and the result vector is rebuilt element-by-element
; with shrl/andl $15/pinsrw.  Do not hand-edit the CHECK lines; regenerate
; with utils/update_llc_test_checks.py.
779 define <8 x i4> @and_v8i4_as_i32(<8 x i4> %a, <8 x i4> %b) nounwind {
780 ; X32-SSE-LABEL: and_v8i4_as_i32:
781 ; X32-SSE: # BB#0:
782 ; X32-SSE-NEXT: pushl %ebp
783 ; X32-SSE-NEXT: movl %esp, %ebp
784 ; X32-SSE-NEXT: andl $-8, %esp
785 ; X32-SSE-NEXT: subl $24, %esp
786 ; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
787 ; X32-SSE-NEXT: andl $15, %eax
788 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
789 ; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
790 ; X32-SSE-NEXT: andl $15, %eax
791 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
792 ; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
793 ; X32-SSE-NEXT: andl $15, %eax
794 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
795 ; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
796 ; X32-SSE-NEXT: andl $15, %eax
797 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
798 ; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
799 ; X32-SSE-NEXT: andl $15, %eax
800 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
801 ; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
802 ; X32-SSE-NEXT: andl $15, %eax
803 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
804 ; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
805 ; X32-SSE-NEXT: andl $15, %eax
806 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
807 ; X32-SSE-NEXT: movd %xmm0, %eax
808 ; X32-SSE-NEXT: andl $15, %eax
809 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
810 ; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
811 ; X32-SSE-NEXT: andl $15, %eax
812 ; X32-SSE-NEXT: movb %al, (%esp)
813 ; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
814 ; X32-SSE-NEXT: andl $15, %eax
815 ; X32-SSE-NEXT: movb %al, (%esp)
816 ; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
817 ; X32-SSE-NEXT: andl $15, %eax
818 ; X32-SSE-NEXT: movb %al, (%esp)
819 ; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
820 ; X32-SSE-NEXT: andl $15, %eax
821 ; X32-SSE-NEXT: movb %al, (%esp)
822 ; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
823 ; X32-SSE-NEXT: andl $15, %eax
824 ; X32-SSE-NEXT: movb %al, (%esp)
825 ; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
826 ; X32-SSE-NEXT: andl $15, %eax
827 ; X32-SSE-NEXT: movb %al, (%esp)
828 ; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
829 ; X32-SSE-NEXT: andl $15, %eax
830 ; X32-SSE-NEXT: movb %al, (%esp)
831 ; X32-SSE-NEXT: movd %xmm1, %eax
832 ; X32-SSE-NEXT: andl $15, %eax
833 ; X32-SSE-NEXT: movb %al, (%esp)
834 ; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
835 ; X32-SSE-NEXT: andl (%esp), %eax
836 ; X32-SSE-NEXT: movl %eax, %ecx
837 ; X32-SSE-NEXT: shrl $4, %ecx
838 ; X32-SSE-NEXT: andl $15, %ecx
839 ; X32-SSE-NEXT: movl %eax, %edx
840 ; X32-SSE-NEXT: andl $15, %edx
841 ; X32-SSE-NEXT: movd %edx, %xmm0
842 ; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
843 ; X32-SSE-NEXT: movl %eax, %ecx
844 ; X32-SSE-NEXT: shrl $8, %ecx
845 ; X32-SSE-NEXT: andl $15, %ecx
846 ; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
847 ; X32-SSE-NEXT: movl %eax, %ecx
848 ; X32-SSE-NEXT: shrl $12, %ecx
849 ; X32-SSE-NEXT: andl $15, %ecx
850 ; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
851 ; X32-SSE-NEXT: movl %eax, %ecx
852 ; X32-SSE-NEXT: shrl $16, %ecx
853 ; X32-SSE-NEXT: andl $15, %ecx
854 ; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
855 ; X32-SSE-NEXT: movl %eax, %ecx
856 ; X32-SSE-NEXT: shrl $20, %ecx
857 ; X32-SSE-NEXT: andl $15, %ecx
858 ; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm0
859 ; X32-SSE-NEXT: movl %eax, %ecx
860 ; X32-SSE-NEXT: shrl $24, %ecx
861 ; X32-SSE-NEXT: andl $15, %ecx
862 ; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm0
863 ; X32-SSE-NEXT: shrl $28, %eax
864 ; X32-SSE-NEXT: pinsrw $7, %eax, %xmm0
865 ; X32-SSE-NEXT: movl %ebp, %esp
866 ; X32-SSE-NEXT: popl %ebp
867 ; X32-SSE-NEXT: retl
868 ;
869 ; X64-SSE-LABEL: and_v8i4_as_i32:
870 ; X64-SSE: # BB#0:
871 ; X64-SSE-NEXT: pextrw $7, %xmm0, %eax
872 ; X64-SSE-NEXT: andl $15, %eax
873 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
874 ; X64-SSE-NEXT: pextrw $6, %xmm0, %eax
875 ; X64-SSE-NEXT: andl $15, %eax
876 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
877 ; X64-SSE-NEXT: pextrw $5, %xmm0, %eax
878 ; X64-SSE-NEXT: andl $15, %eax
879 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
880 ; X64-SSE-NEXT: pextrw $4, %xmm0, %eax
881 ; X64-SSE-NEXT: andl $15, %eax
882 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
883 ; X64-SSE-NEXT: pextrw $3, %xmm0, %eax
884 ; X64-SSE-NEXT: andl $15, %eax
885 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
886 ; X64-SSE-NEXT: pextrw $2, %xmm0, %eax
887 ; X64-SSE-NEXT: andl $15, %eax
888 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
889 ; X64-SSE-NEXT: pextrw $1, %xmm0, %eax
890 ; X64-SSE-NEXT: andl $15, %eax
891 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
892 ; X64-SSE-NEXT: movd %xmm0, %eax
893 ; X64-SSE-NEXT: andl $15, %eax
894 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
895 ; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
896 ; X64-SSE-NEXT: andl $15, %eax
897 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
898 ; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
899 ; X64-SSE-NEXT: andl $15, %eax
900 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
901 ; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
902 ; X64-SSE-NEXT: andl $15, %eax
903 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
904 ; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
905 ; X64-SSE-NEXT: andl $15, %eax
906 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
907 ; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
908 ; X64-SSE-NEXT: andl $15, %eax
909 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
910 ; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
911 ; X64-SSE-NEXT: andl $15, %eax
912 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
913 ; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
914 ; X64-SSE-NEXT: andl $15, %eax
915 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
916 ; X64-SSE-NEXT: movd %xmm1, %eax
917 ; X64-SSE-NEXT: andl $15, %eax
918 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
919 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
920 ; X64-SSE-NEXT: andl -{{[0-9]+}}(%rsp), %eax
921 ; X64-SSE-NEXT: movl %eax, %ecx
922 ; X64-SSE-NEXT: shrl $4, %ecx
923 ; X64-SSE-NEXT: andl $15, %ecx
924 ; X64-SSE-NEXT: movl %eax, %edx
925 ; X64-SSE-NEXT: andl $15, %edx
926 ; X64-SSE-NEXT: movd %edx, %xmm0
927 ; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
928 ; X64-SSE-NEXT: movl %eax, %ecx
929 ; X64-SSE-NEXT: shrl $8, %ecx
930 ; X64-SSE-NEXT: andl $15, %ecx
931 ; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
932 ; X64-SSE-NEXT: movl %eax, %ecx
933 ; X64-SSE-NEXT: shrl $12, %ecx
934 ; X64-SSE-NEXT: andl $15, %ecx
935 ; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
936 ; X64-SSE-NEXT: movl %eax, %ecx
937 ; X64-SSE-NEXT: shrl $16, %ecx
938 ; X64-SSE-NEXT: andl $15, %ecx
939 ; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
940 ; X64-SSE-NEXT: movl %eax, %ecx
941 ; X64-SSE-NEXT: shrl $20, %ecx
942 ; X64-SSE-NEXT: andl $15, %ecx
943 ; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm0
944 ; X64-SSE-NEXT: movl %eax, %ecx
945 ; X64-SSE-NEXT: shrl $24, %ecx
946 ; X64-SSE-NEXT: andl $15, %ecx
947 ; X64-SSE-NEXT: pinsrw $6, %ecx, %xmm0
948 ; X64-SSE-NEXT: shrl $28, %eax
949 ; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
950 ; X64-SSE-NEXT: retq
951 %1 = bitcast <8 x i4> %a to i32
952 %2 = bitcast <8 x i4> %b to i32
953 %3 = and i32 %1, %2
954 %4 = bitcast i32 %3 to <8 x i4>
955 ret <8 x i4> %4
956 }
957
; Same pattern as and_v8i4_as_i32 but with a scalar 'xor': <8 x i4> operands
; are bitcast to i32, xor'ed, and bitcast back.  The checks document the
; nibble-by-nibble stack spill (pextrw/andl $15/movb), the scalar xorl of the
; two reloaded i32 values, and the shrl/andl/pinsrw rebuild of the vector.
; Do not hand-edit the CHECK lines; regenerate with
; utils/update_llc_test_checks.py.
958 define <8 x i4> @xor_v8i4_as_i32(<8 x i4> %a, <8 x i4> %b) nounwind {
959 ; X32-SSE-LABEL: xor_v8i4_as_i32:
960 ; X32-SSE: # BB#0:
961 ; X32-SSE-NEXT: pushl %ebp
962 ; X32-SSE-NEXT: movl %esp, %ebp
963 ; X32-SSE-NEXT: andl $-8, %esp
964 ; X32-SSE-NEXT: subl $24, %esp
965 ; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
966 ; X32-SSE-NEXT: andl $15, %eax
967 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
968 ; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
969 ; X32-SSE-NEXT: andl $15, %eax
970 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
971 ; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
972 ; X32-SSE-NEXT: andl $15, %eax
973 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
974 ; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
975 ; X32-SSE-NEXT: andl $15, %eax
976 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
977 ; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
978 ; X32-SSE-NEXT: andl $15, %eax
979 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
980 ; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
981 ; X32-SSE-NEXT: andl $15, %eax
982 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
983 ; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
984 ; X32-SSE-NEXT: andl $15, %eax
985 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
986 ; X32-SSE-NEXT: movd %xmm0, %eax
987 ; X32-SSE-NEXT: andl $15, %eax
988 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
989 ; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
990 ; X32-SSE-NEXT: andl $15, %eax
991 ; X32-SSE-NEXT: movb %al, (%esp)
992 ; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
993 ; X32-SSE-NEXT: andl $15, %eax
994 ; X32-SSE-NEXT: movb %al, (%esp)
995 ; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
996 ; X32-SSE-NEXT: andl $15, %eax
997 ; X32-SSE-NEXT: movb %al, (%esp)
998 ; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
999 ; X32-SSE-NEXT: andl $15, %eax
1000 ; X32-SSE-NEXT: movb %al, (%esp)
1001 ; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
1002 ; X32-SSE-NEXT: andl $15, %eax
1003 ; X32-SSE-NEXT: movb %al, (%esp)
1004 ; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
1005 ; X32-SSE-NEXT: andl $15, %eax
1006 ; X32-SSE-NEXT: movb %al, (%esp)
1007 ; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
1008 ; X32-SSE-NEXT: andl $15, %eax
1009 ; X32-SSE-NEXT: movb %al, (%esp)
1010 ; X32-SSE-NEXT: movd %xmm1, %eax
1011 ; X32-SSE-NEXT: andl $15, %eax
1012 ; X32-SSE-NEXT: movb %al, (%esp)
1013 ; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
1014 ; X32-SSE-NEXT: xorl (%esp), %eax
1015 ; X32-SSE-NEXT: movl %eax, %ecx
1016 ; X32-SSE-NEXT: shrl $4, %ecx
1017 ; X32-SSE-NEXT: andl $15, %ecx
1018 ; X32-SSE-NEXT: movl %eax, %edx
1019 ; X32-SSE-NEXT: andl $15, %edx
1020 ; X32-SSE-NEXT: movd %edx, %xmm0
1021 ; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
1022 ; X32-SSE-NEXT: movl %eax, %ecx
1023 ; X32-SSE-NEXT: shrl $8, %ecx
1024 ; X32-SSE-NEXT: andl $15, %ecx
1025 ; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
1026 ; X32-SSE-NEXT: movl %eax, %ecx
1027 ; X32-SSE-NEXT: shrl $12, %ecx
1028 ; X32-SSE-NEXT: andl $15, %ecx
1029 ; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
1030 ; X32-SSE-NEXT: movl %eax, %ecx
1031 ; X32-SSE-NEXT: shrl $16, %ecx
1032 ; X32-SSE-NEXT: andl $15, %ecx
1033 ; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
1034 ; X32-SSE-NEXT: movl %eax, %ecx
1035 ; X32-SSE-NEXT: shrl $20, %ecx
1036 ; X32-SSE-NEXT: andl $15, %ecx
1037 ; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm0
1038 ; X32-SSE-NEXT: movl %eax, %ecx
1039 ; X32-SSE-NEXT: shrl $24, %ecx
1040 ; X32-SSE-NEXT: andl $15, %ecx
1041 ; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm0
1042 ; X32-SSE-NEXT: shrl $28, %eax
1043 ; X32-SSE-NEXT: pinsrw $7, %eax, %xmm0
1044 ; X32-SSE-NEXT: movl %ebp, %esp
1045 ; X32-SSE-NEXT: popl %ebp
1046 ; X32-SSE-NEXT: retl
1047 ;
1048 ; X64-SSE-LABEL: xor_v8i4_as_i32:
1049 ; X64-SSE: # BB#0:
1050 ; X64-SSE-NEXT: pextrw $7, %xmm0, %eax
1051 ; X64-SSE-NEXT: andl $15, %eax
1052 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1053 ; X64-SSE-NEXT: pextrw $6, %xmm0, %eax
1054 ; X64-SSE-NEXT: andl $15, %eax
1055 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1056 ; X64-SSE-NEXT: pextrw $5, %xmm0, %eax
1057 ; X64-SSE-NEXT: andl $15, %eax
1058 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1059 ; X64-SSE-NEXT: pextrw $4, %xmm0, %eax
1060 ; X64-SSE-NEXT: andl $15, %eax
1061 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1062 ; X64-SSE-NEXT: pextrw $3, %xmm0, %eax
1063 ; X64-SSE-NEXT: andl $15, %eax
1064 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1065 ; X64-SSE-NEXT: pextrw $2, %xmm0, %eax
1066 ; X64-SSE-NEXT: andl $15, %eax
1067 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1068 ; X64-SSE-NEXT: pextrw $1, %xmm0, %eax
1069 ; X64-SSE-NEXT: andl $15, %eax
1070 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1071 ; X64-SSE-NEXT: movd %xmm0, %eax
1072 ; X64-SSE-NEXT: andl $15, %eax
1073 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1074 ; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
1075 ; X64-SSE-NEXT: andl $15, %eax
1076 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1077 ; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
1078 ; X64-SSE-NEXT: andl $15, %eax
1079 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1080 ; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
1081 ; X64-SSE-NEXT: andl $15, %eax
1082 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1083 ; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
1084 ; X64-SSE-NEXT: andl $15, %eax
1085 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1086 ; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
1087 ; X64-SSE-NEXT: andl $15, %eax
1088 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1089 ; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
1090 ; X64-SSE-NEXT: andl $15, %eax
1091 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1092 ; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
1093 ; X64-SSE-NEXT: andl $15, %eax
1094 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1095 ; X64-SSE-NEXT: movd %xmm1, %eax
1096 ; X64-SSE-NEXT: andl $15, %eax
1097 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1098 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
1099 ; X64-SSE-NEXT: xorl -{{[0-9]+}}(%rsp), %eax
1100 ; X64-SSE-NEXT: movl %eax, %ecx
1101 ; X64-SSE-NEXT: shrl $4, %ecx
1102 ; X64-SSE-NEXT: andl $15, %ecx
1103 ; X64-SSE-NEXT: movl %eax, %edx
1104 ; X64-SSE-NEXT: andl $15, %edx
1105 ; X64-SSE-NEXT: movd %edx, %xmm0
1106 ; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
1107 ; X64-SSE-NEXT: movl %eax, %ecx
1108 ; X64-SSE-NEXT: shrl $8, %ecx
1109 ; X64-SSE-NEXT: andl $15, %ecx
1110 ; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
1111 ; X64-SSE-NEXT: movl %eax, %ecx
1112 ; X64-SSE-NEXT: shrl $12, %ecx
1113 ; X64-SSE-NEXT: andl $15, %ecx
1114 ; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
1115 ; X64-SSE-NEXT: movl %eax, %ecx
1116 ; X64-SSE-NEXT: shrl $16, %ecx
1117 ; X64-SSE-NEXT: andl $15, %ecx
1118 ; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
1119 ; X64-SSE-NEXT: movl %eax, %ecx
1120 ; X64-SSE-NEXT: shrl $20, %ecx
1121 ; X64-SSE-NEXT: andl $15, %ecx
1122 ; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm0
1123 ; X64-SSE-NEXT: movl %eax, %ecx
1124 ; X64-SSE-NEXT: shrl $24, %ecx
1125 ; X64-SSE-NEXT: andl $15, %ecx
1126 ; X64-SSE-NEXT: pinsrw $6, %ecx, %xmm0
1127 ; X64-SSE-NEXT: shrl $28, %eax
1128 ; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
1129 ; X64-SSE-NEXT: retq
1130 %1 = bitcast <8 x i4> %a to i32
1131 %2 = bitcast <8 x i4> %b to i32
1132 %3 = xor i32 %1, %2
1133 %4 = bitcast i32 %3 to <8 x i4>
1134 ret <8 x i4> %4
1135 }
1136
; Same pattern as and_v8i4_as_i32 but with a scalar 'or': <8 x i4> operands
; are bitcast to i32, or'ed, and bitcast back.  The checks document the
; nibble-by-nibble stack spill (pextrw/andl $15/movb), the scalar orl of the
; two reloaded i32 values, and the shrl/andl/pinsrw rebuild of the vector.
; Do not hand-edit the CHECK lines; regenerate with
; utils/update_llc_test_checks.py.
1137 define <8 x i4> @or_v8i4_as_i32(<8 x i4> %a, <8 x i4> %b) nounwind {
1138 ; X32-SSE-LABEL: or_v8i4_as_i32:
1139 ; X32-SSE: # BB#0:
1140 ; X32-SSE-NEXT: pushl %ebp
1141 ; X32-SSE-NEXT: movl %esp, %ebp
1142 ; X32-SSE-NEXT: andl $-8, %esp
1143 ; X32-SSE-NEXT: subl $24, %esp
1144 ; X32-SSE-NEXT: pextrw $7, %xmm0, %eax
1145 ; X32-SSE-NEXT: andl $15, %eax
1146 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
1147 ; X32-SSE-NEXT: pextrw $6, %xmm0, %eax
1148 ; X32-SSE-NEXT: andl $15, %eax
1149 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
1150 ; X32-SSE-NEXT: pextrw $5, %xmm0, %eax
1151 ; X32-SSE-NEXT: andl $15, %eax
1152 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
1153 ; X32-SSE-NEXT: pextrw $4, %xmm0, %eax
1154 ; X32-SSE-NEXT: andl $15, %eax
1155 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
1156 ; X32-SSE-NEXT: pextrw $3, %xmm0, %eax
1157 ; X32-SSE-NEXT: andl $15, %eax
1158 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
1159 ; X32-SSE-NEXT: pextrw $2, %xmm0, %eax
1160 ; X32-SSE-NEXT: andl $15, %eax
1161 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
1162 ; X32-SSE-NEXT: pextrw $1, %xmm0, %eax
1163 ; X32-SSE-NEXT: andl $15, %eax
1164 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
1165 ; X32-SSE-NEXT: movd %xmm0, %eax
1166 ; X32-SSE-NEXT: andl $15, %eax
1167 ; X32-SSE-NEXT: movb %al, {{[0-9]+}}(%esp)
1168 ; X32-SSE-NEXT: pextrw $7, %xmm1, %eax
1169 ; X32-SSE-NEXT: andl $15, %eax
1170 ; X32-SSE-NEXT: movb %al, (%esp)
1171 ; X32-SSE-NEXT: pextrw $6, %xmm1, %eax
1172 ; X32-SSE-NEXT: andl $15, %eax
1173 ; X32-SSE-NEXT: movb %al, (%esp)
1174 ; X32-SSE-NEXT: pextrw $5, %xmm1, %eax
1175 ; X32-SSE-NEXT: andl $15, %eax
1176 ; X32-SSE-NEXT: movb %al, (%esp)
1177 ; X32-SSE-NEXT: pextrw $4, %xmm1, %eax
1178 ; X32-SSE-NEXT: andl $15, %eax
1179 ; X32-SSE-NEXT: movb %al, (%esp)
1180 ; X32-SSE-NEXT: pextrw $3, %xmm1, %eax
1181 ; X32-SSE-NEXT: andl $15, %eax
1182 ; X32-SSE-NEXT: movb %al, (%esp)
1183 ; X32-SSE-NEXT: pextrw $2, %xmm1, %eax
1184 ; X32-SSE-NEXT: andl $15, %eax
1185 ; X32-SSE-NEXT: movb %al, (%esp)
1186 ; X32-SSE-NEXT: pextrw $1, %xmm1, %eax
1187 ; X32-SSE-NEXT: andl $15, %eax
1188 ; X32-SSE-NEXT: movb %al, (%esp)
1189 ; X32-SSE-NEXT: movd %xmm1, %eax
1190 ; X32-SSE-NEXT: andl $15, %eax
1191 ; X32-SSE-NEXT: movb %al, (%esp)
1192 ; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
1193 ; X32-SSE-NEXT: orl (%esp), %eax
1194 ; X32-SSE-NEXT: movl %eax, %ecx
1195 ; X32-SSE-NEXT: shrl $4, %ecx
1196 ; X32-SSE-NEXT: andl $15, %ecx
1197 ; X32-SSE-NEXT: movl %eax, %edx
1198 ; X32-SSE-NEXT: andl $15, %edx
1199 ; X32-SSE-NEXT: movd %edx, %xmm0
1200 ; X32-SSE-NEXT: pinsrw $1, %ecx, %xmm0
1201 ; X32-SSE-NEXT: movl %eax, %ecx
1202 ; X32-SSE-NEXT: shrl $8, %ecx
1203 ; X32-SSE-NEXT: andl $15, %ecx
1204 ; X32-SSE-NEXT: pinsrw $2, %ecx, %xmm0
1205 ; X32-SSE-NEXT: movl %eax, %ecx
1206 ; X32-SSE-NEXT: shrl $12, %ecx
1207 ; X32-SSE-NEXT: andl $15, %ecx
1208 ; X32-SSE-NEXT: pinsrw $3, %ecx, %xmm0
1209 ; X32-SSE-NEXT: movl %eax, %ecx
1210 ; X32-SSE-NEXT: shrl $16, %ecx
1211 ; X32-SSE-NEXT: andl $15, %ecx
1212 ; X32-SSE-NEXT: pinsrw $4, %ecx, %xmm0
1213 ; X32-SSE-NEXT: movl %eax, %ecx
1214 ; X32-SSE-NEXT: shrl $20, %ecx
1215 ; X32-SSE-NEXT: andl $15, %ecx
1216 ; X32-SSE-NEXT: pinsrw $5, %ecx, %xmm0
1217 ; X32-SSE-NEXT: movl %eax, %ecx
1218 ; X32-SSE-NEXT: shrl $24, %ecx
1219 ; X32-SSE-NEXT: andl $15, %ecx
1220 ; X32-SSE-NEXT: pinsrw $6, %ecx, %xmm0
1221 ; X32-SSE-NEXT: shrl $28, %eax
1222 ; X32-SSE-NEXT: pinsrw $7, %eax, %xmm0
1223 ; X32-SSE-NEXT: movl %ebp, %esp
1224 ; X32-SSE-NEXT: popl %ebp
1225 ; X32-SSE-NEXT: retl
1226 ;
1227 ; X64-SSE-LABEL: or_v8i4_as_i32:
1228 ; X64-SSE: # BB#0:
1229 ; X64-SSE-NEXT: pextrw $7, %xmm0, %eax
1230 ; X64-SSE-NEXT: andl $15, %eax
1231 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1232 ; X64-SSE-NEXT: pextrw $6, %xmm0, %eax
1233 ; X64-SSE-NEXT: andl $15, %eax
1234 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1235 ; X64-SSE-NEXT: pextrw $5, %xmm0, %eax
1236 ; X64-SSE-NEXT: andl $15, %eax
1237 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1238 ; X64-SSE-NEXT: pextrw $4, %xmm0, %eax
1239 ; X64-SSE-NEXT: andl $15, %eax
1240 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1241 ; X64-SSE-NEXT: pextrw $3, %xmm0, %eax
1242 ; X64-SSE-NEXT: andl $15, %eax
1243 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1244 ; X64-SSE-NEXT: pextrw $2, %xmm0, %eax
1245 ; X64-SSE-NEXT: andl $15, %eax
1246 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1247 ; X64-SSE-NEXT: pextrw $1, %xmm0, %eax
1248 ; X64-SSE-NEXT: andl $15, %eax
1249 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1250 ; X64-SSE-NEXT: movd %xmm0, %eax
1251 ; X64-SSE-NEXT: andl $15, %eax
1252 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1253 ; X64-SSE-NEXT: pextrw $7, %xmm1, %eax
1254 ; X64-SSE-NEXT: andl $15, %eax
1255 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1256 ; X64-SSE-NEXT: pextrw $6, %xmm1, %eax
1257 ; X64-SSE-NEXT: andl $15, %eax
1258 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1259 ; X64-SSE-NEXT: pextrw $5, %xmm1, %eax
1260 ; X64-SSE-NEXT: andl $15, %eax
1261 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1262 ; X64-SSE-NEXT: pextrw $4, %xmm1, %eax
1263 ; X64-SSE-NEXT: andl $15, %eax
1264 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1265 ; X64-SSE-NEXT: pextrw $3, %xmm1, %eax
1266 ; X64-SSE-NEXT: andl $15, %eax
1267 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1268 ; X64-SSE-NEXT: pextrw $2, %xmm1, %eax
1269 ; X64-SSE-NEXT: andl $15, %eax
1270 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1271 ; X64-SSE-NEXT: pextrw $1, %xmm1, %eax
1272 ; X64-SSE-NEXT: andl $15, %eax
1273 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1274 ; X64-SSE-NEXT: movd %xmm1, %eax
1275 ; X64-SSE-NEXT: andl $15, %eax
1276 ; X64-SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
1277 ; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
1278 ; X64-SSE-NEXT: orl -{{[0-9]+}}(%rsp), %eax
1279 ; X64-SSE-NEXT: movl %eax, %ecx
1280 ; X64-SSE-NEXT: shrl $4, %ecx
1281 ; X64-SSE-NEXT: andl $15, %ecx
1282 ; X64-SSE-NEXT: movl %eax, %edx
1283 ; X64-SSE-NEXT: andl $15, %edx
1284 ; X64-SSE-NEXT: movd %edx, %xmm0
1285 ; X64-SSE-NEXT: pinsrw $1, %ecx, %xmm0
1286 ; X64-SSE-NEXT: movl %eax, %ecx
1287 ; X64-SSE-NEXT: shrl $8, %ecx
1288 ; X64-SSE-NEXT: andl $15, %ecx
1289 ; X64-SSE-NEXT: pinsrw $2, %ecx, %xmm0
1290 ; X64-SSE-NEXT: movl %eax, %ecx
1291 ; X64-SSE-NEXT: shrl $12, %ecx
1292 ; X64-SSE-NEXT: andl $15, %ecx
1293 ; X64-SSE-NEXT: pinsrw $3, %ecx, %xmm0
1294 ; X64-SSE-NEXT: movl %eax, %ecx
1295 ; X64-SSE-NEXT: shrl $16, %ecx
1296 ; X64-SSE-NEXT: andl $15, %ecx
1297 ; X64-SSE-NEXT: pinsrw $4, %ecx, %xmm0
1298 ; X64-SSE-NEXT: movl %eax, %ecx
1299 ; X64-SSE-NEXT: shrl $20, %ecx
1300 ; X64-SSE-NEXT: andl $15, %ecx
1301 ; X64-SSE-NEXT: pinsrw $5, %ecx, %xmm0
1302 ; X64-SSE-NEXT: movl %eax, %ecx
1303 ; X64-SSE-NEXT: shrl $24, %ecx
1304 ; X64-SSE-NEXT: andl $15, %ecx
1305 ; X64-SSE-NEXT: pinsrw $6, %ecx, %xmm0
1306 ; X64-SSE-NEXT: shrl $28, %eax
1307 ; X64-SSE-NEXT: pinsrw $7, %eax, %xmm0
1308 ; X64-SSE-NEXT: retq
1309 %1 = bitcast <8 x i4> %a to i32
1310 %2 = bitcast <8 x i4> %b to i32
1311 %3 = or i32 %1, %2
1312 %4 = bitcast i32 %3 to <8 x i4>
1313 ret <8 x i4> %4
1314 }