llvm.org GIT mirror llvm / 736b292
[X86] Add some early DAG combines to turn v4i32 AND/OR/XOR into FAND/FOR/FXOR when only SSE1 is available.

v4i32 isn't a legal type with SSE1 only and would end up getting scalarized otherwise. This isn't completely ideal as it doesn't handle cases like v8i32 that would get split to v4i32. But it at least helps with code written using the clang intrinsic header.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@318967 91177308-0d34-0410-b5e6-96231b3b80d8

Craig Topper, 2 years ago
4 changed files with 109 additions and 282 deletions.
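For context, this is the kind of user code the new combines target: clang's xmmintrin.h implements the SSE1 logical intrinsics in terms of integer vector operations, so with SSE2 disabled the resulting v4i32 AND/OR/XOR used to be scalarized into four GPR operations (see the test_mm_and_ps diffs below). A minimal sketch; the function name and build flags are illustrative, not part of the commit:

// Illustrative only -- build e.g. with: clang -m32 -msse -mno-sse2 -O2
#include <xmmintrin.h>

// _mm_and_ps is written in the intrinsic header in terms of an integer
// vector AND, so the DAG sees a v4i32 'and' wrapped in bitcasts.  With the
// new combine this lowers to a single andps instead of scalar andl's.
__m128 mask_lanes(__m128 v, __m128 m) {  // hypothetical example function
  return _mm_and_ps(v, m);
}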
@@ -32966 +32966 @@
 static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
                           TargetLowering::DAGCombinerInfo &DCI,
                           const X86Subtarget &Subtarget) {
+  EVT VT = N->getValueType(0);
+
+  // If this is SSE1 only convert to FAND to avoid scalarization.
+  if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
+    return DAG.getBitcast(
+        MVT::v4i32, DAG.getNode(X86ISD::FAND, SDLoc(N), MVT::v4f32,
+                                DAG.getBitcast(MVT::v4f32, N->getOperand(0)),
+                                DAG.getBitcast(MVT::v4f32, N->getOperand(1))));
+  }
+
   if (DCI.isBeforeLegalizeOps())
     return SDValue();

@@ -32980 +32990 @@

   if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget))
     return ShiftRight;
-
-  EVT VT = N->getValueType(0);

   // Attempt to recursively combine a bitmask AND with shuffles.
   if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
@@ -33265 +33273 @@
 static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI,
                          const X86Subtarget &Subtarget) {
-  if (DCI.isBeforeLegalizeOps())
-    return SDValue();
-
-  if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
-    return R;
-
-  if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
-    return FPLogic;
-
-  if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
-    return R;
-
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N->getValueType(0);
+
+  // If this is SSE1 only convert to FOR to avoid scalarization.
+  if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
+    return DAG.getBitcast(MVT::v4i32,
+                          DAG.getNode(X86ISD::FOR, SDLoc(N), MVT::v4f32,
+                                      DAG.getBitcast(MVT::v4f32, N0),
+                                      DAG.getBitcast(MVT::v4f32, N1)));
+  }
+
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
+    return R;
+
+  if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
+    return FPLogic;
+
+  if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
+    return R;

   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
     return SDValue();
@@ -34956 +34972 @@
 static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
                           TargetLowering::DAGCombinerInfo &DCI,
                           const X86Subtarget &Subtarget) {
+  // If this is SSE1 only convert to FXOR to avoid scalarization.
+  if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() &&
+      N->getValueType(0) == MVT::v4i32) {
+    return DAG.getBitcast(
+        MVT::v4i32, DAG.getNode(X86ISD::FXOR, SDLoc(N), MVT::v4f32,
+                                DAG.getBitcast(MVT::v4f32, N->getOperand(0)),
+                                DAG.getBitcast(MVT::v4f32, N->getOperand(1))));
+  }
+
   if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget))
     return Cmp;

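All three hunks above add the same early rewrite: bitcast both v4i32 operands to v4f32, emit the corresponding FP logic node (X86ISD::FAND/FOR/FXOR, which select to andps/orps/xorps), and bitcast the result back to v4i32. A hedged sketch of that shared shape, with a hypothetical helper name; the commit itself writes the pattern out inline in each combine:

// Hypothetical helper, for illustration only -- not part of the commit.
static SDValue lowerV4I32LogicToFPLogic(SDNode *N, SelectionDAG &DAG,
                                        unsigned FPOpcode) {
  SDLoc DL(N);
  // Reinterpret the integer operands as v4f32, perform the FP logic op,
  // then reinterpret the result back as v4i32.
  SDValue LHS = DAG.getBitcast(MVT::v4f32, N->getOperand(0));
  SDValue RHS = DAG.getBitcast(MVT::v4f32, N->getOperand(1));
  return DAG.getBitcast(MVT::v4i32,
                        DAG.getNode(FPOpcode, DL, MVT::v4f32, LHS, RHS));
}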
@@ -1066 +1066 @@
 ; X32-SSE1: # BB#0:
 ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE1-NEXT: movl (%ecx), %ecx
-; X32-SSE1-NEXT: movl %ecx, (%eax)
-; X32-SSE1-NEXT: movl $0, 12(%eax)
-; X32-SSE1-NEXT: movl $0, 8(%eax)
-; X32-SSE1-NEXT: movl $0, 4(%eax)
+; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: andps %xmm0, %xmm1
+; X32-SSE1-NEXT: movaps %xmm1, (%eax)
 ; X32-SSE1-NEXT: retl
 ;
 ; X32-SSE41-LABEL: merge_4i32_i32_combine:
@@ -37 +37 @@
 define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
 ; X32-LABEL: test_mm_and_ps:
 ; X32: # BB#0:
-; X32-NEXT: pushl %ebp
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: pushl %esi
-; X32-NEXT: andl $-16, %esp
-; X32-NEXT: subl $64, %esp
-; X32-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X32-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl %esi, (%esp)
-; X32-NEXT: andl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X32-NEXT: leal -4(%ebp), %esp
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebp
+; X32-NEXT: andps %xmm1, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_and_ps:
 ; X64: # BB#0:
-; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8
-; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
-; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: andl %eax, %edx
-; X64-NEXT: shrq $32, %rax
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: movq %rcx, %rdi
-; X64-NEXT: andl %r8d, %ecx
-; X64-NEXT: shrq $32, %r8
-; X64-NEXT: shrq $32, %rsi
-; X64-NEXT: shrq $32, %rdi
-; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: andl %r8d, %edi
-; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: andl %eax, %esi
-; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: andps %xmm1, %xmm0
 ; X64-NEXT: retq
 %arg0 = bitcast <4 x float> %a0 to <4 x i32>
 %arg1 = bitcast <4 x float> %a1 to <4 x i32>
@@ -108 +54 @@
 define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
 ; X32-LABEL: test_mm_andnot_ps:
 ; X32: # BB#0:
-; X32-NEXT: pushl %ebp
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: pushl %esi
-; X32-NEXT: andl $-16, %esp
-; X32-NEXT: subl $64, %esp
-; X32-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X32-NEXT: notl %edx
-; X32-NEXT: notl %esi
-; X32-NEXT: notl %ecx
-; X32-NEXT: notl %eax
-; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, (%esp)
-; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X32-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X32-NEXT: andl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X32-NEXT: leal -4(%ebp), %esp
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebp
+; X32-NEXT: xorps {{\.LCPI.*}}, %xmm0
+; X32-NEXT: andps %xmm1, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_andnot_ps:
 ; X64: # BB#0:
-; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: movq %rcx, %rdx
-; X64-NEXT: shrq $32, %rdx
-; X64-NEXT: movq %rax, %rsi
-; X64-NEXT: shrq $32, %rsi
-; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8
-; X64-NEXT: notl %eax
-; X64-NEXT: andl %edi, %eax
-; X64-NEXT: shrq $32, %rdi
-; X64-NEXT: notl %ecx
-; X64-NEXT: andl %r8d, %ecx
-; X64-NEXT: shrq $32, %r8
-; X64-NEXT: notl %esi
-; X64-NEXT: notl %edx
-; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: andl %r8d, %edx
-; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: andl %edi, %esi
-; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: xorps {{.*}}(%rip), %xmm0
+; X64-NEXT: andps %xmm1, %xmm0
 ; X64-NEXT: retq
 %arg0 = bitcast <4 x float> %a0 to <4 x i32>
 %arg1 = bitcast <4 x float> %a1 to <4 x i32>
@@ -1261 +1147 @@
 define <4 x float> @test_mm_or_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
 ; X32-LABEL: test_mm_or_ps:
 ; X32: # BB#0:
-; X32-NEXT: pushl %ebp
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: pushl %esi
-; X32-NEXT: andl $-16, %esp
-; X32-NEXT: subl $64, %esp
-; X32-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X32-NEXT: orl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl %esi, (%esp)
-; X32-NEXT: orl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X32-NEXT: leal -4(%ebp), %esp
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebp
+; X32-NEXT: orps %xmm1, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_or_ps:
 ; X64: # BB#0:
-; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8
-; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
-; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: orl %eax, %edx
-; X64-NEXT: shrq $32, %rax
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: movq %rcx, %rdi
-; X64-NEXT: orl %r8d, %ecx
-; X64-NEXT: shrq $32, %r8
-; X64-NEXT: shrq $32, %rsi
-; X64-NEXT: shrq $32, %rdi
-; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: orl %r8d, %edi
-; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: orl %eax, %esi
-; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: orps %xmm1, %xmm0
 ; X64-NEXT: retq
 %arg0 = bitcast <4 x float> %a0 to <4 x i32>
 %arg1 = bitcast <4 x float> %a1 to <4 x i32>
@@ -2223 +2055 @@
 define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
 ; X32-LABEL: test_mm_xor_ps:
 ; X32: # BB#0:
-; X32-NEXT: pushl %ebp
-; X32-NEXT: movl %esp, %ebp
-; X32-NEXT: pushl %esi
-; X32-NEXT: andl $-16, %esp
-; X32-NEXT: subl $64, %esp
-; X32-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
-; X32-NEXT: xorl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl %esi, (%esp)
-; X32-NEXT: xorl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X32-NEXT: leal -4(%ebp), %esp
-; X32-NEXT: popl %esi
-; X32-NEXT: popl %ebp
+; X32-NEXT: xorps %xmm1, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_xor_ps:
 ; X64: # BB#0:
-; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8
-; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
-; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: xorl %eax, %edx
-; X64-NEXT: shrq $32, %rax
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: movq %rcx, %rdi
-; X64-NEXT: xorl %r8d, %ecx
-; X64-NEXT: shrq $32, %r8
-; X64-NEXT: shrq $32, %rsi
-; X64-NEXT: shrq $32, %rdi
-; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: xorl %r8d, %edi
-; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: xorl %eax, %esi
-; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: xorps %xmm1, %xmm0
 ; X64-NEXT: retq
 %arg0 = bitcast <4 x float> %a0 to <4 x i32>
 %arg1 = bitcast <4 x float> %a1 to <4 x i32>
@@ -157 +157 @@
 define <4 x i32> @PR30512(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X32-LABEL: PR30512:
 ; X32: # BB#0:
-; X32-NEXT: pushl %ebp
 ; X32-NEXT: pushl %ebx
 ; X32-NEXT: pushl %edi
 ; X32-NEXT: pushl %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: cmpl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: sete %cl
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: cmpl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT: sete %dl
 ; X32-NEXT: xorl %ebx, %ebx
 ; X32-NEXT: cmpl {{[0-9]+}}(%esp), %edi
 ; X32-NEXT: sete %bl
-; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: negl %ebx
+; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X32-NEXT: xorl %ebx, %ebx
 ; X32-NEXT: cmpl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: sete %al
-; X32-NEXT: movl %eax, 12(%ebp)
-; X32-NEXT: movl %ebx, 8(%ebp)
-; X32-NEXT: movl %edx, 4(%ebp)
-; X32-NEXT: movl %ecx, (%ebp)
-; X32-NEXT: movl %ebp, %eax
+; X32-NEXT: sete %bl
+; X32-NEXT: negl %ebx
+; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X32-NEXT: xorl %ebx, %ebx
+; X32-NEXT: cmpl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: sete %bl
+; X32-NEXT: negl %ebx
+; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: sete %dl
+; X32-NEXT: negl %edx
+; X32-NEXT: movl %edx, (%esp)
+; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X32-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; X32-NEXT: andps {{\.LCPI.*}}, %xmm2
+; X32-NEXT: movaps %xmm2, (%eax)
+; X32-NEXT: addl $16, %esp
 ; X32-NEXT: popl %esi
 ; X32-NEXT: popl %edi
 ; X32-NEXT: popl %ebx
-; X32-NEXT: popl %ebp
 ; X32-NEXT: retl $4
 ;
 ; X64-LABEL: PR30512:
 ; X64: # BB#0:
 ; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %r8d
+; X64-NEXT: sete %al
+; X64-NEXT: negl %eax
+; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %ecx
+; X64-NEXT: sete %al
+; X64-NEXT: negl %eax
+; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %edx
+; X64-NEXT: sete %al
+; X64-NEXT: negl %eax
+; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT: xorl %eax, %eax
 ; X64-NEXT: cmpl %r9d, %esi
 ; X64-NEXT: sete %al
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %edx
-; X64-NEXT: sete %sil
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %ecx
-; X64-NEXT: sete %dl
-; X64-NEXT: xorl %ecx, %ecx
-; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %r8d
-; X64-NEXT: sete %cl
-; X64-NEXT: movl %ecx, 12(%rdi)
-; X64-NEXT: movl %edx, 8(%rdi)
-; X64-NEXT: movl %esi, 4(%rdi)
-; X64-NEXT: movl %eax, (%rdi)
+; X64-NEXT: negl %eax
+; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X64-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; X64-NEXT: andps {{.*}}(%rip), %xmm2
+; X64-NEXT: movaps %xmm2, (%rdi)
 ; X64-NEXT: movq %rdi, %rax
 ; X64-NEXT: retq
 %cmp = icmp eq <4 x i32> %x, %y