llvm.org GIT mirror: llvm / 27b6133

[DAG] add splat vector support for 'xor' in SimplifyDemandedBits

This allows forming more 'not' ops, so we get improvements for ISAs that have and-not.

Follow-up to: https://reviews.llvm.org/rL300725

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300763 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Sanjay Patel
7 changed file(s) with 38 addition(s) and 48 deletion(s).
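The fold being enabled here: when the xor constant is a splat whose set bits cover every bit demanded by a later 'and', the xor is really a 'not', so and-not capable targets (ARM vbic, PowerPC xxlandc, x86 vandn*) can absorb it. A minimal standalone C++ sketch of that demanded-bits argument, using the 0xf/0x6 constants from the ARM and PowerPC tests below (plain 32-bit integers stand in for the per-lane APInt values; this is an illustration of the reasoning, not the DAG code itself):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t X = 0xDEADBEEF; // arbitrary lane value
  uint32_t C = 0xF;        // splat xor constant from the test
  uint32_t M = 0x6;        // demanded bits: the splat 'and' constant

  // SimplifyDemandedBits may widen C to all-ones when C | ~M is all-ones,
  // i.e. the xor flips every demanded bit and therefore acts as a plain 'not'.
  bool XorActsAsNot = ((C | ~M) == ~0u);

  uint32_t IrResult     = (X ^ C) & M; // what the IR computes
  uint32_t AndNotResult = ~X & M;      // what vbic/xxlandc/vandnps compute

  printf("xor acts as not: %d, results match: %d\n",
         XorActsAsNot, IrResult == AndNotResult);
  return 0;
}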
@@ -5057,8 +5057,7 @@
     return Tmp;

   // Simplify the expression using non-local knowledge.
-  if (!VT.isVector() &&
-      SimplifyDemandedBits(SDValue(N, 0)))
+  if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);

   return SDValue();
@@ -714,7 +714,7 @@
     // If the RHS is a constant, see if we can simplify it.
     // for XOR, we prefer to force bits to 1 if they will make a -1.
     // If we can't force bits, try to shrink the constant.
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+    if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
       APInt Expanded = C->getAPIntValue() | (~NewMask);
       // If we can expand it to have all bits set, do it.
       if (Expanded.isAllOnesValue()) {
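With isConstOrConstSplat, the block above now also fires when operand 1 is a splat build_vector rather than a scalar constant, and the same per-lane decision applies: force the constant to all-ones if the undemanded bits allow it (turning the xor into a 'not'), otherwise fall back to shrinking it. A small hedged C++ illustration of that decision on one 32-bit lane (the helper name and the shrink step are simplifications for exposition, not the LLVM API):

#include <cstdint>
#include <cstdio>

// One-lane model of the choice above: widen the xor constant to -1 when the
// bits outside DemandedMask can absorb the difference, else mask it down.
static uint32_t simplifyXorConstant(uint32_t C, uint32_t DemandedMask) {
  uint32_t Expanded = C | ~DemandedMask;
  if (Expanded == ~0u)
    return Expanded;         // force bits to 1: the xor becomes a plain 'not'
  return C & DemandedMask;   // can't make -1, so shrink to the demanded bits
}

int main() {
  // Splat constant 0xF under demanded mask 0x6 (the test below): widened to -1.
  printf("0x%x\n", simplifyXorConstant(0xF, 0x6));   // 0xffffffff
  // Constant 0xF0 under demanded mask 0x1F: shrinks to 0x10 instead.
  printf("0x%x\n", simplifyXorConstant(0xF0, 0x1F)); // 0x10
  return 0;
}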
@@ -849,11 +849,9 @@
 ; CHECK-LABEL: hidden_not_v4i32:
 ; CHECK: @ BB#0:
 ; CHECK-NEXT: vmov d19, r2, r3
-; CHECK-NEXT: vmov.i32 q8, #0xf
+; CHECK-NEXT: vmov.i32 q8, #0x6
 ; CHECK-NEXT: vmov d18, r0, r1
-; CHECK-NEXT: vmov.i32 q10, #0x6
-; CHECK-NEXT: veor q8, q9, q8
-; CHECK-NEXT: vand q8, q8, q10
+; CHECK-NEXT: vbic q8, q8, q9
 ; CHECK-NEXT: vmov r0, r1, d16
 ; CHECK-NEXT: vmov r2, r3, d17
 ; CHECK-NEXT: bx lr
@@ -42,10 +42,8 @@
 define <4 x i32> @hidden_not_v4i32(<4 x i32> %x) {
 ; CHECK-LABEL: hidden_not_v4i32:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vspltisw 3, 15
-; CHECK-NEXT: vspltisw 4, 6
-; CHECK-NEXT: xxlxor 0, 34, 35
-; CHECK-NEXT: xxland 34, 0, 36
+; CHECK-NEXT: vspltisw 3, 6
+; CHECK-NEXT: xxlandc 34, 35, 34
 ; CHECK-NEXT: blr
   %xor = xor <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
   %and = and <4 x i32> %xor, <i32 6, i32 6, i32 6, i32 6>
@@ -273,16 +273,13 @@
 define <4 x i32> @and_xor_splat1_v4i32(<4 x i32> %x) nounwind {
 ; AVX-LABEL: and_xor_splat1_v4i32:
 ; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1]
-; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandnps {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: and_xor_splat1_v4i32:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
-; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vandnps %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
   %xor = xor <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
   %and = and <4 x i32> %xor, <i32 1, i32 1, i32 1, i32 1>
@@ -292,16 +289,13 @@
 define <4 x i64> @and_xor_splat1_v4i64(<4 x i64> %x) nounwind {
 ; AVX-LABEL: and_xor_splat1_v4i64:
 ; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1]
-; AVX-NEXT: vxorps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vandnps {{.*}}(%rip), %ymm0, %ymm0
 ; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: and_xor_splat1_v4i64:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1
-; AVX512-NEXT: vxorps %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vandnps %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT: retq
   %xor = xor <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
   %and = and <4 x i64> %xor, <i64 1, i64 1, i64 1, i64 1>
@@ -1429,7 +1429,8 @@
 define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
 ; KNL-LABEL: store_v2i1:
 ; KNL: ## BB#0:
-; KNL-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; KNL-NEXT: kmovw %k0, %eax
@@ -1446,7 +1447,8 @@
 ;
 ; AVX512BW-LABEL: store_v2i1:
 ; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0
 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT: kmovd %k0, %eax
@@ -1456,7 +1458,8 @@
 ;
 ; AVX512DQ-LABEL: store_v2i1:
 ; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
 ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
@@ -1470,7 +1473,7 @@
 define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
 ; KNL-LABEL: store_v4i1:
 ; KNL: ## BB#0:
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; KNL-NEXT: vpslld $31, %ymm0, %ymm0
 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1488,7 +1491,7 @@
 ;
 ; AVX512BW-LABEL: store_v4i1:
 ; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0
 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1499,7 +1502,7 @@
 ;
 ; AVX512DQ-LABEL: store_v4i1:
 ; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0
 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -223,35 +223,33 @@
 ; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,0,2147483648,0]
 ; X64-SSE-NEXT: movdqa %xmm0, %xmm2
 ; X64-SSE-NEXT: pxor %xmm1, %xmm2
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551361,18446744073709551361]
-; X64-SSE-NEXT: movdqa %xmm1, %xmm4
-; X64-SSE-NEXT: pxor %xmm3, %xmm4
-; X64-SSE-NEXT: movdqa %xmm4, %xmm5
-; X64-SSE-NEXT: pcmpgtd %xmm2, %xmm5
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
-; X64-SSE-NEXT: pcmpeqd %xmm2, %xmm4
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
-; X64-SSE-NEXT: pand %xmm6, %xmm2
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
-; X64-SSE-NEXT: por %xmm2, %xmm4
-; X64-SSE-NEXT: movdqa %xmm4, %xmm2
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm3 = [18446744071562067713,18446744071562067713]
+; X64-SSE-NEXT: movdqa %xmm3, %xmm4
+; X64-SSE-NEXT: pcmpgtd %xmm2, %xmm4
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; X64-SSE-NEXT: pcmpeqd %xmm3, %xmm2
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; X64-SSE-NEXT: pand %xmm5, %xmm2
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; X64-SSE-NEXT: por %xmm2, %xmm3
+; X64-SSE-NEXT: movdqa %xmm3, %xmm2
 ; X64-SSE-NEXT: pandn %xmm0, %xmm2
-; X64-SSE-NEXT: pand %xmm3, %xmm4
-; X64-SSE-NEXT: por %xmm2, %xmm4
-; X64-SSE-NEXT: movdqa %xmm4, %xmm0
+; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm3
+; X64-SSE-NEXT: por %xmm2, %xmm3
+; X64-SSE-NEXT: movdqa %xmm3, %xmm0
 ; X64-SSE-NEXT: pxor %xmm1, %xmm0
 ; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255]
-; X64-SSE-NEXT: pxor %xmm2, %xmm1
-; X64-SSE-NEXT: movdqa %xmm0, %xmm3
-; X64-SSE-NEXT: pcmpgtd %xmm1, %xmm3
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
+; X64-SSE-NEXT: por %xmm2, %xmm1
+; X64-SSE-NEXT: movdqa %xmm0, %xmm4
+; X64-SSE-NEXT: pcmpgtd %xmm1, %xmm4
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
 ; X64-SSE-NEXT: pcmpeqd %xmm0, %xmm1
 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; X64-SSE-NEXT: pand %xmm5, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
 ; X64-SSE-NEXT: por %xmm0, %xmm1
 ; X64-SSE-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE-NEXT: pandn %xmm4, %xmm0
+; X64-SSE-NEXT: pandn %xmm3, %xmm0
 ; X64-SSE-NEXT: pand %xmm2, %xmm1
 ; X64-SSE-NEXT: por %xmm0, %xmm1
 ; X64-SSE-NEXT: movd %xmm1, %rax