llvm.org GIT mirror llvm / 2253059
[TargetLowering] Add ISD::AND handling to SimplifyDemandedVectorElts. If either of the operand elements is zero, then we know the result element is going to be zero (even if the other element is undef). Differential Revision: https://reviews.llvm.org/D55558 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@348926 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 1 year, 2 months ago
7 changed file(s) with 57 addition(s) and 46 deletion(s). Raw diff Collapse all Expand all
17861786 KnownUndef &= SrcUndef;
17871787 break;
17881788 }
1789 case ISD::AND: {
1790 APInt SrcUndef, SrcZero;
1791 if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
1792 SrcZero, TLO, Depth + 1))
1793 return true;
1794 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
1795 KnownZero, TLO, Depth + 1))
1796 return true;
1797
1798 // If either side has a zero element, then the result element is zero, even
1799 // if the other is an UNDEF.
1800 KnownZero |= SrcZero;
1801 KnownUndef &= SrcUndef;
1802 KnownUndef &= ~KnownZero;
1803 break;
1804 }
17891805 case ISD::TRUNCATE:
17901806 case ISD::SIGN_EXTEND:
17911807 case ISD::ZERO_EXTEND:
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
12 ;
23 ; Check that a widening truncate to a vector of i1 elements can be handled.
45 define void @pr32275(<4 x i8> %B15) {
56 ; CHECK-LABEL: pr32275:
67 ; CHECK: # %bb.0: # %BB
7 ; CHECK: vlgvb %r0, %v24, 3
8 ; CHECK-NEXT: vlgvb %r1, %v24, 1
9 ; CHECK-NEXT: vlvgp [[REG1:%v[0-9]]], %r1, %r0
10 ; CHECK-NEXT: vlgvb %r0, %v24, 0
11 ; CHECK-NEXT: vlgvb [[REG3:%r[0-9]]], %v24, 2
12 ; CHECK-NEXT: vrepif [[REG0:%v[0-9]]], 1
13 ; CHECK: .LBB0_1:
14 ; CHECK-DAG: vlr [[REG2:%v[0-9]]], [[REG1]]
15 ; CHECK-DAG: vlvgf [[REG2]], %r0, 0
16 ; CHECK-NEXT: vlvgf [[REG2]], [[REG3]], 2
17 ; CHECK-NEXT: vn [[REG2]], [[REG2]], [[REG0]]
18 ; CHECK-NEXT: vlgvf [[REG4:%r[0-9]]], [[REG2]], 3
19 ; CHECK-NEXT: cijlh [[REG4]], 0, .LBB0_1
8 ; CHECK-NEXT: vlgvb %r0, %v24, 3
9 ; CHECK-NEXT: vlvgp %v0, %r0, %r0
10 ; CHECK-NEXT: vrepif %v1, 1
11 ; CHECK-NEXT: vn %v0, %v0, %v1
12 ; CHECK-NEXT: vlgvf %r0, %v0, 3
13 ; CHECK-NEXT: .LBB0_1: # %CF34
14 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
15 ; CHECK-NEXT: cijlh %r0, 0, .LBB0_1
2016 ; CHECK-NEXT: # %bb.2: # %CF36
2117 ; CHECK-NEXT: br %r14
2218 BB:
2525 ; CHECK-LABEL: mag_neg0_double:
2626 ; CHECK: ## %bb.0:
2727 ; CHECK-NEXT: movsd [[SIGNMASK2]](%rip), %xmm1
28 ; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
2928 ; CHECK-NEXT: andps %xmm1, %xmm0
3029 ; CHECK-NEXT: retq
3130 ;
9190 ; CHECK-LABEL: mag_neg0_float:
9291 ; CHECK: ## %bb.0:
9392 ; CHECK-NEXT: movss [[SIGNMASK6]](%rip), %xmm1
94 ; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
9593 ; CHECK-NEXT: andps %xmm1, %xmm0
9694 ; CHECK-NEXT: retq
9795 ;
44 define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
55 ; X32-LABEL: knownbits_mask_extract_sext:
66 ; X32: # %bb.0:
7 ; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
7 ; X32-NEXT: movl $15, %eax
8 ; X32-NEXT: vmovd %eax, %xmm1
9 ; X32-NEXT: vpand %xmm1, %xmm0, %xmm0
810 ; X32-NEXT: vpextrw $0, %xmm0, %eax
911 ; X32-NEXT: retl
1012 ;
1113 ; X64-LABEL: knownbits_mask_extract_sext:
1214 ; X64: # %bb.0:
13 ; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
15 ; X64-NEXT: movl $15, %eax
16 ; X64-NEXT: vmovd %eax, %xmm1
17 ; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
1418 ; X64-NEXT: vpextrw $0, %xmm0, %eax
1519 ; X64-NEXT: retq
1620 %1 = and <8 x i16> %a0,
252252 ; X32-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
253253 ; X32: # %bb.0:
254254 ; X32-NEXT: pushl %eax
255 ; X32-NEXT: vpsrlq $60, %xmm0, %xmm2
255 ; X32-NEXT: vpsrlq $60, %xmm0, %xmm1
256256 ; X32-NEXT: vpsrlq $61, %xmm0, %xmm0
257 ; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
258 ; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [4,0,0,0,8,0,0,0]
259 ; X32-NEXT: vpxor %xmm2, %xmm0, %xmm0
260 ; X32-NEXT: vpsubq %xmm2, %xmm0, %xmm0
261 ; X32-NEXT: vpinsrd $0, {{[0-9]+}}(%esp), %xmm1, %xmm1
257 ; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
258 ; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [4,0,0,0,8,0,0,0]
259 ; X32-NEXT: vpxor %xmm1, %xmm0, %xmm0
260 ; X32-NEXT: vpsubq %xmm1, %xmm0, %xmm0
261 ; X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
262262 ; X32-NEXT: vpand %xmm1, %xmm0, %xmm0
263263 ; X32-NEXT: vmovd %xmm0, %eax
264 ; X32-NEXT: vcvtsi2ssl %eax, %xmm3, %xmm0
264 ; X32-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
265265 ; X32-NEXT: vmovss %xmm0, (%esp)
266266 ; X32-NEXT: flds (%esp)
267267 ; X32-NEXT: popl %eax
269269 ;
270270 ; X64-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
271271 ; X64: # %bb.0:
272 ; X64-NEXT: vpsrlq $60, %xmm0, %xmm2
272 ; X64-NEXT: vpsrlq $60, %xmm0, %xmm1
273273 ; X64-NEXT: vpsrlq $61, %xmm0, %xmm0
274 ; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
275 ; X64-NEXT: vmovdqa {{.*#+}} xmm2 = [4,8]
276 ; X64-NEXT: vpxor %xmm2, %xmm0, %xmm0
277 ; X64-NEXT: vpsubq %xmm2, %xmm0, %xmm0
274 ; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
275 ; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [4,8]
276 ; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0
277 ; X64-NEXT: vpsubq %xmm1, %xmm0, %xmm0
278278 ; X64-NEXT: movslq %edi, %rax
279 ; X64-NEXT: vpinsrq $0, %rax, %xmm1, %xmm1
279 ; X64-NEXT: vmovq %rax, %xmm1
280280 ; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
281281 ; X64-NEXT: vmovq %xmm0, %rax
282 ; X64-NEXT: vcvtsi2ssl %eax, %xmm3, %xmm0
282 ; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
283283 ; X64-NEXT: retq
284284 %1 = ashr <2 x i64> %a0,
285285 %2 = sext i32 %a2 to i64
860860 ; X32-SSE: # %bb.0:
861861 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
862862 ; X32-SSE-NEXT: pand %xmm2, %xmm0
863 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1]
864 ; X32-SSE-NEXT: pand %xmm2, %xmm3
863 ; X32-SSE-NEXT: pand %xmm1, %xmm2
864 ; X32-SSE-NEXT: movdqa %xmm0, %xmm3
865 ; X32-SSE-NEXT: psrlq %xmm2, %xmm3
865866 ; X32-SSE-NEXT: pxor %xmm2, %xmm2
866867 ; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
867 ; X32-SSE-NEXT: movdqa %xmm0, %xmm1
868 ; X32-SSE-NEXT: psrlq %xmm2, %xmm1
869 ; X32-SSE-NEXT: psrlq %xmm3, %xmm0
870 ; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
871 ; X32-SSE-NEXT: movapd %xmm1, %xmm0
868 ; X32-SSE-NEXT: psrlq %xmm2, %xmm0
869 ; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
872870 ; X32-SSE-NEXT: retl
873871 %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
874872 %shift = lshr <2 x i32> %a, %splat
638638 ;
639639 ; X32-SSE-LABEL: splatvar_shift_v2i32:
640640 ; X32-SSE: # %bb.0:
641 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
642 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2
643 ; X32-SSE-NEXT: xorps %xmm3, %xmm3
644 ; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
645 ; X32-SSE-NEXT: movdqa %xmm0, %xmm1
646 ; X32-SSE-NEXT: psllq %xmm3, %xmm1
641 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
642 ; X32-SSE-NEXT: pand %xmm1, %xmm2
643 ; X32-SSE-NEXT: movdqa %xmm0, %xmm3
644 ; X32-SSE-NEXT: psllq %xmm2, %xmm3
645 ; X32-SSE-NEXT: pxor %xmm2, %xmm2
646 ; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
647647 ; X32-SSE-NEXT: psllq %xmm2, %xmm0
648 ; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
649 ; X32-SSE-NEXT: movapd %xmm1, %xmm0
648 ; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
650649 ; X32-SSE-NEXT: retl
651650 %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
652651 %shift = shl <2 x i32> %a, %splat