llvm.org GIT mirror llvm / c7bf2b1
[X86] Teach combineExtSetcc to handle ZERO_EXTEND by widening the setcc and then masking.
A later DAG combine will convert the masking operation to a shift. This helps to avoid the constant pool load that is otherwise needed to zero-extend from the mask.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@324804 91177308-0d34-0410-b5e6-96231b3b80d8
Craig Topper, 2 years ago
4 changed file(s) with 47 addition(s) and 79 deletion(s). Raw diff Collapse all Expand all
3621736217 EVT VT = N->getValueType(0);
3621836218 SDLoc dl(N);
3621936219
36220 // Only handle sext/aext for now.
36221 if (N->getOpcode() != ISD::SIGN_EXTEND && N->getOpcode() != ISD::ANY_EXTEND)
36222 return SDValue();
36223
3622436220 // Only do this combine with AVX512 for vector extends.
3622536221 if (!Subtarget.hasAVX512() || !VT.isVector() || N0->getOpcode() != ISD::SETCC)
3622636222 return SDValue();
3624836244 if (Size != MatchingVecType.getSizeInBits())
3624936245 return SDValue();
3625036246
36251 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
36247 SDValue Res = DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
36248
36249 if (N->getOpcode() == ISD::ZERO_EXTEND)
36250 Res = DAG.getZeroExtendInReg(Res, dl, N0.getValueType().getScalarType());
36251
36252 return Res;
3625236253 }
3625336254
3625436255 static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
16931693 }
16941694
16951695 define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
1696 ; KNL-LABEL: zext_16xi1_to_16xi16:
1697 ; KNL: # %bb.0:
1698 ; KNL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1699 ; KNL-NEXT: vpsrlw $15, %ymm0, %ymm0
1700 ; KNL-NEXT: retq
1701 ;
1702 ; SKX-LABEL: zext_16xi1_to_16xi16:
1703 ; SKX: # %bb.0:
1704 ; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k1
1705 ; SKX-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z}
1706 ; SKX-NEXT: retq
1696 ; ALL-LABEL: zext_16xi1_to_16xi16:
1697 ; ALL: # %bb.0:
1698 ; ALL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1699 ; ALL-NEXT: vpsrlw $15, %ymm0, %ymm0
1700 ; ALL-NEXT: retq
17071701 %mask = icmp eq <16 x i16> %x, %y
17081702 %1 = zext <16 x i1> %mask to <16 x i16>
17091703 ret <16 x i16> %1
17341728 }
17351729
17361730 define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
1737 ; KNL-LABEL: zext_4xi1_to_4x32:
1738 ; KNL: # %bb.0:
1739 ; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
1740 ; KNL-NEXT: vpand %xmm2, %xmm1, %xmm1
1741 ; KNL-NEXT: vpand %xmm2, %xmm0, %xmm0
1742 ; KNL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1743 ; KNL-NEXT: vpsrld $31, %xmm0, %xmm0
1744 ; KNL-NEXT: retq
1745 ;
1746 ; SKX-LABEL: zext_4xi1_to_4x32:
1747 ; SKX: # %bb.0:
1748 ; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
1749 ; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1
1750 ; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0
1751 ; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
1752 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
1753 ; SKX-NEXT: retq
1731 ; ALL-LABEL: zext_4xi1_to_4x32:
1732 ; ALL: # %bb.0:
1733 ; ALL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
1734 ; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
1735 ; ALL-NEXT: vpand %xmm2, %xmm0, %xmm0
1736 ; ALL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1737 ; ALL-NEXT: vpsrld $31, %xmm0, %xmm0
1738 ; ALL-NEXT: retq
17541739 %mask = icmp eq <4 x i8> %x, %y
17551740 %1 = zext <4 x i1> %mask to <4 x i32>
17561741 ret <4 x i32> %1
17571742 }
17581743
17591744 define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
1760 ; KNL-LABEL: zext_2xi1_to_2xi64:
1761 ; KNL: # %bb.0:
1762 ; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
1763 ; KNL-NEXT: vpand %xmm2, %xmm1, %xmm1
1764 ; KNL-NEXT: vpand %xmm2, %xmm0, %xmm0
1765 ; KNL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
1766 ; KNL-NEXT: vpsrlq $63, %xmm0, %xmm0
1767 ; KNL-NEXT: retq
1768 ;
1769 ; SKX-LABEL: zext_2xi1_to_2xi64:
1770 ; SKX: # %bb.0:
1771 ; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
1772 ; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1
1773 ; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0
1774 ; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k1
1775 ; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
1776 ; SKX-NEXT: retq
1745 ; ALL-LABEL: zext_2xi1_to_2xi64:
1746 ; ALL: # %bb.0:
1747 ; ALL-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
1748 ; ALL-NEXT: vpand %xmm2, %xmm1, %xmm1
1749 ; ALL-NEXT: vpand %xmm2, %xmm0, %xmm0
1750 ; ALL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
1751 ; ALL-NEXT: vpsrlq $63, %xmm0, %xmm0
1752 ; ALL-NEXT: retq
17771753 %mask = icmp eq <2 x i8> %x, %y
17781754 %1 = zext <2 x i1> %mask to <2 x i64>
17791755 ret <2 x i64> %1
46694669 define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
46704670 ; GENERIC-LABEL: zext_16xi1_to_16xi16:
46714671 ; GENERIC: # %bb.0:
4672 ; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
4673 ; GENERIC-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [4:0.50]
4672 ; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4673 ; GENERIC-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:1.00]
46744674 ; GENERIC-NEXT: retq # sched: [1:1.00]
46754675 ;
46764676 ; SKX-LABEL: zext_16xi1_to_16xi16:
46774677 ; SKX: # %bb.0:
4678 ; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
4679 ; SKX-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50]
4678 ; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4679 ; SKX-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:0.50]
46804680 ; SKX-NEXT: retq # sched: [7:1.00]
46814681 %mask = icmp eq <16 x i16> %x, %y
46824682 %1 = zext <16 x i1> %mask to <16 x i16>
47074707 ; GENERIC-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50]
47084708 ; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
47094709 ; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
4710 ; GENERIC-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 # sched: [3:1.00]
4711 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
4710 ; GENERIC-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
4711 ; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
47124712 ; GENERIC-NEXT: retq # sched: [1:1.00]
47134713 ;
47144714 ; SKX-LABEL: zext_4xi1_to_4x32:
47164716 ; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50]
47174717 ; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
47184718 ; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
4719 ; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 # sched: [3:1.00]
4720 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
4719 ; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
4720 ; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
47214721 ; SKX-NEXT: retq # sched: [7:1.00]
47224722 %mask = icmp eq <4 x i8> %x, %y
47234723 %1 = zext <4 x i1> %mask to <4 x i32>
47304730 ; GENERIC-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50]
47314731 ; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
47324732 ; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
4733 ; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %k1 # sched: [3:1.00]
4734 ; GENERIC-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [4:0.50]
4733 ; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
4734 ; GENERIC-NEXT: vpsrlq $63, %xmm0, %xmm0 # sched: [1:1.00]
47354735 ; GENERIC-NEXT: retq # sched: [1:1.00]
47364736 ;
47374737 ; SKX-LABEL: zext_2xi1_to_2xi64:
47394739 ; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50]
47404740 ; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
47414741 ; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
4742 ; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k1 # sched: [3:1.00]
4743 ; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
4742 ; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
4743 ; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 # sched: [1:0.50]
47444744 ; SKX-NEXT: retq # sched: [7:1.00]
47454745 %mask = icmp eq <2 x i8> %x, %y
47464746 %1 = zext <2 x i1> %mask to <2 x i64>
882882 }
883883
884884 define <2 x i64> @test45(<2 x i16> %x, <2 x i16> %y) #0 {
885 ; AVX512-LABEL: test45:
886 ; AVX512: ## %bb.0:
887 ; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
888 ; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
889 ; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
890 ; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
891 ; AVX512-NEXT: vpsrlq $63, %xmm0, %xmm0
892 ; AVX512-NEXT: retq
893 ;
894 ; SKX-LABEL: test45:
895 ; SKX: ## %bb.0:
896 ; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
897 ; SKX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
898 ; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
899 ; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k1
900 ; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
901 ; SKX-NEXT: retq
885 ; CHECK-LABEL: test45:
886 ; CHECK: ## %bb.0:
887 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
888 ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
889 ; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
890 ; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
891 ; CHECK-NEXT: vpsrlq $63, %xmm0, %xmm0
892 ; CHECK-NEXT: retq
902893 %mask = icmp eq <2 x i16> %x, %y
903894 %1 = zext <2 x i1> %mask to <2 x i64>
904895 ret <2 x i64> %1