llvm.org GIT mirror llvm / 2bc87a6

[DAGCombiner] Generalize masking of constant rotates.

We don't need a mask of a rotation result to be a constant splat - any constant scalar/vector can be usefully folded.

Followup to D13851.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251197 91177308-0d34-0410-b5e6-96231b3b80d8

Simon Pilgrim, 4 years ago

3 changed files with 43 additions and 64 deletions.
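The fold being generalized rewrites ((X shl C) & LHSMask) | ((X srl (W-C)) & RHSMask) into a single rotate followed by one AND. The snippet below is not LLVM code; it is a minimal standalone C++ check of the per-lane identity that makes the fold legal, with deliberately non-splat, purely illustrative mask values (the exact constants used by the regression tests are not reproduced here):

#include <cassert>
#include <cstdint>

// Rotate-left of a 64-bit lane; C must be in 1..63 for this sketch.
static uint64_t rotl64(uint64_t X, unsigned C) {
  return (X << C) | (X >> (64 - C));
}

int main() {
  const unsigned C = 15;                         // per-lane rotate amount
  const uint64_t Lane[2]    = {0x0123456789ABCDEFULL, 0xFEDCBA9876543210ULL};
  const uint64_t LHSMask[2] = {0x00FF, 0x0F0F};  // mask on the shl half (non-splat)
  const uint64_t RHSMask[2] = {0x0041, 0x7FFF};  // mask on the srl half (non-splat)
  const uint64_t LowC = (1ULL << C) - 1;         // bits the shl half can never set

  for (int i = 0; i != 2; ++i) {
    // Original pattern: each shifted half is masked separately.
    uint64_t Split = ((Lane[i] << C) & LHSMask[i]) |
                     ((Lane[i] >> (64 - C)) & RHSMask[i]);
    // Folded form: one rotate, one AND with a combined constant mask.
    uint64_t Mask = (LHSMask[i] | LowC) & (RHSMask[i] | ~LowC);
    assert(Split == (rotl64(Lane[i], C) & Mask));
  }
  return 0;
}

The identity holds lane by lane for any constant masks, so nothing about it requires the masks to be splats; that is why the isConstOrConstSplat restrictions in the two hunks below can be relaxed.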
 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
 static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
   if (Op.getOpcode() == ISD::AND) {
-    if (isConstOrConstSplat(Op.getOperand(1))) {
+    if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
       Mask = Op.getOperand(1);
       Op = Op.getOperand(0);
     } else {
 
   // If there is an AND of either shifted operand, apply it to the result.
   if (LHSMask.getNode() || RHSMask.getNode()) {
-    APInt Mask = APInt::getAllOnesValue(EltSizeInBits);
+    APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
+    SDValue Mask = DAG.getConstant(AllBits, DL, VT);
 
     if (LHSMask.getNode()) {
       APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
-      Mask &= isConstOrConstSplat(LHSMask)->getAPIntValue() | RHSBits;
+      Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
+                         DAG.getNode(ISD::OR, DL, VT, LHSMask,
+                                     DAG.getConstant(RHSBits, DL, VT)));
     }
     if (RHSMask.getNode()) {
       APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
-      Mask &= isConstOrConstSplat(RHSMask)->getAPIntValue() | LHSBits;
+      Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
+                         DAG.getNode(ISD::OR, DL, VT, RHSMask,
+                                     DAG.getConstant(LHSBits, DL, VT)));
     }
 
-    Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT));
+    Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
   }
 
   return Rot.getNode();
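With the mask kept as an SDValue rather than a single APInt, the combined mask is now assembled from generic ISD::AND/ISD::OR nodes over the original mask operands, and when those operands are constant build vectors the DAG should constant-fold the whole expression element-wise into one constant. A rough stand-in for that element-wise folding, assuming v2i64 lanes and using plain arrays in place of BUILD_VECTOR constants (mask values are illustrative, not taken from the tests):

#include <array>
#include <cstdint>
#include <cstdio>

using V2 = std::array<uint64_t, 2>;   // stand-in for a v2i64 constant vector

static V2 vAnd(V2 A, V2 B) { return {A[0] & B[0], A[1] & B[1]}; }
static V2 vOr(V2 A, V2 B)  { return {A[0] | B[0], A[1] | B[1]}; }
static V2 splat(uint64_t S) { return {S, S}; }

int main() {
  const unsigned EltSizeInBits = 64, LShVal = 15, RShVal = EltSizeInBits - LShVal;

  // Non-splat masks found on the AND of each shifted half (arbitrary values).
  const V2 LHSMask = {0x00FF, 0x0F0F};
  const V2 RHSMask = {0x0041, 0x7FFF};

  // Mirrors the new code: start from all-ones, OR each mask with the bits its
  // half can never produce, then AND the results into the running mask.
  V2 Mask = splat(~0ULL);                              // getAllOnesValue
  const V2 RHSBits = splat((1ULL << LShVal) - 1);      // getLowBitsSet(W, LShVal)
  const V2 LHSBits = splat(~(~0ULL >> RShVal));        // getHighBitsSet(W, RShVal)
  Mask = vAnd(Mask, vOr(LHSMask, RHSBits));
  Mask = vAnd(Mask, vOr(RHSMask, LHSBits));

  std::printf("combined mask = { 0x%llx, 0x%llx }\n",
              (unsigned long long)Mask[0], (unsigned long long)Mask[1]);
  return 0;
}

The AllBits/getLowBitsSet/getHighBitsSet values play the same role as in the replaced APInt code: the shl half can never produce the low LShVal bits and the srl half can never produce the high RShVal bits, so those bits are forced on in the corresponding mask before everything is ANDed onto the rotate result.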
 ; X32-SSE-NEXT: retl
   %shl = shl <2 x i64> %a,
   %lshr = lshr <2 x i64> %a,
-  %rmask = and <2 x i64> %lshr,
-  %lmask = and <2 x i64> %shl, >
+  %rmask = and <2 x i64> %lshr, >
+  %lmask = and <2 x i64> %shl,
   %or = or <2 x i64> %lmask, %rmask
   ret <2 x i64> %or
 }
 ; SSE-NEXT: movdqa %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: splatconstant_rotate_mask_v4i32:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpslld $4, %xmm0, %xmm1
-; AVX1-NEXT: vpsrld $28, %xmm0, %xmm0
-; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splatconstant_rotate_mask_v4i32:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpslld $4, %xmm0, %xmm1
-; AVX2-NEXT: vpsrld $28, %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: retq
-;
-; XOPAVX1-LABEL: splatconstant_rotate_mask_v4i32:
-; XOPAVX1: # BB#0:
-; XOPAVX1-NEXT: vprotd $4, %xmm0, %xmm0
-; XOPAVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; XOPAVX1-NEXT: retq
-;
-; XOPAVX2-LABEL: splatconstant_rotate_mask_v4i32:
-; XOPAVX2: # BB#0:
-; XOPAVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
-; XOPAVX2-NEXT: vprotd $4, %xmm0, %xmm0
-; XOPAVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; XOPAVX2-NEXT: retq
+; AVX-LABEL: splatconstant_rotate_mask_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpslld $4, %xmm0, %xmm1
+; AVX-NEXT: vpsrld $28, %xmm0, %xmm0
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_rotate_mask_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vprotd $4, %xmm0, %xmm0
+; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: retq
 ;
 ; X32-SSE-LABEL: splatconstant_rotate_mask_v4i32:
 ; X32-SSE: # BB#0:
 ; X32-SSE-NEXT: retl
   %shl = shl <4 x i32> %a,
   %lshr = lshr <4 x i32> %a,
-  %rmask = and <4 x i32> %lshr,
-  %lmask = and <4 x i32> %shl, 3>
+  %rmask = and <4 x i32> %lshr, 3>
+  %lmask = and <4 x i32> %shl,
   %or = or <4 x i32> %lmask, %rmask
   ret <4 x i32> %or
 }
 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; AVX1-NEXT: vpsrlq $49, %xmm0, %xmm0
 ; AVX1-NEXT: vpsrlq $49, %xmm2, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
-; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
 ; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
 ; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
 ; AVX2-NEXT: vpsllq $15, %ymm0, %ymm1
 ; AVX2-NEXT: vpsrlq $49, %ymm0, %ymm0
 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
-; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 ;
 ; XOPAVX1-LABEL: splatconstant_rotate_mask_v4i64:
 ; XOPAVX1: # BB#0:
-; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; XOPAVX1-NEXT: vprotq $15, %xmm1, %xmm1
-; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
-; XOPAVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT: vprotq $15, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; XOPAVX1-NEXT: vprotq $15, %xmm0, %xmm0
-; XOPAVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
-; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
 ; XOPAVX1-NEXT: retq
 ;
 ; XOPAVX2-LABEL: splatconstant_rotate_mask_v4i64:
 ; XOPAVX2-NEXT: retq
   %shl = shl <4 x i64> %a,
   %lshr = lshr <4 x i64> %a,
-  %rmask = and <4 x i64> %lshr,
-  %lmask = and <4 x i64> %shl, >
+  %rmask = and <4 x i64> %lshr, >
+  %lmask = and <4 x i64> %shl,
   %or = or <4 x i64> %lmask, %rmask
   ret <4 x i64> %or
 }
 ; AVX2: # BB#0:
 ; AVX2-NEXT: vpslld $4, %ymm0, %ymm1
 ; AVX2-NEXT: vpsrld $28, %ymm0, %ymm0
-; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
-; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
-; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 ;
 ;
 ; XOPAVX2-LABEL: splatconstant_rotate_mask_v8i32:
 ; XOPAVX2: # BB#0:
-; XOPAVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
-; XOPAVX2-NEXT: vprotd $4, %xmm0, %xmm2
+; XOPAVX2-NEXT: vprotd $4, %xmm0, %xmm1
 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; XOPAVX2-NEXT: vprotd $4, %xmm0, %xmm0
-; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
-; XOPAVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
 ; XOPAVX2-NEXT: retq
   %shl = shl <8 x i32> %a,
   %lshr = lshr <8 x i32> %a,
-  %rmask = and <8 x i32> %lshr,
-  %lmask = and <8 x i32> %shl, >
+  %rmask = and <8 x i32> %lshr, >
+  %lmask = and <8 x i32> %shl,
   %or = or <8 x i32> %lmask, %rmask
   ret <8 x i32> %or
 }