llvm.org GIT mirror llvm / c341b7c
AVX-512: optimized icmp -> sext -> icmp pattern
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200849 91177308-0d34-0410-b5e6-96231b3b80d8
Elena Demikhovsky 6 years ago
2 changed file(s) with 114 addition(s) and 39 deletion(s). Raw diff Collapse all Expand all
48104810 SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
48114811 Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
48124812 Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops, 16);
4813 } else if (VT.getScalarType() == MVT::i1) {
4814 assert(VT.getVectorNumElements() <= 16 && "Unexpected vector type");
4815 SDValue Cst = DAG.getTargetConstant(0, MVT::i1);
4816 SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
4817 Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
4818 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
4819 Ops, VT.getVectorNumElements());
48134820 } else
48144821 llvm_unreachable("Unexpected vector type");
48154822
91349141 In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
91359142 InVT = ExtVT;
91369143 }
9144
91379145 SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
91389146 const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
91399147 SDValue CP = DAG.getConstantPool(C, getPointerTy());
999810006 DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
999910007 }
1000010008
10001 static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
10009 static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
10010 const X86Subtarget *Subtarget) {
1000210011 SDValue Op0 = Op.getOperand(0);
1000310012 SDValue Op1 = Op.getOperand(1);
1000410013 SDValue CC = Op.getOperand(2);
1000510014 MVT VT = Op.getSimpleValueType();
10015 SDLoc dl(Op);
1000610016
1000710017 assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 32 &&
1000810018 Op.getValueType().getScalarType() == MVT::i1 &&
1000910019 "Cannot set masked compare for this operation");
1001010020
1001110021 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
10012 SDLoc dl(Op);
10013
10022 unsigned Opc = 0;
1001410023 bool Unsigned = false;
10024 bool Swap = false;
1001510025 unsigned SSECC;
1001610026 switch (SetCCOpcode) {
1001710027 default: llvm_unreachable("Unexpected SETCC condition");
1001810028 case ISD::SETNE: SSECC = 4; break;
10019 case ISD::SETEQ: SSECC = 0; break;
10020 case ISD::SETUGT: Unsigned = true;
10021 case ISD::SETGT: SSECC = 6; break; // NLE
10022 case ISD::SETULT: Unsigned = true;
10023 case ISD::SETLT: SSECC = 1; break;
10024 case ISD::SETUGE: Unsigned = true;
10025 case ISD::SETGE: SSECC = 5; break; // NLT
10026 case ISD::SETULE: Unsigned = true;
10029 case ISD::SETEQ: Opc = X86ISD::PCMPEQM; break;
10030 case ISD::SETUGT: SSECC = 6; Unsigned = true; break;
10031 case ISD::SETLT: Swap = true; //fall-through
10032 case ISD::SETGT: Opc = X86ISD::PCMPGTM; break;
10033 case ISD::SETULT: SSECC = 1; Unsigned = true; break;
10034 case ISD::SETUGE: SSECC = 5; Unsigned = true; break; //NLT
10035 case ISD::SETGE: Swap = true; SSECC = 2; break; // LE + swap
10036 case ISD::SETULE: Unsigned = true; //fall-through
1002710037 case ISD::SETLE: SSECC = 2; break;
1002810038 }
10029 unsigned Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM;
10039
10040 if (Swap)
10041 std::swap(Op0, Op1);
10042 if (Opc)
10043 return DAG.getNode(Opc, dl, VT, Op0, Op1);
10044 Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM;
1003010045 return DAG.getNode(Opc, dl, VT, Op0, Op1,
1003110046 DAG.getConstant(SSECC, MVT::i8));
10032
1003310047 }
1003410048
1003510049 static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
1008510099 if (Subtarget->hasAVX512()) {
1008610100 if (Op1.getValueType().is512BitVector() ||
1008710101 (MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
10088 return LowerIntVSETCC_AVX512(Op, DAG);
10102 return LowerIntVSETCC_AVX512(Op, DAG, Subtarget);
1008910103
1009010104 // In AVX-512 architecture setcc returns mask with i1 elements,
1009110105 // But there is no compare instruction for i8 and i16 elements.
1010710121 switch (SetCCOpcode) {
1010810122 default: llvm_unreachable("Unexpected SETCC condition");
1010910123 case ISD::SETNE: Invert = true;
10110 case ISD::SETEQ: Opc = MaskResult? X86ISD::PCMPEQM: X86ISD::PCMPEQ; break;
10124 case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
1011110125 case ISD::SETLT: Swap = true;
10112 case ISD::SETGT: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT; break;
10126 case ISD::SETGT: Opc = X86ISD::PCMPGT; break;
1011310127 case ISD::SETGE: Swap = true;
10114 case ISD::SETLE: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
10128 case ISD::SETLE: Opc = X86ISD::PCMPGT;
1011510129 Invert = true; break;
1011610130 case ISD::SETULT: Swap = true;
10117 case ISD::SETUGT: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
10131 case ISD::SETUGT: Opc = X86ISD::PCMPGT;
1011810132 FlipSigns = true; break;
1011910133 case ISD::SETUGE: Swap = true;
10120 case ISD::SETULE: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
10134 case ISD::SETULE: Opc = X86ISD::PCMPGT;
1012110135 FlipSigns = true; Invert = true; break;
1012210136 }
1012310137
1403914053 case X86ISD::PTEST: return "X86ISD::PTEST";
1404014054 case X86ISD::TESTP: return "X86ISD::TESTP";
1404114055 case X86ISD::TESTM: return "X86ISD::TESTM";
14056 case X86ISD::TESTNM: return "X86ISD::TESTNM";
1404214057 case X86ISD::KORTEST: return "X86ISD::KORTEST";
1404314058 case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
1404414059 case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
1920219217
1920319218 // Optimize x == -y --> x+y == 0
1920419219 // x != -y --> x+y != 0
19205 static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) {
19220 static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
19221 const X86Subtarget* Subtarget) {
1920619222 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
1920719223 SDValue LHS = N->getOperand(0);
1920819224 SDValue RHS = N->getOperand(1);
19225 EVT VT = N->getValueType(0);
19226 SDLoc DL(N);
1920919227
1921019228 if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
1921119229 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
1922319241 return DAG.getSetCC(SDLoc(N), N->getValueType(0),
1922419242 addV, DAG.getConstant(0, addV.getValueType()), CC);
1922519243 }
19244
19245 if (VT.getScalarType() == MVT::i1) {
19246 bool IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
19247 (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
19248 bool IsVZero0 = ISD::isBuildVectorAllZeros(LHS.getNode());
19249 if (!IsSEXT0 && !IsVZero0)
19250 return SDValue();
19251 bool IsSEXT1 = (RHS.getOpcode() == ISD::SIGN_EXTEND) &&
19252 (RHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
19253 bool IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
19254
19255 if (!IsSEXT1 && !IsVZero1)
19256 return SDValue();
19257
19258 if (IsSEXT0 && IsVZero1) {
19259 assert(VT == LHS.getOperand(0).getValueType() && "Uexpected operand type");
19260 if (CC == ISD::SETEQ)
19261 return DAG.getNOT(DL, LHS.getOperand(0), VT);
19262 return LHS.getOperand(0);
19263 }
19264 if (IsSEXT1 && IsVZero0) {
19265 assert(VT == RHS.getOperand(0).getValueType() && "Uexpected operand type");
19266 if (CC == ISD::SETEQ)
19267 return DAG.getNOT(DL, RHS.getOperand(0), VT);
19268 return RHS.getOperand(0);
19269 }
19270 }
19271
1922619272 return SDValue();
1922719273 }
1922819274
1950719553 case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
1950819554 case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
1950919555 case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget);
19510 case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
19556 case ISD::SETCC: return PerformISDSETCCCombine(N, DAG, Subtarget);
1951119557 case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
1951219558 case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
1951319559 case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget);
44 ; CHECK: vmovups
55 ; CHECK: ret
66 define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
7 %mask = fcmp ole <16 x float> %x, %y
8 %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
9 ret <16 x float> %max
7 %mask = fcmp ole <16 x float> %x, %y
8 %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
9 ret <16 x float> %max
1010 }
1111
1212 ; CHECK-LABEL: test2
1414 ; CHECK: vmovupd
1515 ; CHECK: ret
1616 define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
17 %mask = fcmp ole <8 x double> %x, %y
18 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
19 ret <8 x double> %max
17 %mask = fcmp ole <8 x double> %x, %y
18 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
19 ret <8 x double> %max
2020 }
2121
2222 ; CHECK-LABEL: test3
2525 ; CHECK: ret
2626 define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
2727 %y = load <16 x i32>* %yp, align 4
28 %mask = icmp eq <16 x i32> %x, %y
29 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
30 ret <16 x i32> %max
28 %mask = icmp eq <16 x i32> %x, %y
29 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
30 ret <16 x i32> %max
3131 }
3232
3333 ; CHECK-LABEL: @test4_unsigned
3535 ; CHECK: vmovdqu32
3636 ; CHECK: ret
3737 define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y) nounwind {
38 %mask = icmp uge <16 x i32> %x, %y
39 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
40 ret <16 x i32> %max
38 %mask = icmp uge <16 x i32> %x, %y
39 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
40 ret <16 x i32> %max
4141 }
4242
4343 ; CHECK-LABEL: test5
4545 ; CHECK: vmovdqu64 {{.*}}%k1
4646 ; CHECK: ret
4747 define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
48 %mask = icmp eq <8 x i64> %x, %y
49 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
50 ret <8 x i64> %max
48 %mask = icmp eq <8 x i64> %x, %y
49 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
50 ret <8 x i64> %max
5151 }
5252
5353 ; CHECK-LABEL: test6_unsigned
5555 ; CHECK: vmovdqu64 {{.*}}%k1
5656 ; CHECK: ret
5757 define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y) nounwind {
58 %mask = icmp ugt <8 x i64> %x, %y
59 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
60 ret <8 x i64> %max
58 %mask = icmp ugt <8 x i64> %x, %y
59 %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
60 ret <8 x i64> %max
6161 }
6262
6363 ; CHECK-LABEL: test7
132132 %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
133133 ret <16 x i32> %conv
134134 }
135
136 ; CHECK-LABEL: test14
137 ; CHECK: vpcmp
138 ; CHECK-NOT: vpcmp
139 ; CHECK: vmovdqu32 {{.*}}{%k1} {z}
140 ; CHECK: ret
141 define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
142 %sub_r = sub <16 x i32> %a, %b
143 %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
144 %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
145 %mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
146 %res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
147 ret <16 x i32>%res
148 }
149
150 ; CHECK-LABEL: test15
151 ; CHECK: vpcmpgtq
152 ; CHECK-NOT: vpcmp
153 ; CHECK: vmovdqu64 {{.*}}{%k1} {z}
154 ; CHECK: ret
155 define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
156 %sub_r = sub <8 x i64> %a, %b
157 %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
158 %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
159 %mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
160 %res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
161 ret <8 x i64>%res
162 }
163