llvm.org GIT mirror llvm / 4031c08
[X86] Remove the multiply by 8 that goes into the shift constant for X86ISD::VSHLDQ and X86ISD::VSRLDQ.

This simplifies the pattern matching in isel and allows these nodes to become the patterns embedded in the instruction.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229431 91177308-0d34-0410-b5e6-96231b3b80d8

Craig Topper, 5 years ago
5 changed file(s) with 52 addition(s) and 58 deletion(s). Raw diff Collapse all Expand all
537537
538538 if (Shift < 16) {
539539 SmallVector Idxs;
540 for (unsigned l = 0; l < 32; l += 16)
540 for (unsigned l = 0; l != 32; l += 16)
541541 for (unsigned i = 0; i != 16; ++i) {
542 unsigned Idx = i + Shift;
543 if (Idx >= 16) Idx += 16; // end of lane, switch operand.
542 unsigned Idx = 32 + i - Shift;
543 if (Idx < 32) Idx -= 16; // end of lane, switch operand.
544544 Idxs.push_back(Builder.getInt32(Idx + l));
545545 }
546546
560560
561561 if (Shift < 16) {
562562 SmallVector Idxs;
563 for (unsigned l = 0; l < 32; l += 16)
563 for (unsigned l = 0; l != 32; l += 16)
564564 for (unsigned i = 0; i != 16; ++i) {
565 unsigned Idx = 32 + i - Shift;
566 if (Idx < 32) Idx -= 16; // end of lane, switch operand.
565 unsigned Idx = i + Shift;
566 if (Idx >= 16) Idx += 16; // end of lane, switch operand.
567567 Idxs.push_back(Builder.getInt32(Idx + l));
568568 }
569569
59295929 unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
59305930 SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
59315931 MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(SrcOp.getValueType());
5932 SDValue ShiftVal = DAG.getConstant(NumBits, ScalarShiftTy);
5932 assert(NumBits % 8 == 0 && "Only support byte sized shifts");
5933 SDValue ShiftVal = DAG.getConstant(NumBits/8, ScalarShiftTy);
59335934 return DAG.getNode(ISD::BITCAST, dl, VT,
59345935 DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
59355936 }
77607761 Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Hi);
77617762
77627763 SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v2i64, Lo,
7763 DAG.getConstant(8 * LoByteShift, MVT::i8));
7764 DAG.getConstant(LoByteShift, MVT::i8));
77647765 SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v2i64, Hi,
7765 DAG.getConstant(8 * HiByteShift, MVT::i8));
7766 DAG.getConstant(HiByteShift, MVT::i8));
77667767 return DAG.getNode(ISD::BITCAST, DL, VT,
77677768 DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift));
77687769 }
79067907 SDValue V = MatchV1 ? V1 : V2;
79077908 V = DAG.getNode(ISD::BITCAST, DL, ShiftVT, V);
79087909 V = DAG.getNode(Op, DL, ShiftVT, V,
7909 DAG.getConstant(ByteShift * 8, MVT::i8));
7910 DAG.getConstant(ByteShift, MVT::i8));
79107911 return DAG.getNode(ISD::BITCAST, DL, VT, V);
79117912 };
79127913
82998300 V2 = DAG.getNode(
83008301 X86ISD::VSHLDQ, DL, MVT::v2i64, V2,
83018302 DAG.getConstant(
8302 V2Index * EltVT.getSizeInBits(),
8303 V2Index * EltVT.getSizeInBits()/8,
83038304 DAG.getTargetLoweringInfo().getScalarShiftAmountTy(MVT::v2i64)));
83048305 V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
83058306 }
41734173 VR128, v4i32, v4i32, bc_v4i32, loadv2i64,
41744174 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
41754175
4176 let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
4176 let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
41774177 // 128-bit logical shifts.
41784178 def VPSLLDQri : PDIi8<0x73, MRM7r,
4179 (outs VR128:$dst), (ins VR128:$src1, i32u8imm:$src2),
4179 (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
41804180 "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4181 []>, VEX_4V;
4181 [(set VR128:$dst,
4182 (v2i64 (X86vshldq VR128:$src1, (i8 imm:$src2))))]>,
4183 VEX_4V;
41824184 def VPSRLDQri : PDIi8<0x73, MRM3r,
4183 (outs VR128:$dst), (ins VR128:$src1, i32u8imm:$src2),
4185 (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
41844186 "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4185 []>, VEX_4V;
4187 [(set VR128:$dst,
4188 (v2i64 (X86vshrdq VR128:$src1, (i8 imm:$src2))))]>,
4189 VEX_4V;
41864190 // PSRADQri doesn't exist in SSE[1-3].
41874191 }
41884192 } // Predicates = [HasAVX]
42184222 let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
42194223 // 256-bit logical shifts.
42204224 def VPSLLDQYri : PDIi8<0x73, MRM7r,
4221 (outs VR256:$dst), (ins VR256:$src1, i32u8imm:$src2),
4225 (outs VR256:$dst), (ins VR256:$src1, u8imm:$src2),
42224226 "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4223 []>, VEX_4V, VEX_L;
4227 [(set VR256:$dst,
4228 (v4i64 (X86vshldq VR256:$src1, (i8 imm:$src2))))]>,
4229 VEX_4V, VEX_L;
42244230 def VPSRLDQYri : PDIi8<0x73, MRM3r,
4225 (outs VR256:$dst), (ins VR256:$src1, i32u8imm:$src2),
4231 (outs VR256:$dst), (ins VR256:$src1, u8imm:$src2),
42264232 "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4227 []>, VEX_4V, VEX_L;
4233 [(set VR256:$dst,
4234 (v4i64 (X86vshrdq VR256:$src1, (i8 imm:$src2))))]>,
4235 VEX_4V, VEX_L;
42284236 // PSRADQYri doesn't exist in SSE[1-3].
42294237 }
42304238 } // Predicates = [HasAVX2]
42604268 let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
42614269 // 128-bit logical shifts.
42624270 def PSLLDQri : PDIi8<0x73, MRM7r,
4263 (outs VR128:$dst), (ins VR128:$src1, i32u8imm:$src2),
4271 (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
42644272 "pslldq\t{$src2, $dst|$dst, $src2}",
4265 [], IIC_SSE_INTSHDQ_P_RI>;
4273 [(set VR128:$dst,
4274 (v2i64 (X86vshldq VR128:$src1, (i8 imm:$src2))))],
4275 IIC_SSE_INTSHDQ_P_RI>;
42664276 def PSRLDQri : PDIi8<0x73, MRM3r,
4267 (outs VR128:$dst), (ins VR128:$src1, i32u8imm:$src2),
4277 (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
42684278 "psrldq\t{$src2, $dst|$dst, $src2}",
4269 [], IIC_SSE_INTSHDQ_P_RI>;
4279 [(set VR128:$dst,
4280 (v2i64 (X86vshrdq VR128:$src1, (i8 imm:$src2))))],
4281 IIC_SSE_INTSHDQ_P_RI>;
42704282 // PSRADQri doesn't exist in SSE[1-3].
42714283 }
42724284 } // Constraints = "$src1 = $dst"
42784290 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
42794291 def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
42804292 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
4281
4282 // Shift up / down and insert zero's.
4283 def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
4284 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
4285 def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
4286 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
42874293 }
42884294
42894295 let Predicates = [HasAVX2] in {
42914297 (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
42924298 def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
42934299 (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
4294
4295 // Shift up / down and insert zero's.
4296 def : Pat<(v4i64 (X86vshldq VR256:$src, (i8 imm:$amt))),
4297 (VPSLLDQYri VR256:$src, (BYTE_imm imm:$amt))>;
4298 def : Pat<(v4i64 (X86vshrdq VR256:$src, (i8 imm:$amt))),
4299 (VPSRLDQYri VR256:$src, (BYTE_imm imm:$amt))>;
43004300 }
43014301
43024302 let Predicates = [UseSSE2] in {
43064306 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
43074307 def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
43084308 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
4309
4310 // Shift up / down and insert zero's.
4311 def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
4312 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
4313 def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
4314 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
43154309 }
43164310
43174311 //===---------------------------------------------------------------------===//
3030 }
3131 declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
3232
33
34 define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
35 ; CHECK: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
36 %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
37 ret <4 x i64> %res
38 }
39 declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
40
41
42 define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
43 ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
44 %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
45 ret <4 x i64> %res
46 }
47 declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
167167 declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
168168
169169
170 define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
171 ; CHECK: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
172 %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
173 ret <4 x i64> %res
174 }
175 declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
176
177
178170 define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) {
179171 ; CHECK: vpsllq
180172 %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
261253 ret <4 x i64> %res
262254 }
263255 declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
264
265
266 define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
267 ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
268 %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
269 ret <4 x i64> %res
270 }
271 declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
272256
273257
274258 define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) {