llvm.org GIT mirror llvm / 5759f97
Select vector shift with non-immediate i32 shift amount operand by first moving the operand into the right register. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@50619 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 12 years ago
3 changed file(s) with 127 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
49084908 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
49094909 switch (IntNo) {
49104910 default: return SDOperand(); // Don't custom lower most intrinsics.
4911 // Comparison intrinsics.
4911 // Comparison intrinsics.
49124912 case Intrinsic::x86_sse_comieq_ss:
49134913 case Intrinsic::x86_sse_comilt_ss:
49144914 case Intrinsic::x86_sse_comile_ss:
50095009 DAG.getConstant(X86CC, MVT::i8), Cond);
50105010 return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
50115011 }
5012
5013 // Fix vector shift instructions where the last operand is a non-immediate
5014 // i32 value.
5015 case Intrinsic::x86_sse2_pslli_w:
5016 case Intrinsic::x86_sse2_pslli_d:
5017 case Intrinsic::x86_sse2_pslli_q:
5018 case Intrinsic::x86_sse2_psrli_w:
5019 case Intrinsic::x86_sse2_psrli_d:
5020 case Intrinsic::x86_sse2_psrli_q:
5021 case Intrinsic::x86_sse2_psrai_w:
5022 case Intrinsic::x86_sse2_psrai_d:
5023 case Intrinsic::x86_mmx_pslli_w:
5024 case Intrinsic::x86_mmx_pslli_d:
5025 case Intrinsic::x86_mmx_pslli_q:
5026 case Intrinsic::x86_mmx_psrli_w:
5027 case Intrinsic::x86_mmx_psrli_d:
5028 case Intrinsic::x86_mmx_psrli_q:
5029 case Intrinsic::x86_mmx_psrai_w:
5030 case Intrinsic::x86_mmx_psrai_d: {
5031 SDOperand ShAmt = Op.getOperand(2);
5032 if (isa<ConstantSDNode>(ShAmt))
5033 return SDOperand();
5034
5035 unsigned NewIntNo = 0;
5036 MVT::ValueType ShAmtVT = MVT::v4i32;
5037 switch (IntNo) {
5038 case Intrinsic::x86_sse2_pslli_w:
5039 NewIntNo = Intrinsic::x86_sse2_psll_w;
5040 break;
5041 case Intrinsic::x86_sse2_pslli_d:
5042 NewIntNo = Intrinsic::x86_sse2_psll_d;
5043 break;
5044 case Intrinsic::x86_sse2_pslli_q:
5045 NewIntNo = Intrinsic::x86_sse2_psll_q;
5046 break;
5047 case Intrinsic::x86_sse2_psrli_w:
5048 NewIntNo = Intrinsic::x86_sse2_psrl_w;
5049 break;
5050 case Intrinsic::x86_sse2_psrli_d:
5051 NewIntNo = Intrinsic::x86_sse2_psrl_d;
5052 break;
5053 case Intrinsic::x86_sse2_psrli_q:
5054 NewIntNo = Intrinsic::x86_sse2_psrl_q;
5055 break;
5056 case Intrinsic::x86_sse2_psrai_w:
5057 NewIntNo = Intrinsic::x86_sse2_psra_w;
5058 break;
5059 case Intrinsic::x86_sse2_psrai_d:
5060 NewIntNo = Intrinsic::x86_sse2_psra_d;
5061 break;
5062 default: {
5063 ShAmtVT = MVT::v2i32;
5064 switch (IntNo) {
5065 case Intrinsic::x86_mmx_pslli_w:
5066 NewIntNo = Intrinsic::x86_mmx_psll_w;
5067 break;
5068 case Intrinsic::x86_mmx_pslli_d:
5069 NewIntNo = Intrinsic::x86_mmx_psll_d;
5070 break;
5071 case Intrinsic::x86_mmx_pslli_q:
5072 NewIntNo = Intrinsic::x86_mmx_psll_q;
5073 break;
5074 case Intrinsic::x86_mmx_psrli_w:
5075 NewIntNo = Intrinsic::x86_mmx_psrl_w;
5076 break;
5077 case Intrinsic::x86_mmx_psrli_d:
5078 NewIntNo = Intrinsic::x86_mmx_psrl_d;
5079 break;
5080 case Intrinsic::x86_mmx_psrli_q:
5081 NewIntNo = Intrinsic::x86_mmx_psrl_q;
5082 break;
5083 case Intrinsic::x86_mmx_psrai_w:
5084 NewIntNo = Intrinsic::x86_mmx_psra_w;
5085 break;
5086 case Intrinsic::x86_mmx_psrai_d:
5087 NewIntNo = Intrinsic::x86_mmx_psra_d;
5088 break;
5089 default: abort(); // Can't reach here.
5090 }
5091 break;
5092 }
5093 }
5094 MVT::ValueType VT = Op.getValueType();
5095 ShAmt = DAG.getNode(ISD::BIT_CONVERT, VT,
5096 DAG.getNode(ISD::SCALAR_TO_VECTOR, ShAmtVT, ShAmt));
5097 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
5098 DAG.getConstant(NewIntNo, MVT::i32),
5099 Op.getOperand(1), ShAmt);
5100 }
50125101 }
50135102 }
50145103
0 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psllq | grep 32
11 ; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep psllq | grep 32
22 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+mmx | grep psrad
3 ; RUN: llvm-as < %s | llc -march=x86-64 -mattr=+mmx | grep psrlw
34
45 define i64 @t1(<1 x i64> %mm1) nounwind {
56 entry:
1819 }
1920
2021 declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <2 x i32>) nounwind readnone
22
23 define i64 @t3(<1 x i64> %mm1, i32 %bits) nounwind {
24 entry:
25 %tmp6 = bitcast <1 x i64> %mm1 to <4 x i16> ; <<4 x i16>> [#uses=1]
26 %tmp8 = tail call <4 x i16> @llvm.x86.mmx.psrli.w( <4 x i16> %tmp6, i32 %bits ) nounwind readnone ; <<4 x i16>> [#uses=1]
27 %retval1314 = bitcast <4 x i16> %tmp8 to i64 ; <i64> [#uses=1]
28 ret i64 %retval1314
29 }
30
31 declare <4 x i16> @llvm.x86.mmx.psrli.w(<4 x i16>, i32) nounwind readnone
0 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psllq
1 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep psraw
2 ; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep movd | count 2
3
4 define <2 x i64> @t1(<2 x i64> %x1, i32 %bits) nounwind {
5 entry:
6 %tmp3 = tail call <2 x i64> @llvm.x86.sse2.pslli.q( <2 x i64> %x1, i32 %bits ) nounwind readnone ; <<2 x i64>> [#uses=1]
7 ret <2 x i64> %tmp3
8 }
9
10 define <2 x i64> @t2(<2 x i64> %x1) nounwind {
11 entry:
12 %tmp3 = tail call <2 x i64> @llvm.x86.sse2.pslli.q( <2 x i64> %x1, i32 10 ) nounwind readnone ; <<2 x i64>> [#uses=1]
13 ret <2 x i64> %tmp3
14 }
15
16 define <2 x i64> @t3(<2 x i64> %x1, i32 %bits) nounwind {
17 entry:
18 %tmp2 = bitcast <2 x i64> %x1 to <8 x i16> ; <<8 x i16>> [#uses=1]
19 %tmp4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w( <8 x i16> %tmp2, i32 %bits ) nounwind readnone ; <<8 x i16>> [#uses=1]
20 %tmp5 = bitcast <8 x i16> %tmp4 to <2 x i64> ; <<2 x i64>> [#uses=1]
21 ret <2 x i64> %tmp5
22 }
23
24 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
25 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone