llvm.org GIT mirror: llvm @ 52981c4
I optimized the following patterns:
  sext <4 x i1> to <4 x i64>
  sext <4 x i8> to <4 x i64>
  sext <4 x i16> to <4 x i64>

I'm running a combine on SIGN_EXTEND_IN_REG and rewriting the sext pattern:
  (sext_in_reg (v4i64 anyext (v4i32 x)), ExtraVT) ->
  (v4i64 sext (v4i32 sext_in_reg (v4i32 x, ExtraVT)))

The sext_in_reg (v4i32 x) may be lowered to a shl+sar pair. There is no "sar" for vectors with 64-bit elements, so sext_in_reg (v4i64 x) has no vector lowering at all.

I also added the costs of these operations to the AVX cost table.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175619 91177308-0d34-0410-b5e6-96231b3b80d8
Elena Demikhovsky, 7 years ago
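For reference, the end result of the rewrite is visible in the AVX codegen test below: a shl+sar pair performing sext_in_reg on v4i32, followed by two 128-bit vpmovsxdq. A minimal C++ intrinsics sketch of the same computation for sext <4 x i16> to <4 x i64>, assuming AVX1 (the function name and the use of intrinsics are mine, not part of the patch):

#include <immintrin.h>

// Sign-extend four 16-bit values (held in the low 16 bits of each
// 32-bit lane of x) to four 64-bit values, the way the combined
// lowering does it: sext_in_reg on v4i32, then widen each half.
__m256i sext_4i16_to_4i64(__m128i x) {
  __m128i v  = _mm_slli_epi32(x, 16);                     // vpslld $16
  v          = _mm_srai_epi32(v, 16);                     // vpsrad $16
  __m128i lo = _mm_cvtepi32_epi64(v);                     // vpmovsxdq, low pair
  __m128i hi = _mm_cvtepi32_epi64(_mm_srli_si128(v, 8));  // vpmovsxdq, high pair
  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
}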
4 changed files with 74 additions and 1 deletion.
 setTargetDAGCombine(ISD::ZERO_EXTEND);
 setTargetDAGCombine(ISD::ANY_EXTEND);
 setTargetDAGCombine(ISD::SIGN_EXTEND);
+setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
 setTargetDAGCombine(ISD::TRUNCATE);
 setTargetDAGCombine(ISD::SINT_TO_FP);
 setTargetDAGCombine(ISD::SETCC);
   return SDValue();
 }

+static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
+                                               const X86Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+  if (!VT.isVector())
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
+  DebugLoc dl = N->getDebugLoc();
+
+  // SIGN_EXTEND_INREG to v4i64 is an expensive operation on both SSE
+  // and AVX2, since there is no sign-extended shift right operation on
+  // vectors with 64-bit elements.
+  // (sext_in_reg (v4i64 anyext (v4i32 x)), ExtraVT) ->
+  //   (v4i64 sext (v4i32 sext_in_reg (v4i32 x, ExtraVT)))
+  if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
+                           N0.getOpcode() == ISD::SIGN_EXTEND)) {
+    SDValue N00 = N0.getOperand(0);
+
+    // EXTLOAD has a better solution on AVX2: it may be replaced with an
+    // X86ISD::VSEXT node.
+    if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256())
+      if (!ISD::isNormalLoad(N00.getNode()))
+        return SDValue();
+
+    if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) {
+      SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32,
+                                N00, N1);
+      return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp);
+    }
+  }
+  return SDValue();
+}
+
 static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget *Subtarget) {
 case ISD::ANY_EXTEND:
 case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
 case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
+case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
 case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI, Subtarget);
 case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
 case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
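The shl+sar expansion that the code comment refers to computes, per 32-bit lane, a sign extension from ExtraVT's width. A minimal scalar model of one lane (the function name and test values are mine; it relies on the usual arithmetic right-shift behavior of mainstream compilers):

#include <cassert>
#include <cstdint>

// Model of sext_in_reg on one 32-bit lane: shift the FromBits-wide
// payload to the top of the lane, then arithmetic-shift it back down.
int32_t sext_in_reg_i32(int32_t x, unsigned FromBits) {
  unsigned Sh = 32 - FromBits;
  return (int32_t)((uint32_t)x << Sh) >> Sh;  // shl + sar
}

int main() {
  assert(sext_in_reg_i32(0xFF, 8) == -1);   // i8 0xFF sign-extends to -1
  assert(sext_in_reg_i32(0x7F, 8) == 127);  // i8 0x7F stays positive
  assert(sext_in_reg_i32(0x1, 1) == -1);    // i1 1 becomes all ones
  return 0;
}

vpsrad provides this arithmetic right shift for 32-bit lanes, but no AVX/AVX2 instruction provides it for 64-bit lanes, which is why the combine pushes the sext_in_reg down to v4i32 before widening.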
 { ISD::FP_TO_SINT,  MVT::v4i8,  MVT::v4f32, 1 },
 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1,  6 },
 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1,  9 },
+{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1,  8 },
+{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8,  8 },
+{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 },
 { ISD::TRUNCATE,    MVT::v8i32, MVT::v8i64, 3 },
};

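These table entries are what the cost model hands back to clients such as the vectorizers. A minimal sketch of the lookup, assuming the getCastInstrCost(Opcode, Dst, Src) interface of that era (header paths and the exact signature have shifted across LLVM releases):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/TargetTransformInfo.h"

using namespace llvm;

// With the entries added above, an AVX target reports a cost of 8
// for sext <4 x i8> to <4 x i64>.
unsigned sextCost4i8To4i64(const TargetTransformInfo &TTI, LLVMContext &Ctx) {
  Type *Src = VectorType::get(Type::getInt8Ty(Ctx), 4);   // <4 x i8>
  Type *Dst = VectorType::get(Type::getInt64Ty(Ctx), 4);  // <4 x i64>
  return TTI.getCastInstrCost(Instruction::SExt, Dst, Src);
}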
   %B = zext <8 x i16> undef to <8 x i32>
   ;CHECK: cost of 1 {{.*}} sext
   %C = sext <4 x i32> undef to <4 x i64>
+  ;CHECK: cost of 8 {{.*}} sext
+  %C1 = sext <4 x i8> undef to <4 x i64>
+  ;CHECK: cost of 8 {{.*}} sext
+  %C2 = sext <4 x i16> undef to <4 x i64>

   ;CHECK: cost of 1 {{.*}} zext
   %D = zext <4 x i32> undef to <4 x i64>
   ret i32 undef
 }

-define i32 @masks(<8 x i1> %in) {
+define i32 @masks8(<8 x i1> %in) {
   ;CHECK: cost of 6 {{.*}} zext
   %Z = zext <8 x i1> %in to <8 x i32>
   ;CHECK: cost of 9 {{.*}} sext
   ret i32 undef
 }

+define i32 @masks4(<4 x i1> %in) {
+  ;CHECK: cost of 8 {{.*}} sext
+  %S = sext <4 x i1> %in to <4 x i64>
+  ret i32 undef
+}
+
   %Y = sext <8 x i8> %X to <8 x i16>
   ret <8 x i16>%Y
 }
+
+; AVX: sext_4i1_to_4i64
+; AVX: vpslld $31
+; AVX: vpsrad $31
+; AVX: vpmovsxdq
+; AVX: vpmovsxdq
+; AVX: ret
+define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
+  %extmask = sext <4 x i1> %mask to <4 x i64>
+  ret <4 x i64> %extmask
+}
+
+; AVX: sext_4i8_to_4i64
+; AVX: vpslld $24
+; AVX: vpsrad $24
+; AVX: vpmovsxdq
+; AVX: vpmovsxdq
+; AVX: ret
+define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
+  %extmask = sext <4 x i8> %mask to <4 x i64>
+  ret <4 x i64> %extmask
+}
+