llvm.org GIT mirror llvm / 6242fda
DAGCombiner: Canonicalize vector integer abs in the same way we do it for scalars.
This already helps SSE2 x86 a lot because it lacks an efficient way to represent a vector select. The long-term goal is to enable the backend to match a canonicalized pattern into a single instruction (e.g. vabs or pabs).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180597 91177308-0d34-0410-b5e6-96231b3b80d8
Benjamin Kramer 7 years ago
2 changed file(s) with 108 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
204 204 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
205 205 SDValue visitCTPOP(SDNode *N);
206 206 SDValue visitSELECT(SDNode *N);
207 SDValue visitVSELECT(SDNode *N);
207 208 SDValue visitSELECT_CC(SDNode *N);
208 209 SDValue visitSETCC(SDNode *N);
209 210 SDValue visitSIGN_EXTEND(SDNode *N);
1125 1126 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1126 1127 case ISD::CTPOP: return visitCTPOP(N);
1127 1128 case ISD::SELECT: return visitSELECT(N);
1129 case ISD::VSELECT: return visitVSELECT(N);
1128 1130 case ISD::SELECT_CC: return visitSELECT_CC(N);
1129 1131 case ISD::SETCC: return visitSETCC(N);
1130 1132 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
4161 4163 return SDValue();
4162 4164 }
4163 4165
4166 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
4167 SDValue N0 = N->getOperand(0);
4168 SDValue N1 = N->getOperand(1);
4169 SDValue N2 = N->getOperand(2);
4170 DebugLoc DL = N->getDebugLoc();
4171
4172 // Canonicalize integer abs.
4173 // vselect (setg[te] X, 0), X, -X ->
4174 // vselect (setgt X, -1), X, -X ->
4175 // vselect (setl[te] X, 0), -X, X ->
4176 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
4177 if (N0.getOpcode() == ISD::SETCC) {
4178 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
4179 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4180 bool isAbs = false;
4181 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
4182
4183 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
4184 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
4185 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
4186 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
4187 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
4188 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
4189 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
4190
4191 if (isAbs) {
4192 EVT VT = LHS.getValueType();
4193 SDValue Shift = DAG.getNode(
4194 ISD::SRA, DL, VT, LHS,
4195 DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
4196 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
4197 AddToWorkList(Shift.getNode());
4198 AddToWorkList(Add.getNode());
4199 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
4200 }
4201 }
4202
4203 return SDValue();
4204 }
4205
4164 4206 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
4165 4207 SDValue N0 = N->getOperand(0);
4166 4208 SDValue N1 = N->getOperand(1);
0 ; RUN: llc < %s -march=x86-64 -mcpu=x86-64 | FileCheck %s -check-prefix=SSE2
1
2 define <4 x i32> @test1(<4 x i32> %a) nounwind {
3 ; SSE2: test1:
4 ; SSE2: movdqa
5 ; SSE2-NEXT: psrad $31
6 ; SSE2-NEXT: padd
7 ; SSE2-NEXT: pxor
8 ; SSE2-NEXT: ret
9 %tmp1neg = sub <4 x i32> zeroinitializer, %a
10 %b = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
11 %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
12 ret <4 x i32> %abs
13 }
14
15 define <4 x i32> @test2(<4 x i32> %a) nounwind {
16 ; SSE2: test2:
17 ; SSE2: movdqa
18 ; SSE2-NEXT: psrad $31
19 ; SSE2-NEXT: padd
20 ; SSE2-NEXT: pxor
21 ; SSE2-NEXT: ret
22 %tmp1neg = sub <4 x i32> zeroinitializer, %a
23 %b = icmp sge <4 x i32> %a, zeroinitializer
24 %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
25 ret <4 x i32> %abs
26 }
27
28 define <4 x i32> @test3(<4 x i32> %a) nounwind {
29 ; SSE2: test3:
30 ; SSE2: movdqa
31 ; SSE2-NEXT: psrad $31
32 ; SSE2-NEXT: padd
33 ; SSE2-NEXT: pxor
34 ; SSE2-NEXT: ret
35 %tmp1neg = sub <4 x i32> zeroinitializer, %a
36 %b = icmp sgt <4 x i32> %a, zeroinitializer
37 %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
38 ret <4 x i32> %abs
39 }
40
41 define <4 x i32> @test4(<4 x i32> %a) nounwind {
42 ; SSE2: test4:
43 ; SSE2: movdqa
44 ; SSE2-NEXT: psrad $31
45 ; SSE2-NEXT: padd
46 ; SSE2-NEXT: pxor
47 ; SSE2-NEXT: ret
48 %tmp1neg = sub <4 x i32> zeroinitializer, %a
49 %b = icmp slt <4 x i32> %a, zeroinitializer
50 %abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a
51 ret <4 x i32> %abs
52 }
53
54 define <4 x i32> @test5(<4 x i32> %a) nounwind {
55 ; SSE2: test5:
56 ; SSE2: movdqa
57 ; SSE2-NEXT: psrad $31
58 ; SSE2-NEXT: padd
59 ; SSE2-NEXT: pxor
60 ; SSE2-NEXT: ret
61 %tmp1neg = sub <4 x i32> zeroinitializer, %a
62 %b = icmp sle <4 x i32> %a, zeroinitializer
63 %abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a
64 ret <4 x i32> %abs
65 }