llvm.org GIT mirror llvm / b68371d
[ARM] Lower unsigned saturation to USAT Summary: Implement lowering of unsigned saturation on an interval [0, k], where k + 1 is a power of two, using the USAT instruction, in a similar way to how [~k, k] is lowered using SSAT on ARM models that support it. Patch by Marten Svanfeldt Reviewers: t.p.northover, pbarrio, eastig, SjoerdMeijer, javed.absar, fhahn Reviewed By: fhahn Subscribers: fhahn, aemerson, javed.absar, llvm-commits, kristof.beyls Differential Revision: https://reviews.llvm.org/D41348 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@321164 91177308-0d34-0410-b5e6-96231b3b80d8 Florian Hahn 2 years ago
5 changed file(s) with 249 addition(s) and 7 deletion(s). Raw diff Collapse all Expand all
12451245 case ARMISD::CMOV: return "ARMISD::CMOV";
12461246
12471247 case ARMISD::SSAT: return "ARMISD::SSAT";
1248 case ARMISD::USAT: return "ARMISD::USAT";
12481249
12491250 case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
12501251 case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
41954196 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
41964197 }
41974198
4198 // Check if two chained conditionals could be converted into SSAT.
4199 // Check if two chained conditionals could be converted into SSAT or USAT.
41994200 //
42004201 // SSAT can replace a set of two conditional selectors that bound a number to an
42014202 // interval of type [~k, k] when k + 1 is a power of 2. Here are some examples:
42064207 // x < k ? (x < -k ? -k : x) : k
42074208 // etc.
42084209 //
4210 // USAT works similarly to SSAT but bounds a number to the interval [0, k],
4211 // where k + 1 is a power of 2.
4212 //
42094213 // It returns true if the conversion can be done, false otherwise.
4210 // Additionally, the variable is returned in parameter V and the constant in K.
4214 // Additionally, the variable is returned in parameter V, the constant in K, and
4215 // usat is set to true if the conditional represents an unsigned saturation.
42114216 static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
4212 uint64_t &K) {
4217 uint64_t &K, bool &usat) {
42134218 SDValue LHS1 = Op.getOperand(0);
42144219 SDValue RHS1 = Op.getOperand(1);
42154220 SDValue TrueVal1 = Op.getOperand(2);
42764281 int64_t Val1 = cast(*K1)->getSExtValue();
42774282 int64_t Val2 = cast(*K2)->getSExtValue();
42784283 int64_t PosVal = std::max(Val1, Val2);
4284 int64_t NegVal = std::min(Val1, Val2);
42794285
42804286 if (((Val1 > Val2 && UpperCheckOp == &Op) ||
42814287 (Val1 < Val2 && UpperCheckOp == &Op2)) &&
4282 Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) {
4288 isPowerOf2_64(PosVal + 1)) {
4289
4290 // Handle the difference between USAT (unsigned) and SSAT (signed) saturation
4291 if (Val1 == ~Val2)
4292 usat = false;
4293 else if (NegVal == 0)
4294 usat = true;
4295 else
4296 return false;
42834297
42844298 V = V2;
42854299 K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
4300
42864301 return true;
42874302 }
42884303
42964311 // Try to convert two saturating conditional selects into a single SSAT or USAT
42974312 SDValue SatValue;
42984313 uint64_t SatConstant;
4314 bool SatUSat;
42994315 if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
4300 isSaturatingConditional(Op, SatValue, SatConstant))
4301 return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
4302 DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
4316 isSaturatingConditional(Op, SatValue, SatConstant, SatUSat)) {
4317 if (SatUSat)
4318 return DAG.getNode(ARMISD::USAT, dl, VT, SatValue,
4319 DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
4320 else
4321 return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
4322 DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
4323 }
43034324
43044325 SDValue LHS = Op.getOperand(0);
43054326 SDValue RHS = Op.getOperand(1);
8686 CMOV, // ARM conditional move instructions.
8787
8888 SSAT, // Signed saturation
89 USAT, // Unsigned saturation
8990
9091 BCC_i64,
9192
137137 [SDNPInGlue]>;
138138
139139 def ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>;
140
141 def ARMusatnoshift : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>;
140142
141143 def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
142144 [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
38313833 (USAT imm0_31:$pos, GPRnopc:$a, 0)>;
38323834 def : ARMPat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm),
38333835 (SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
3836 def : ARMPat<(ARMusatnoshift GPRnopc:$Rn, imm0_31:$imm),
3837 (USAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
38343838 def : ARMV6Pat<(int_arm_ssat16 GPRnopc:$a, imm1_16:$pos),
38353839 (SSAT16 imm1_16:$pos, GPRnopc:$a)>;
38363840 def : ARMV6Pat<(int_arm_usat16 GPRnopc:$a, imm0_15:$pos),
23352335
23362336 def : T2Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm),
23372337 (t2SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
2338 def : T2Pat<(ARMusatnoshift GPRnopc:$Rn, imm0_31:$imm),
2339 (t2USAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
23382340 def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos),
23392341 (t2SSAT imm1_32:$pos, GPR:$a, 0)>;
23402342 def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos),
0 ; RUN: llc -mtriple=armv4t-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=V4T
1 ; RUN: llc -mtriple=armv6-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=V6
2 ; RUN: llc -mtriple=armv6t2-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=V6T2
3
4 ; Check for several conditions that should result in USAT.
5 ; For example, the base test is equivalent to
6 ; x < 0 ? 0 : (x > k ? k : x) in C. All patterns that bound x
7 ; to the interval [0, k] where k + 1 is a power of 2 can be
8 ; transformed into USAT. At the end there are some tests
9 ; checking that conditionals are not transformed if they don't
10 ; match the right pattern.
11
12 ;
13 ; Base tests with different bit widths
14 ;
15
16 ; x < 0 ? 0 : (x > k ? k : x)
17 ; 32-bit base test
18 define i32 @unsigned_sat_base_32bit(i32 %x) #0 {
19 ; CHECK-LABEL: unsigned_sat_base_32bit:
20 ; V6: usat r0, #23, r0
21 ; V6T2: usat r0, #23, r0
22 ; V4T-NOT: usat
23 entry:
24 %cmpLow = icmp slt i32 %x, 0
25 %cmpUp = icmp sgt i32 %x, 8388607
26 %saturateUp = select i1 %cmpUp, i32 8388607, i32 %x
27 %saturateLow = select i1 %cmpLow, i32 0, i32 %saturateUp
28 ret i32 %saturateLow
29 }
30
31 ; x < 0 ? 0 : (x > k ? k : x)
32 ; 16-bit base test
33 define i16 @unsigned_sat_base_16bit(i16 %x) #0 {
34 ; CHECK-LABEL: unsigned_sat_base_16bit:
35 ; V6: usat r0, #11, r0
36 ; V6T2: usat r0, #11, r0
37 ; V4T-NOT: usat
38 entry:
39 %cmpLow = icmp slt i16 %x, 0
40 %cmpUp = icmp sgt i16 %x, 2047
41 %saturateUp = select i1 %cmpUp, i16 2047, i16 %x
42 %saturateLow = select i1 %cmpLow, i16 0, i16 %saturateUp
43 ret i16 %saturateLow
44 }
45
46 ; x < 0 ? 0 : (x > k ? k : x)
47 ; 8-bit base test
48 define i8 @unsigned_sat_base_8bit(i8 %x) #0 {
49 ; CHECK-LABEL: unsigned_sat_base_8bit:
50 ; V6: usat r0, #5, r0
51 ; V6T2: usat r0, #5, r0
52 ; V4T-NOT: usat
53 entry:
54 %cmpLow = icmp slt i8 %x, 0
55 %cmpUp = icmp sgt i8 %x, 31
56 %saturateUp = select i1 %cmpUp, i8 31, i8 %x
57 %saturateLow = select i1 %cmpLow, i8 0, i8 %saturateUp
58 ret i8 %saturateLow
59 }
60
61 ;
62 ; Tests where the conditionals that check for upper and lower bounds,
63 ; or the < and > operators, are arranged in different ways. Only some
64 ; of the possible combinations that lead to USAT are tested.
65 ;
66 ; x < 0 ? 0 : (x < k ? x : k)
67 define i32 @unsigned_sat_lower_upper_1(i32 %x) #0 {
68 ; CHECK-LABEL: unsigned_sat_lower_upper_1:
69 ; V6: usat r0, #23, r0
70 ; V6T2: usat r0, #23, r0
71 ; V4T-NOT: usat
72 entry:
73 %cmpLow = icmp slt i32 %x, 0
74 %cmpUp = icmp slt i32 %x, 8388607
75 %saturateUp = select i1 %cmpUp, i32 %x, i32 8388607
76 %saturateLow = select i1 %cmpLow, i32 0, i32 %saturateUp
77 ret i32 %saturateLow
78 }
79
80 ; x > 0 ? (x > k ? k : x) : 0
81 define i32 @unsigned_sat_lower_upper_2(i32 %x) #0 {
82 ; CHECK-LABEL: unsigned_sat_lower_upper_2:
83 ; V6: usat r0, #23, r0
84 ; V6T2: usat r0, #23, r0
85 ; V4T-NOT: usat
86 entry:
87 %cmpLow = icmp sgt i32 %x, 0
88 %cmpUp = icmp sgt i32 %x, 8388607
89 %saturateUp = select i1 %cmpUp, i32 8388607, i32 %x
90 %saturateLow = select i1 %cmpLow, i32 %saturateUp, i32 0
91 ret i32 %saturateLow
92 }
93
94 ; x < k ? (x < 0 ? 0 : x) : k
95 define i32 @unsigned_sat_upper_lower_1(i32 %x) #0 {
96 ; CHECK-LABEL: unsigned_sat_upper_lower_1:
97 ; V6: usat r0, #23, r0
98 ; V6T2: usat r0, #23, r0
99 ; V4T-NOT: usat
100 entry:
101 %cmpUp = icmp slt i32 %x, 8388607
102 %cmpLow = icmp slt i32 %x, 0
103 %saturateLow = select i1 %cmpLow, i32 0, i32 %x
104 %saturateUp = select i1 %cmpUp, i32 %saturateLow, i32 8388607
105 ret i32 %saturateUp
106 }
107
108 ; x > k ? k : (x < 0 ? 0 : x)
109 define i32 @unsigned_sat_upper_lower_2(i32 %x) #0 {
110 ; CHECK-LABEL: unsigned_sat_upper_lower_2:
111 ; V6: usat r0, #23, r0
112 ; V6T2: usat r0, #23, r0
113 ; V4T-NOT: usat
114 entry:
115 %cmpUp = icmp sgt i32 %x, 8388607
116 %cmpLow = icmp slt i32 %x, 0
117 %saturateLow = select i1 %cmpLow, i32 0, i32 %x
118 %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
119 ret i32 %saturateUp
120 }
121
122 ; k < x ? k : (x > 0 ? x : 0)
123 define i32 @unsigned_sat_upper_lower_3(i32 %x) #0 {
124 ; CHECK-LABEL: unsigned_sat_upper_lower_3:
125 ; V6: usat r0, #23, r0
126 ; V6T2: usat r0, #23, r0
127 ; V4T-NOT: usat
128 entry:
129 %cmpUp = icmp slt i32 8388607, %x
130 %cmpLow = icmp sgt i32 %x, 0
131 %saturateLow = select i1 %cmpLow, i32 %x, i32 0
132 %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
133 ret i32 %saturateUp
134 }
135
136 ;
137 ; The following tests check for patterns that should not transform
138 ; into USAT but are similar enough that they could confuse the selector.
139 ;
140 ; x > k ? k : (x > 0 ? 0 : x)
141 ; First condition upper-saturates, second doesn't lower-saturate.
142 define i32 @no_unsigned_sat_missing_lower(i32 %x) #0 {
143 ; CHECK-LABEL: no_unsigned_sat_missing_lower:
144 ; CHECK-NOT: usat
145 entry:
146 %cmpUp = icmp sgt i32 %x, 8388607
147 %cmpLow = icmp sgt i32 %x, 0
148 %saturateLow = select i1 %cmpLow, i32 0, i32 %x
149 %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
150 ret i32 %saturateUp
151 }
152
153 ; x < k ? k : (x < 0 ? 0 : x)
154 ; Second condition lower-saturates, first doesn't upper-saturate.
155 define i32 @no_unsigned_sat_missing_upper(i32 %x) #0 {
156 ; CHECK-LABEL: no_unsigned_sat_missing_upper:
157 ; CHECK-NOT: usat
158 entry:
159 %cmpUp = icmp slt i32 %x, 8388607
160 %cmpLow = icmp slt i32 %x, 0
161 %saturateLow = select i1 %cmpLow, i32 0, i32 %x
162 %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
163 ret i32 %saturateUp
164 }
165
166 ; Lower constant is different in the select and in the compare
167 define i32 @no_unsigned_sat_incorrect_constant(i32 %x) #0 {
168 ; CHECK-LABEL: no_unsigned_sat_incorrect_constant:
169 ; CHECK-NOT: usat
170 entry:
171 %cmpUp = icmp sgt i32 %x, 8388607
172 %cmpLow = icmp slt i32 %x, 0
173 %saturateLow = select i1 %cmpLow, i32 -1, i32 %x
174 %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
175 ret i32 %saturateUp
176 }
177
178 ; The interval is not [0, k]
179 define i32 @no_unsigned_sat_incorrect_interval(i32 %x) #0 {
180 ; CHECK-LABEL: no_unsigned_sat_incorrect_interval:
181 ; CHECK-NOT: usat
182 entry:
183 %cmpUp = icmp sgt i32 %x, 8388607
184 %cmpLow = icmp slt i32 %x, -4
185 %saturateLow = select i1 %cmpLow, i32 -4, i32 %x
186 %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
187 ret i32 %saturateUp
188 }
189
190 ; The returned value (y) is not the same as the tested value (x).
191 define i32 @no_unsigned_sat_incorrect_return(i32 %x, i32 %y) #0 {
192 ; CHECK-LABEL: no_unsigned_sat_incorrect_return:
193 ; CHECK-NOT: usat
194 entry:
195 %cmpUp = icmp sgt i32 %x, 8388607
196 %cmpLow = icmp slt i32 %x, 0
197 %saturateLow = select i1 %cmpLow, i32 0, i32 %y
198 %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
199 ret i32 %saturateUp
200 }
201
202 ; One of the values in a compare (y) is not the same as the rest
203 ; of the compare and select values (x).
204 define i32 @no_unsigned_sat_incorrect_compare(i32 %x, i32 %y) #0 {
205 ; CHECK-LABEL: no_unsigned_sat_incorrect_compare:
206 ; CHECK-NOT: usat
207 entry:
208 %cmpUp = icmp sgt i32 %x, 8388607
209 %cmpLow = icmp slt i32 %y, 0
210 %saturateLow = select i1 %cmpLow, i32 0, i32 %x
211 %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
212 ret i32 %saturateUp
213 }