llvm.org GIT mirror llvm / 670e539
Recognize SSE min and max opportunities in even more cases. And fix a bug with the behavior of min/max instructions formed from fcmp uge comparisons. Also, use FiniteOnlyFPMath() for this code instead of UnsafeFPMath, as it is more specific. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82466 91177308-0d34-0410-b5e6-96231b3b80d8 Dan Gohman 10 years ago
3 changed file(s) with 447 addition(s) and 46 deletion(s). Raw diff Collapse all Expand all
82558255 SDValue LHS = N->getOperand(1);
82568256 SDValue RHS = N->getOperand(2);
82578257
8258 // If we have SSE[12] support, try to form min/max nodes.
8258 // If we have SSE[12] support, try to form min/max nodes. SSE min/max
8259 // instructions have the peculiarity that if either operand is a NaN,
8260 // they choose what we call the RHS operand (and as such are not symmetric).
8261 // It happens that this matches the semantics of the common C idiom
8262 // x<y?x:y and related forms, so we can recognize these cases.
82598263 if (Subtarget->hasSSE2() &&
82608264 (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) &&
82618265 Cond.getOpcode() == ISD::SETCC) {
82628266 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
82638267
82648268 unsigned Opcode = 0;
8269 // Check for x CC y ? x : y.
82658270 if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
82668271 switch (CC) {
82678272 default: break;
8268 case ISD::SETOLE: // (X <= Y) ? X : Y -> min
8269 case ISD::SETULE:
8270 case ISD::SETLE:
8271 if (!UnsafeFPMath) break;
8272 // FALL THROUGH.
8273 case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
8274 case ISD::SETLT:
8273 case ISD::SETULT:
8274 // This can be a min if we can prove that at least one of the operands
8275 // is not a nan.
8276 if (!FiniteOnlyFPMath()) {
8277 if (DAG.isKnownNeverNaN(RHS)) {
8278 // Put the potential NaN in the RHS so that SSE will preserve it.
8279 std::swap(LHS, RHS);
8280 } else if (!DAG.isKnownNeverNaN(LHS))
8281 break;
8282 }
82758283 Opcode = X86ISD::FMIN;
82768284 break;
8277
8278 case ISD::SETOGT: // (X > Y) ? X : Y -> max
8285 case ISD::SETOLE:
8286 // This can be a min if we can prove that at least one of the operands
8287 // is not a nan.
8288 if (!FiniteOnlyFPMath()) {
8289 if (DAG.isKnownNeverNaN(LHS)) {
8290 // Put the potential NaN in the RHS so that SSE will preserve it.
8291 std::swap(LHS, RHS);
8292 } else if (!DAG.isKnownNeverNaN(RHS))
8293 break;
8294 }
8295 Opcode = X86ISD::FMIN;
8296 break;
8297 case ISD::SETULE:
8298 // This can be a min, but if either operand is a NaN we need it to
8299 // preserve the original LHS.
8300 std::swap(LHS, RHS);
8301 case ISD::SETOLT:
8302 case ISD::SETLT:
8303 case ISD::SETLE:
8304 Opcode = X86ISD::FMIN;
8305 break;
8306
8307 case ISD::SETOGE:
8308 // This can be a max if we can prove that at least one of the operands
8309 // is not a nan.
8310 if (!FiniteOnlyFPMath()) {
8311 if (DAG.isKnownNeverNaN(LHS)) {
8312 // Put the potential NaN in the RHS so that SSE will preserve it.
8313 std::swap(LHS, RHS);
8314 } else if (!DAG.isKnownNeverNaN(RHS))
8315 break;
8316 }
8317 Opcode = X86ISD::FMAX;
8318 break;
82798319 case ISD::SETUGT:
8320 // This can be a max if we can prove that at least one of the operands
8321 // is not a nan.
8322 if (!FiniteOnlyFPMath()) {
8323 if (DAG.isKnownNeverNaN(RHS)) {
8324 // Put the potential NaN in the RHS so that SSE will preserve it.
8325 std::swap(LHS, RHS);
8326 } else if (!DAG.isKnownNeverNaN(LHS))
8327 break;
8328 }
8329 Opcode = X86ISD::FMAX;
8330 break;
8331 case ISD::SETUGE:
8332 // This can be a max, but if either operand is a NaN we need it to
8333 // preserve the original LHS.
8334 std::swap(LHS, RHS);
8335 case ISD::SETOGT:
82808336 case ISD::SETGT:
8281 if (!UnsafeFPMath) break;
8282 // FALL THROUGH.
8283 case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
82848337 case ISD::SETGE:
82858338 Opcode = X86ISD::FMAX;
82868339 break;
82878340 }
8341 // Check for x CC y ? y : x -- a min/max with reversed arms.
82888342 } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
82898343 switch (CC) {
82908344 default: break;
8345 case ISD::SETOGE:
8346 // This can be a min if we can prove that at least one of the operands
8347 // is not a nan.
8348 if (!FiniteOnlyFPMath()) {
8349 if (DAG.isKnownNeverNaN(RHS)) {
8350 // Put the potential NaN in the RHS so that SSE will preserve it.
8351 std::swap(LHS, RHS);
8352 } else if (!DAG.isKnownNeverNaN(LHS))
8353 break;
8354 }
8355 Opcode = X86ISD::FMIN;
8356 break;
8357 case ISD::SETUGT:
8358 // This can be a min if we can prove that at least one of the operands
8359 // is not a nan.
8360 if (!FiniteOnlyFPMath()) {
8361 if (DAG.isKnownNeverNaN(LHS)) {
8362 // Put the potential NaN in the RHS so that SSE will preserve it.
8363 std::swap(LHS, RHS);
8364 } else if (!DAG.isKnownNeverNaN(RHS))
8365 break;
8366 }
8367 Opcode = X86ISD::FMIN;
8368 break;
8369 case ISD::SETUGE:
8370 // This can be a min, but if either operand is a NaN we need it to
8371 // preserve the original LHS.
8372 std::swap(LHS, RHS);
82918373 case ISD::SETOGT:
8292 // This can use a min only if the LHS isn't NaN.
8293 if (DAG.isKnownNeverNaN(LHS))
8294 Opcode = X86ISD::FMIN;
8295 else if (DAG.isKnownNeverNaN(RHS)) {
8296 Opcode = X86ISD::FMIN;
8297 // Put the potential NaN in the RHS so that SSE will preserve it.
8298 std::swap(LHS, RHS);
8299 }
8300 break;
8301
8302 case ISD::SETUGT: // (X > Y) ? Y : X -> min
83038374 case ISD::SETGT:
8304 if (!UnsafeFPMath) break;
8305 // FALL THROUGH.
8306 case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
83078375 case ISD::SETGE:
83088376 Opcode = X86ISD::FMIN;
83098377 break;
83108378
8379 case ISD::SETULT:
8380 // This can be a max if we can prove that at least one of the operands
8381 // is not a nan.
8382 if (!FiniteOnlyFPMath()) {
8383 if (DAG.isKnownNeverNaN(LHS)) {
8384 // Put the potential NaN in the RHS so that SSE will preserve it.
8385 std::swap(LHS, RHS);
8386 } else if (!DAG.isKnownNeverNaN(RHS))
8387 break;
8388 }
8389 Opcode = X86ISD::FMAX;
8390 break;
8391 case ISD::SETOLE:
8392 // This can be a max if we can prove that at least one of the operands
8393 // is not a nan.
8394 if (!FiniteOnlyFPMath()) {
8395 if (DAG.isKnownNeverNaN(RHS)) {
8396 // Put the potential NaN in the RHS so that SSE will preserve it.
8397 std::swap(LHS, RHS);
8398 } else if (!DAG.isKnownNeverNaN(LHS))
8399 break;
8400 }
8401 Opcode = X86ISD::FMAX;
8402 break;
83118403 case ISD::SETULE:
8312 // This can use a max only if the LHS isn't NaN.
8313 if (DAG.isKnownNeverNaN(LHS))
8314 Opcode = X86ISD::FMAX;
8315 else if (DAG.isKnownNeverNaN(RHS)) {
8316 Opcode = X86ISD::FMAX;
8317 // Put the potential NaN in the RHS so that SSE will preserve it.
8318 std::swap(LHS, RHS);
8319 }
8320 break;
8321
8322 case ISD::SETOLE: // (X <= Y) ? Y : X -> max
8404 // This can be a max, but if either operand is a NaN we need it to
8405 // preserve the original LHS.
8406 std::swap(LHS, RHS);
8407 case ISD::SETOLT:
8408 case ISD::SETLT:
83238409 case ISD::SETLE:
8324 if (!UnsafeFPMath) break;
8325 // FALL THROUGH.
8326 case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
8327 case ISD::SETLT:
83288410 Opcode = X86ISD::FMAX;
83298411 break;
83308412 }
33
44 declare float @bar()
55
6 define float @foo(float %a)
6 define float @foo(float %a) nounwind
77 {
88 %s = call float @bar()
99 %t = fcmp olt float %s, %a
1010 %u = select i1 %t, float %s, float %a
1111 ret float %u
1212 }
13 define float @hem(float %a)
13 define float @hem(float %a) nounwind
1414 {
1515 %s = call float @bar()
16 %t = fcmp uge float %s, %a
16 %t = fcmp ogt float %s, %a
1717 %u = select i1 %t, float %s, float %a
1818 ret float %u
1919 }
None ; RUN: llc < %s -march=x86-64 | FileCheck %s
0 ; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
1
2 ; Some of these patterns can be matched as SSE min or max. Some of
3 ; them can be matched provided that the operands are swapped.
4 ; Some of them can't be matched at all and require a comparison
5 ; and a conditional branch.
6
7 ; The naming convention is {,x_}{o,u}{gt,lt,ge,le}{,_inverse}
8 ; x_ : use 0.0 instead of %y
9 ; _inverse : swap the arms of the select.
10
11 ; CHECK: ogt:
12 ; CHECK-NEXT: maxsd %xmm1, %xmm0
13 ; CHECK-NEXT: ret
14 define double @ogt(double %x, double %y) nounwind {
15 %c = fcmp ogt double %x, %y
16 %d = select i1 %c, double %x, double %y
17 ret double %d
18 }
19
20 ; CHECK: olt:
21 ; CHECK-NEXT: minsd %xmm1, %xmm0
22 ; CHECK-NEXT: ret
23 define double @olt(double %x, double %y) nounwind {
24 %c = fcmp olt double %x, %y
25 %d = select i1 %c, double %x, double %y
26 ret double %d
27 }
28
29 ; CHECK: ogt_inverse:
30 ; CHECK-NEXT: minsd %xmm0, %xmm1
31 ; CHECK-NEXT: movapd %xmm1, %xmm0
32 ; CHECK-NEXT: ret
33 define double @ogt_inverse(double %x, double %y) nounwind {
34 %c = fcmp ogt double %x, %y
35 %d = select i1 %c, double %y, double %x
36 ret double %d
37 }
38
39 ; CHECK: olt_inverse:
40 ; CHECK-NEXT: maxsd %xmm0, %xmm1
41 ; CHECK-NEXT: movapd %xmm1, %xmm0
42 ; CHECK-NEXT: ret
43 define double @olt_inverse(double %x, double %y) nounwind {
44 %c = fcmp olt double %x, %y
45 %d = select i1 %c, double %y, double %x
46 ret double %d
47 }
48
49 ; CHECK: oge:
50 ; CHECK-NEXT: ucomisd %xmm1, %xmm0
51 define double @oge(double %x, double %y) nounwind {
52 %c = fcmp oge double %x, %y
53 %d = select i1 %c, double %x, double %y
54 ret double %d
55 }
56
57 ; CHECK: ole:
58 ; CHECK-NEXT: ucomisd %xmm0, %xmm1
59 define double @ole(double %x, double %y) nounwind {
60 %c = fcmp ole double %x, %y
61 %d = select i1 %c, double %x, double %y
62 ret double %d
63 }
64
65 ; CHECK: oge_inverse:
66 ; CHECK-NEXT: ucomisd %xmm1, %xmm0
67 define double @oge_inverse(double %x, double %y) nounwind {
68 %c = fcmp oge double %x, %y
69 %d = select i1 %c, double %y, double %x
70 ret double %d
71 }
72
73 ; CHECK: ole_inverse:
74 ; CHECK-NEXT: ucomisd %xmm0, %xmm1
75 define double @ole_inverse(double %x, double %y) nounwind {
76 %c = fcmp ole double %x, %y
77 %d = select i1 %c, double %y, double %x
78 ret double %d
79 }
80
81 ; CHECK: x_ogt:
82 ; CHECK-NEXT: pxor %xmm1, %xmm1
83 ; CHECK-NEXT: maxsd %xmm1, %xmm0
84 ; CHECK-NEXT: ret
85 define double @x_ogt(double %x) nounwind {
86 %c = fcmp ogt double %x, 0.000000e+00
87 %d = select i1 %c, double %x, double 0.000000e+00
88 ret double %d
89 }
90
91 ; CHECK: x_olt:
92 ; CHECK-NEXT: pxor %xmm1, %xmm1
93 ; CHECK-NEXT: minsd %xmm1, %xmm0
94 ; CHECK-NEXT: ret
95 define double @x_olt(double %x) nounwind {
96 %c = fcmp olt double %x, 0.000000e+00
97 %d = select i1 %c, double %x, double 0.000000e+00
98 ret double %d
99 }
100
101 ; CHECK: x_ogt_inverse:
102 ; CHECK-NEXT: pxor %xmm1, %xmm1
103 ; CHECK-NEXT: minsd %xmm0, %xmm1
104 ; CHECK-NEXT: movapd %xmm1, %xmm0
105 ; CHECK-NEXT: ret
106 define double @x_ogt_inverse(double %x) nounwind {
107 %c = fcmp ogt double %x, 0.000000e+00
108 %d = select i1 %c, double 0.000000e+00, double %x
109 ret double %d
110 }
111
112 ; CHECK: x_olt_inverse:
113 ; CHECK-NEXT: pxor %xmm1, %xmm1
114 ; CHECK-NEXT: maxsd %xmm0, %xmm1
115 ; CHECK-NEXT: movapd %xmm1, %xmm0
116 ; CHECK-NEXT: ret
117 define double @x_olt_inverse(double %x) nounwind {
118 %c = fcmp olt double %x, 0.000000e+00
119 %d = select i1 %c, double 0.000000e+00, double %x
120 ret double %d
121 }
122
123 ; CHECK: x_oge:
124 ; CHECK-NEXT: pxor %xmm1, %xmm1
125 ; CHECK-NEXT: maxsd %xmm1, %xmm0
126 ; CHECK-NEXT: ret
127 define double @x_oge(double %x) nounwind {
128 %c = fcmp oge double %x, 0.000000e+00
129 %d = select i1 %c, double %x, double 0.000000e+00
130 ret double %d
131 }
132
133 ; CHECK: x_ole:
134 ; CHECK-NEXT: pxor %xmm1, %xmm1
135 ; CHECK-NEXT: minsd %xmm1, %xmm0
136 ; CHECK-NEXT: ret
137 define double @x_ole(double %x) nounwind {
138 %c = fcmp ole double %x, 0.000000e+00
139 %d = select i1 %c, double %x, double 0.000000e+00
140 ret double %d
141 }
142
143 ; CHECK: x_oge_inverse:
144 ; CHECK-NEXT: pxor %xmm1, %xmm1
145 ; CHECK-NEXT: minsd %xmm0, %xmm1
146 ; CHECK-NEXT: movapd %xmm1, %xmm0
147 ; CHECK-NEXT: ret
148 define double @x_oge_inverse(double %x) nounwind {
149 %c = fcmp oge double %x, 0.000000e+00
150 %d = select i1 %c, double 0.000000e+00, double %x
151 ret double %d
152 }
153
154 ; CHECK: x_ole_inverse:
155 ; CHECK-NEXT: pxor %xmm1, %xmm1
156 ; CHECK-NEXT: maxsd %xmm0, %xmm1
157 ; CHECK-NEXT: movapd %xmm1, %xmm0
158 ; CHECK-NEXT: ret
159 define double @x_ole_inverse(double %x) nounwind {
160 %c = fcmp ole double %x, 0.000000e+00
161 %d = select i1 %c, double 0.000000e+00, double %x
162 ret double %d
163 }
164
165 ; CHECK: ugt:
166 ; CHECK-NEXT: ucomisd %xmm0, %xmm1
167 define double @ugt(double %x, double %y) nounwind {
168 %c = fcmp ugt double %x, %y
169 %d = select i1 %c, double %x, double %y
170 ret double %d
171 }
172
173 ; CHECK: ult:
174 ; CHECK-NEXT: ucomisd %xmm1, %xmm0
175 define double @ult(double %x, double %y) nounwind {
176 %c = fcmp ult double %x, %y
177 %d = select i1 %c, double %x, double %y
178 ret double %d
179 }
180
181 ; CHECK: ugt_inverse:
182 ; CHECK-NEXT: ucomisd %xmm0, %xmm1
183 define double @ugt_inverse(double %x, double %y) nounwind {
184 %c = fcmp ugt double %x, %y
185 %d = select i1 %c, double %y, double %x
186 ret double %d
187 }
188
189 ; CHECK: ult_inverse:
190 ; CHECK-NEXT: ucomisd %xmm1, %xmm0
191 define double @ult_inverse(double %x, double %y) nounwind {
192 %c = fcmp ult double %x, %y
193 %d = select i1 %c, double %y, double %x
194 ret double %d
195 }
196
197 ; CHECK: uge:
198 ; CHECK-NEXT: maxsd %xmm0, %xmm1
199 ; CHECK-NEXT: movapd %xmm1, %xmm0
200 ; CHECK-NEXT: ret
201 define double @uge(double %x, double %y) nounwind {
202 %c = fcmp uge double %x, %y
203 %d = select i1 %c, double %x, double %y
204 ret double %d
205 }
206
207 ; CHECK: ule:
208 ; CHECK-NEXT: minsd %xmm0, %xmm1
209 ; CHECK-NEXT: movapd %xmm1, %xmm0
210 ; CHECK-NEXT: ret
211 define double @ule(double %x, double %y) nounwind {
212 %c = fcmp ule double %x, %y
213 %d = select i1 %c, double %x, double %y
214 ret double %d
215 }
216
217 ; CHECK: uge_inverse:
218 ; CHECK-NEXT: minsd %xmm1, %xmm0
219 ; CHECK-NEXT: ret
220 define double @uge_inverse(double %x, double %y) nounwind {
221 %c = fcmp uge double %x, %y
222 %d = select i1 %c, double %y, double %x
223 ret double %d
224 }
225
226 ; CHECK: ule_inverse:
227 ; CHECK-NEXT: maxsd %xmm1, %xmm0
228 ; CHECK-NEXT: ret
229 define double @ule_inverse(double %x, double %y) nounwind {
230 %c = fcmp ule double %x, %y
231 %d = select i1 %c, double %y, double %x
232 ret double %d
233 }
234
235 ; CHECK: x_ugt:
236 ; CHECK-NEXT: pxor %xmm1, %xmm1
237 ; CHECK-NEXT: maxsd %xmm0, %xmm1
238 ; CHECK-NEXT: movapd %xmm1, %xmm0
239 ; CHECK-NEXT: ret
240 define double @x_ugt(double %x) nounwind {
241 %c = fcmp ugt double %x, 0.000000e+00
242 %d = select i1 %c, double %x, double 0.000000e+00
243 ret double %d
244 }
245
246 ; CHECK: x_ult:
247 ; CHECK-NEXT: pxor %xmm1, %xmm1
248 ; CHECK-NEXT: minsd %xmm0, %xmm1
249 ; CHECK-NEXT: movapd %xmm1, %xmm0
250 ; CHECK-NEXT: ret
251 define double @x_ult(double %x) nounwind {
252 %c = fcmp ult double %x, 0.000000e+00
253 %d = select i1 %c, double %x, double 0.000000e+00
254 ret double %d
255 }
256
257 ; CHECK: x_ugt_inverse:
258 ; CHECK-NEXT: pxor %xmm1, %xmm1
259 ; CHECK-NEXT: minsd %xmm1, %xmm0
260 ; CHECK-NEXT: ret
261 define double @x_ugt_inverse(double %x) nounwind {
262 %c = fcmp ugt double %x, 0.000000e+00
263 %d = select i1 %c, double 0.000000e+00, double %x
264 ret double %d
265 }
266
267 ; CHECK: x_ult_inverse:
268 ; CHECK-NEXT: pxor %xmm1, %xmm1
269 ; CHECK-NEXT: maxsd %xmm1, %xmm0
270 ; CHECK-NEXT: ret
271 define double @x_ult_inverse(double %x) nounwind {
272 %c = fcmp ult double %x, 0.000000e+00
273 %d = select i1 %c, double 0.000000e+00, double %x
274 ret double %d
275 }
276
277 ; CHECK: x_uge:
278 ; CHECK-NEXT: pxor %xmm1, %xmm1
279 ; CHECK-NEXT: maxsd %xmm0, %xmm1
280 ; CHECK-NEXT: movapd %xmm1, %xmm0
281 ; CHECK-NEXT: ret
282 define double @x_uge(double %x) nounwind {
283 %c = fcmp uge double %x, 0.000000e+00
284 %d = select i1 %c, double %x, double 0.000000e+00
285 ret double %d
286 }
287
288 ; CHECK: x_ule:
289 ; CHECK-NEXT: pxor %xmm1, %xmm1
290 ; CHECK-NEXT: minsd %xmm0, %xmm1
291 ; CHECK-NEXT: movapd %xmm1, %xmm0
292 ; CHECK-NEXT: ret
293 define double @x_ule(double %x) nounwind {
294 %c = fcmp ule double %x, 0.000000e+00
295 %d = select i1 %c, double %x, double 0.000000e+00
296 ret double %d
297 }
298
299 ; CHECK: x_uge_inverse:
300 ; CHECK-NEXT: pxor %xmm1, %xmm1
301 ; CHECK-NEXT: minsd %xmm1, %xmm0
302 ; CHECK-NEXT: ret
303 define double @x_uge_inverse(double %x) nounwind {
304 %c = fcmp uge double %x, 0.000000e+00
305 %d = select i1 %c, double 0.000000e+00, double %x
306 ret double %d
307 }
308
309 ; CHECK: x_ule_inverse:
310 ; CHECK-NEXT: pxor %xmm1, %xmm1
311 ; CHECK-NEXT: maxsd %xmm1, %xmm0
312 ; CHECK-NEXT: ret
313 define double @x_ule_inverse(double %x) nounwind {
314 %c = fcmp ule double %x, 0.000000e+00
315 %d = select i1 %c, double 0.000000e+00, double %x
316 ret double %d
317 }
318
319 ; Test a few more misc. cases.
1320
2321 ; CHECK: clampTo3k_a:
3322 ; CHECK: minsd