llvm.org GIT mirror
Recognize SSE min and max opportunities in even more cases. And fix a bug with the behavior of min/max instructions formed from fcmp uge comparisons. Also, use FiniteOnlyFPMath() for this code instead of UnsafeFPMath, as it is more specific. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82466 91177308-0d34-0410-b5e6-96231b3b80d8 Dan Gohman 10 years ago
3 changed file(s) with 447 addition(s) and 46 deletion(s).
 8255 8255 SDValue LHS = N->getOperand(1); 8256 8256 SDValue RHS = N->getOperand(2); 8257 8257 8258 // If we have SSE[12] support, try to form min/max nodes.⏎ 8258 // If we have SSE[12] support, try to form min/max nodes. SSE min/max⏎ 8259 // instructions have the peculiarity that if either operand is a NaN, 8260 // they chose what we call the RHS operand (and as such are not symmetric). 8261 // It happens that this matches the semantics of the common C idiom 8262 // x 8259 8263 if (Subtarget->hasSSE2() && 8260 8264 (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) && 8261 8265 Cond.getOpcode() == ISD::SETCC) { 8262 8266 ISD::CondCode CC = cast(Cond.getOperand(2))->get(); 8263 8267 8264 8268 unsigned Opcode = 0; 8269 // Check for x CC y ? x : y. 8265 8270 if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { 8266 8271 switch (CC) { 8267 8272 default: break; 8268 case ISD::SETOLE: // (X <= Y) ? X : Y -> min 8269 case ISD::SETULE: 8270 case ISD::SETLE: 8271 if (!UnsafeFPMath) break; 8272 // FALL THROUGH. 8273 case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min 8274 case ISD::SETLT:⏎ 8273 case ISD::SETULT:⏎ 8274 // This can be a min if we can prove that at least one of the operands 8275 // is not a nan. 8276 if (!FiniteOnlyFPMath()) { 8277 if (DAG.isKnownNeverNaN(RHS)) { 8278 // Put the potential NaN in the RHS so that SSE will preserve it. 8279 std::swap(LHS, RHS); 8280 } else if (!DAG.isKnownNeverNaN(LHS)) 8281 break; 8282 } 8275 8283 Opcode = X86ISD::FMIN; 8276 8284 break; 8277 8278 case ISD::SETOGT: // (X > Y) ? X : Y -> max⏎ 8285 case ISD::SETOLE:⏎ 8286 // This can be a min if we can prove that at least one of the operands 8287 // is not a nan. 8288 if (!FiniteOnlyFPMath()) { 8289 if (DAG.isKnownNeverNaN(LHS)) { 8290 // Put the potential NaN in the RHS so that SSE will preserve it. 8291 std::swap(LHS, RHS); 8292 } else if (!DAG.isKnownNeverNaN(RHS)) 8293 break; 8294 } 8295 Opcode = X86ISD::FMIN; 8296 break; 8297 case ISD::SETULE: 8298 // This can be a min, but if either operand is a NaN we need it to 8299 // preserve the original LHS. 8300 std::swap(LHS, RHS); 8301 case ISD::SETOLT: 8302 case ISD::SETLT: 8303 case ISD::SETLE: 8304 Opcode = X86ISD::FMIN; 8305 break; 8306 8307 case ISD::SETOGE: 8308 // This can be a max if we can prove that at least one of the operands 8309 // is not a nan. 8310 if (!FiniteOnlyFPMath()) { 8311 if (DAG.isKnownNeverNaN(LHS)) { 8312 // Put the potential NaN in the RHS so that SSE will preserve it. 8313 std::swap(LHS, RHS); 8314 } else if (!DAG.isKnownNeverNaN(RHS)) 8315 break; 8316 } 8317 Opcode = X86ISD::FMAX; 8318 break; 8279 8319 case ISD::SETUGT: 8320 // This can be a max if we can prove that at least one of the operands 8321 // is not a nan. 8322 if (!FiniteOnlyFPMath()) { 8323 if (DAG.isKnownNeverNaN(RHS)) { 8324 // Put the potential NaN in the RHS so that SSE will preserve it. 8325 std::swap(LHS, RHS); 8326 } else if (!DAG.isKnownNeverNaN(LHS)) 8327 break; 8328 } 8329 Opcode = X86ISD::FMAX; 8330 break; 8331 case ISD::SETUGE: 8332 // This can be a max, but if either operand is a NaN we need it to 8333 // preserve the original LHS. 8334 std::swap(LHS, RHS); 8335 case ISD::SETOGT: 8280 8336 case ISD::SETGT: 8281 if (!UnsafeFPMath) break; 8282 // FALL THROUGH. 8283 case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max 8284 8337 case ISD::SETGE: 8285 8338 Opcode = X86ISD::FMAX; 8286 8339 break; 8287 8340 } 8341 // Check for x CC y ? y : x -- a min/max with reversed arms. 8288 8342 } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) { 8289 8343 switch (CC) { 8290 8344 default: break; 8345 case ISD::SETOGE: 8346 // This can be a min if we can prove that at least one of the operands 8347 // is not a nan. 8348 if (!FiniteOnlyFPMath()) { 8349 if (DAG.isKnownNeverNaN(RHS)) { 8350 // Put the potential NaN in the RHS so that SSE will preserve it. 8351 std::swap(LHS, RHS); 8352 } else if (!DAG.isKnownNeverNaN(LHS)) 8353 break; 8354 } 8355 Opcode = X86ISD::FMIN; 8356 break; 8357 case ISD::SETUGT: 8358 // This can be a min if we can prove that at least one of the operands 8359 // is not a nan. 8360 if (!FiniteOnlyFPMath()) { 8361 if (DAG.isKnownNeverNaN(LHS)) { 8362 // Put the potential NaN in the RHS so that SSE will preserve it. 8363 std::swap(LHS, RHS); 8364 } else if (!DAG.isKnownNeverNaN(RHS)) 8365 break; 8366 } 8367 Opcode = X86ISD::FMIN; 8368 break; 8369 case ISD::SETUGE: 8370 // This can be a min, but if either operand is a NaN we need it to 8371 // preserve the original LHS. 8372 std::swap(LHS, RHS); 8291 8373 case ISD::SETOGT: 8292 // This can use a min only if the LHS isn't NaN. 8293 if (DAG.isKnownNeverNaN(LHS)) 8294 Opcode = X86ISD::FMIN; 8295 else if (DAG.isKnownNeverNaN(RHS)) { 8296 Opcode = X86ISD::FMIN; 8297 // Put the potential NaN in the RHS so that SSE will preserve it. 8298 std::swap(LHS, RHS); 8299 } 8300 break; 8301 8302 case ISD::SETUGT: // (X > Y) ? Y : X -> min 8303 8374 case ISD::SETGT: 8304 if (!UnsafeFPMath) break; 8305 // FALL THROUGH. 8306 case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min 8307 8375 case ISD::SETGE: 8308 8376 Opcode = X86ISD::FMIN; 8309 8377 break; 8310 8378 8379 case ISD::SETULT: 8380 // This can be a max if we can prove that at least one of the operands 8381 // is not a nan. 8382 if (!FiniteOnlyFPMath()) { 8383 if (DAG.isKnownNeverNaN(LHS)) { 8384 // Put the potential NaN in the RHS so that SSE will preserve it. 8385 std::swap(LHS, RHS); 8386 } else if (!DAG.isKnownNeverNaN(RHS)) 8387 break; 8388 } 8389 Opcode = X86ISD::FMAX; 8390 break; 8391 case ISD::SETOLE: 8392 // This can be a max if we can prove that at least one of the operands 8393 // is not a nan. 8394 if (!FiniteOnlyFPMath()) { 8395 if (DAG.isKnownNeverNaN(RHS)) { 8396 // Put the potential NaN in the RHS so that SSE will preserve it. 8397 std::swap(LHS, RHS); 8398 } else if (!DAG.isKnownNeverNaN(LHS)) 8399 break; 8400 } 8401 Opcode = X86ISD::FMAX; 8402 break; 8311 8403 case ISD::SETULE: 8312 // This can use a max only if the LHS isn't NaN. 8313 if (DAG.isKnownNeverNaN(LHS)) 8314 Opcode = X86ISD::FMAX; 8315 else if (DAG.isKnownNeverNaN(RHS)) { 8316 Opcode = X86ISD::FMAX; 8317 // Put the potential NaN in the RHS so that SSE will preserve it. 8318 std::swap(LHS, RHS); 8319 } 8320 break; 8321 8322 case ISD::SETOLE: // (X <= Y) ? Y : X -> max⏎ 8404 // This can be a max, but if either operand is a NaN we need it to⏎ 8405 // preserve the original LHS. 8406 std::swap(LHS, RHS); 8407 case ISD::SETOLT: 8408 case ISD::SETLT: 8323 8409 case ISD::SETLE: 8324 if (!UnsafeFPMath) break; 8325 // FALL THROUGH. 8326 case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max 8327 case ISD::SETLT: 8328 8410 Opcode = X86ISD::FMAX; 8329 8411 break; 8330 8412 }
 3 3 4 4 declare float @bar() 5 5 6 define float @foo(float %a)⏎ 6 define float @foo(float %a) nounwind⏎ 7 7 { 8 8 %s = call float @bar() 9 9 %t = fcmp olt float %s, %a 10 10 %u = select i1 %t, float %s, float %a 11 11 ret float %u 12 12 } 13 define float @hem(float %a)⏎ 13 define float @hem(float %a) nounwind⏎ 14 14 { 15 15 %s = call float @bar() 16 %t = fcmp uge float %s, %a⏎ 16 %t = fcmp ogt float %s, %a⏎ 17 17 %u = select i1 %t, float %s, float %a 18 18 ret float %u 19 19 }
 None ; RUN: llc < %s -march=x86-64 | FileCheck %s⏎ 0 ; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s⏎ 1 2 ; Some of these patterns can be matched as SSE min or max. Some of 3 ; then can be matched provided that the operands are swapped. 4 ; Some of them can't be matched at all and require a comparison 5 ; and a conditional branch. 6 7 ; The naming convention is {,x_}{o,u}{gt,lt,ge,le}{,_inverse} 8 ; x_ : use 0.0 instead of %y 9 ; _inverse : swap the arms of the select. 10 11 ; CHECK: ogt: 12 ; CHECK-NEXT: maxsd %xmm1, %xmm0 13 ; CHECK-NEXT: ret 14 define double @ogt(double %x, double %y) nounwind { 15 %c = fcmp ogt double %x, %y 16 %d = select i1 %c, double %x, double %y 17 ret double %d 18 } 19 20 ; CHECK: olt: 21 ; CHECK-NEXT: minsd %xmm1, %xmm0 22 ; CHECK-NEXT: ret 23 define double @olt(double %x, double %y) nounwind { 24 %c = fcmp olt double %x, %y 25 %d = select i1 %c, double %x, double %y 26 ret double %d 27 } 28 29 ; CHECK: ogt_inverse: 30 ; CHECK-NEXT: minsd %xmm0, %xmm1 31 ; CHECK-NEXT: movapd %xmm1, %xmm0 32 ; CHECK-NEXT: ret 33 define double @ogt_inverse(double %x, double %y) nounwind { 34 %c = fcmp ogt double %x, %y 35 %d = select i1 %c, double %y, double %x 36 ret double %d 37 } 38 39 ; CHECK: olt_inverse: 40 ; CHECK-NEXT: maxsd %xmm0, %xmm1 41 ; CHECK-NEXT: movapd %xmm1, %xmm0 42 ; CHECK-NEXT: ret 43 define double @olt_inverse(double %x, double %y) nounwind { 44 %c = fcmp olt double %x, %y 45 %d = select i1 %c, double %y, double %x 46 ret double %d 47 } 48 49 ; CHECK: oge: 50 ; CHECK-NEXT: ucomisd %xmm1, %xmm0 51 define double @oge(double %x, double %y) nounwind { 52 %c = fcmp oge double %x, %y 53 %d = select i1 %c, double %x, double %y 54 ret double %d 55 } 56 57 ; CHECK: ole: 58 ; CHECK-NEXT: ucomisd %xmm0, %xmm1 59 define double @ole(double %x, double %y) nounwind { 60 %c = fcmp ole double %x, %y 61 %d = select i1 %c, double %x, double %y 62 ret double %d 63 } 64 65 ; CHECK: oge_inverse: 66 ; CHECK-NEXT: ucomisd %xmm1, %xmm0 67 define double @oge_inverse(double %x, double %y) nounwind { 68 %c = fcmp oge double %x, %y 69 %d = select i1 %c, double %y, double %x 70 ret double %d 71 } 72 73 ; CHECK: ole_inverse: 74 ; CHECK-NEXT: ucomisd %xmm0, %xmm1 75 define double @ole_inverse(double %x, double %y) nounwind { 76 %c = fcmp ole double %x, %y 77 %d = select i1 %c, double %y, double %x 78 ret double %d 79 } 80 81 ; CHECK: x_ogt: 82 ; CHECK-NEXT: pxor %xmm1, %xmm1 83 ; CHECK-NEXT: maxsd %xmm1, %xmm0 84 ; CHECK-NEXT: ret 85 define double @x_ogt(double %x) nounwind { 86 %c = fcmp ogt double %x, 0.000000e+00 87 %d = select i1 %c, double %x, double 0.000000e+00 88 ret double %d 89 } 90 91 ; CHECK: x_olt: 92 ; CHECK-NEXT: pxor %xmm1, %xmm1 93 ; CHECK-NEXT: minsd %xmm1, %xmm0 94 ; CHECK-NEXT: ret 95 define double @x_olt(double %x) nounwind { 96 %c = fcmp olt double %x, 0.000000e+00 97 %d = select i1 %c, double %x, double 0.000000e+00 98 ret double %d 99 } 100 101 ; CHECK: x_ogt_inverse: 102 ; CHECK-NEXT: pxor %xmm1, %xmm1 103 ; CHECK-NEXT: minsd %xmm0, %xmm1 104 ; CHECK-NEXT: movapd %xmm1, %xmm0 105 ; CHECK-NEXT: ret 106 define double @x_ogt_inverse(double %x) nounwind { 107 %c = fcmp ogt double %x, 0.000000e+00 108 %d = select i1 %c, double 0.000000e+00, double %x 109 ret double %d 110 } 111 112 ; CHECK: x_olt_inverse: 113 ; CHECK-NEXT: pxor %xmm1, %xmm1 114 ; CHECK-NEXT: maxsd %xmm0, %xmm1 115 ; CHECK-NEXT: movapd %xmm1, %xmm0 116 ; CHECK-NEXT: ret 117 define double @x_olt_inverse(double %x) nounwind { 118 %c = fcmp olt double %x, 0.000000e+00 119 %d = select i1 %c, double 0.000000e+00, double %x 120 ret double %d 121 } 122 123 ; CHECK: x_oge: 124 ; CHECK-NEXT: pxor %xmm1, %xmm1 125 ; CHECK-NEXT: maxsd %xmm1, %xmm0 126 ; CHECK-NEXT: ret 127 define double @x_oge(double %x) nounwind { 128 %c = fcmp oge double %x, 0.000000e+00 129 %d = select i1 %c, double %x, double 0.000000e+00 130 ret double %d 131 } 132 133 ; CHECK: x_ole: 134 ; CHECK-NEXT: pxor %xmm1, %xmm1 135 ; CHECK-NEXT: minsd %xmm1, %xmm0 136 ; CHECK-NEXT: ret 137 define double @x_ole(double %x) nounwind { 138 %c = fcmp ole double %x, 0.000000e+00 139 %d = select i1 %c, double %x, double 0.000000e+00 140 ret double %d 141 } 142 143 ; CHECK: x_oge_inverse: 144 ; CHECK-NEXT: pxor %xmm1, %xmm1 145 ; CHECK-NEXT: minsd %xmm0, %xmm1 146 ; CHECK-NEXT: movapd %xmm1, %xmm0 147 ; CHECK-NEXT: ret 148 define double @x_oge_inverse(double %x) nounwind { 149 %c = fcmp oge double %x, 0.000000e+00 150 %d = select i1 %c, double 0.000000e+00, double %x 151 ret double %d 152 } 153 154 ; CHECK: x_ole_inverse: 155 ; CHECK-NEXT: pxor %xmm1, %xmm1 156 ; CHECK-NEXT: maxsd %xmm0, %xmm1 157 ; CHECK-NEXT: movapd %xmm1, %xmm0 158 ; CHECK-NEXT: ret 159 define double @x_ole_inverse(double %x) nounwind { 160 %c = fcmp ole double %x, 0.000000e+00 161 %d = select i1 %c, double 0.000000e+00, double %x 162 ret double %d 163 } 164 165 ; CHECK: ugt: 166 ; CHECK-NEXT: ucomisd %xmm0, %xmm1 167 define double @ugt(double %x, double %y) nounwind { 168 %c = fcmp ugt double %x, %y 169 %d = select i1 %c, double %x, double %y 170 ret double %d 171 } 172 173 ; CHECK: ult: 174 ; CHECK-NEXT: ucomisd %xmm1, %xmm0 175 define double @ult(double %x, double %y) nounwind { 176 %c = fcmp ult double %x, %y 177 %d = select i1 %c, double %x, double %y 178 ret double %d 179 } 180 181 ; CHECK: ugt_inverse: 182 ; CHECK-NEXT: ucomisd %xmm0, %xmm1 183 define double @ugt_inverse(double %x, double %y) nounwind { 184 %c = fcmp ugt double %x, %y 185 %d = select i1 %c, double %y, double %x 186 ret double %d 187 } 188 189 ; CHECK: ult_inverse: 190 ; CHECK-NEXT: ucomisd %xmm1, %xmm0 191 define double @ult_inverse(double %x, double %y) nounwind { 192 %c = fcmp ult double %x, %y 193 %d = select i1 %c, double %y, double %x 194 ret double %d 195 } 196 197 ; CHECK: uge: 198 ; CHECK-NEXT: maxsd %xmm0, %xmm1 199 ; CHECK-NEXT: movapd %xmm1, %xmm0 200 ; CHECK-NEXT: ret 201 define double @uge(double %x, double %y) nounwind { 202 %c = fcmp uge double %x, %y 203 %d = select i1 %c, double %x, double %y 204 ret double %d 205 } 206 207 ; CHECK: ule: 208 ; CHECK-NEXT: minsd %xmm0, %xmm1 209 ; CHECK-NEXT: movapd %xmm1, %xmm0 210 ; CHECK-NEXT: ret 211 define double @ule(double %x, double %y) nounwind { 212 %c = fcmp ule double %x, %y 213 %d = select i1 %c, double %x, double %y 214 ret double %d 215 } 216 217 ; CHECK: uge_inverse: 218 ; CHECK-NEXT: minsd %xmm1, %xmm0 219 ; CHECK-NEXT: ret 220 define double @uge_inverse(double %x, double %y) nounwind { 221 %c = fcmp uge double %x, %y 222 %d = select i1 %c, double %y, double %x 223 ret double %d 224 } 225 226 ; CHECK: ule_inverse: 227 ; CHECK-NEXT: maxsd %xmm1, %xmm0 228 ; CHECK-NEXT: ret 229 define double @ule_inverse(double %x, double %y) nounwind { 230 %c = fcmp ule double %x, %y 231 %d = select i1 %c, double %y, double %x 232 ret double %d 233 } 234 235 ; CHECK: x_ugt: 236 ; CHECK-NEXT: pxor %xmm1, %xmm1 237 ; CHECK-NEXT: maxsd %xmm0, %xmm1 238 ; CHECK-NEXT: movapd %xmm1, %xmm0 239 ; CHECK-NEXT: ret 240 define double @x_ugt(double %x) nounwind { 241 %c = fcmp ugt double %x, 0.000000e+00 242 %d = select i1 %c, double %x, double 0.000000e+00 243 ret double %d 244 } 245 246 ; CHECK: x_ult: 247 ; CHECK-NEXT: pxor %xmm1, %xmm1 248 ; CHECK-NEXT: minsd %xmm0, %xmm1 249 ; CHECK-NEXT: movapd %xmm1, %xmm0 250 ; CHECK-NEXT: ret 251 define double @x_ult(double %x) nounwind { 252 %c = fcmp ult double %x, 0.000000e+00 253 %d = select i1 %c, double %x, double 0.000000e+00 254 ret double %d 255 } 256 257 ; CHECK: x_ugt_inverse: 258 ; CHECK-NEXT: pxor %xmm1, %xmm1 259 ; CHECK-NEXT: minsd %xmm1, %xmm0 260 ; CHECK-NEXT: ret 261 define double @x_ugt_inverse(double %x) nounwind { 262 %c = fcmp ugt double %x, 0.000000e+00 263 %d = select i1 %c, double 0.000000e+00, double %x 264 ret double %d 265 } 266 267 ; CHECK: x_ult_inverse: 268 ; CHECK-NEXT: pxor %xmm1, %xmm1 269 ; CHECK-NEXT: maxsd %xmm1, %xmm0 270 ; CHECK-NEXT: ret 271 define double @x_ult_inverse(double %x) nounwind { 272 %c = fcmp ult double %x, 0.000000e+00 273 %d = select i1 %c, double 0.000000e+00, double %x 274 ret double %d 275 } 276 277 ; CHECK: x_uge: 278 ; CHECK-NEXT: pxor %xmm1, %xmm1 279 ; CHECK-NEXT: maxsd %xmm0, %xmm1 280 ; CHECK-NEXT: movapd %xmm1, %xmm0 281 ; CHECK-NEXT: ret 282 define double @x_uge(double %x) nounwind { 283 %c = fcmp uge double %x, 0.000000e+00 284 %d = select i1 %c, double %x, double 0.000000e+00 285 ret double %d 286 } 287 288 ; CHECK: x_ule: 289 ; CHECK-NEXT: pxor %xmm1, %xmm1 290 ; CHECK-NEXT: minsd %xmm0, %xmm1 291 ; CHECK-NEXT: movapd %xmm1, %xmm0 292 ; CHECK-NEXT: ret 293 define double @x_ule(double %x) nounwind { 294 %c = fcmp ule double %x, 0.000000e+00 295 %d = select i1 %c, double %x, double 0.000000e+00 296 ret double %d 297 } 298 299 ; CHECK: x_uge_inverse: 300 ; CHECK-NEXT: pxor %xmm1, %xmm1 301 ; CHECK-NEXT: minsd %xmm1, %xmm0 302 ; CHECK-NEXT: ret 303 define double @x_uge_inverse(double %x) nounwind { 304 %c = fcmp uge double %x, 0.000000e+00 305 %d = select i1 %c, double 0.000000e+00, double %x 306 ret double %d 307 } 308 309 ; CHECK: x_ule_inverse: 310 ; CHECK-NEXT: pxor %xmm1, %xmm1 311 ; CHECK-NEXT: maxsd %xmm1, %xmm0 312 ; CHECK-NEXT: ret 313 define double @x_ule_inverse(double %x) nounwind { 314 %c = fcmp ule double %x, 0.000000e+00 315 %d = select i1 %c, double 0.000000e+00, double %x 316 ret double %d 317 } 318 319 ; Test a few more misc. cases. 1 320 2 321 ; CHECK: clampTo3k_a: 3 322 ; CHECK: minsd