llvm.org GIT mirror llvm / f1fc3a8
Fix PR 1681. When X86 target uses +sse -sse2, keep f32 in SSE registers and f64 in x87. This is effectively a new codegen mode. Change addLegalFPImmediate to permit float and double variants to do different things. Adjust callers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42246 91177308-0d34-0410-b5e6-96231b3b80d8 Dale Johannesen 13 years ago
8 changed file(s) with 164 addition(s) and 97 deletion(s). Raw diff Collapse all Expand all
782782 /// addLegalFPImmediate - Indicate that this target can instruction select
783783 /// the specified FP immediate natively.
784784 void addLegalFPImmediate(const APFloat& Imm) {
785 // Incoming constants are expected to be double. We also add
786 // the float version. It is expected that all constants are exactly
787 // representable as floats.
788 assert(&Imm.getSemantics() == &APFloat::IEEEdouble);
789 APFloat Immf = APFloat(Imm);
790 // Rounding mode is not supposed to matter here...
791 if (Immf.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven) !=
792 APFloat::opOK)
793 assert(0);
794785 LegalFPImmediates.push_back(Imm);
795 LegalFPImmediates.push_back(Immf);
796786 }
797787
798788 /// setTargetDAGCombine - Targets should invoke this method for each target
139139 setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
140140 setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
141141 addLegalFPImmediate(APFloat(+0.0)); //F31
142 addLegalFPImmediate(APFloat(+0.0f)); //F31
142143 addLegalFPImmediate(APFloat(-0.0)); //-F31
144 addLegalFPImmediate(APFloat(-0.0f)); //-F31
143145
144146 setJumpBufSize(272);
145147 setJumpBufAlignment(16);
119119
120120 setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
121121 addLegalFPImmediate(APFloat(+0.0));
122 addLegalFPImmediate(APFloat(+0.0f));
122123 addLegalFPImmediate(APFloat(+1.0));
124 addLegalFPImmediate(APFloat(+1.0f));
123125 }
124126
125127 const char *IA64TargetLowering::getTargetNodeName(unsigned Opcode) const {
3939 X86TargetLowering::X86TargetLowering(TargetMachine &TM)
4040 : TargetLowering(TM) {
4141 Subtarget = &TM.getSubtarget();
42 X86ScalarSSE = Subtarget->hasSSE2();
42 X86ScalarSSEf64 = Subtarget->hasSSE2();
43 X86ScalarSSEf32 = Subtarget->hasSSE1();
4344 X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
4445
4546 RegInfo = TM.getRegisterInfo();
8687 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
8788 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
8889 } else {
89 if (X86ScalarSSE)
90 if (X86ScalarSSEf64)
9091 // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
9192 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
9293 else
9899 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
99100 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
100101 // SSE has no i16 to fp conversion, only i32
101 if (X86ScalarSSE) {
102 if (X86ScalarSSEf32) {
102103 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
103104 // f32 and f64 cases are Legal, f80 case is not
104105 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
117118 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
118119 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
119120
120 if (X86ScalarSSE) {
121 if (X86ScalarSSEf32) {
121122 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
122123 // f32 and f64 cases are Legal, f80 case is not
123124 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
136137 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
137138 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
138139 } else {
139 if (X86ScalarSSE && !Subtarget->hasSSE3())
140 if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
140141 // Expand FP_TO_UINT into a select.
141142 // FIXME: We would like to use a Custom expander here eventually to do
142143 // the optimal thing for SSE vs. the default expansion in the legalizer.
147148 }
148149
149150 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
150 if (!X86ScalarSSE) {
151 if (!X86ScalarSSEf64) {
151152 setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
152153 setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
153154 }
270271 else
271272 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
272273
273 if (X86ScalarSSE) {
274 if (X86ScalarSSEf64) {
275 // f32 and f64 use SSE.
274276 // Set up the FP register classes.
275277 addRegisterClass(MVT::f32, X86::FR32RegisterClass);
276278 addRegisterClass(MVT::f64, X86::FR64RegisterClass);
299301 // cases we handle.
300302 setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
301303 setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
302 addLegalFPImmediate(APFloat(+0.0)); // xorps / xorpd
304 addLegalFPImmediate(APFloat(+0.0)); // xorpd
305 addLegalFPImmediate(APFloat(+0.0f)); // xorps
303306
304307 // Conversions to long double (in X87) go through memory.
305308 setConvertAction(MVT::f32, MVT::f80, Expand);
308311 // Conversions from long double (in X87) go through memory.
309312 setConvertAction(MVT::f80, MVT::f32, Expand);
310313 setConvertAction(MVT::f80, MVT::f64, Expand);
314 } else if (X86ScalarSSEf32) {
315 // Use SSE for f32, x87 for f64.
316 // Set up the FP register classes.
317 addRegisterClass(MVT::f32, X86::FR32RegisterClass);
318 addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
319
320 // Use ANDPS to simulate FABS.
321 setOperationAction(ISD::FABS , MVT::f32, Custom);
322
323 // Use XORPS to simulate FNEG.
324 setOperationAction(ISD::FNEG , MVT::f32, Custom);
325
326 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
327
328 // Use ANDPS and ORPS to simulate FCOPYSIGN.
329 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
330 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
331
332 // We don't support sin/cos/fmod
333 setOperationAction(ISD::FSIN , MVT::f32, Expand);
334 setOperationAction(ISD::FCOS , MVT::f32, Expand);
335 setOperationAction(ISD::FREM , MVT::f32, Expand);
336
337 // Expand FP immediates into loads from the stack, except for the special
338 // cases we handle.
339 setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
340 setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
341 addLegalFPImmediate(APFloat(+0.0f)); // xorps
342 addLegalFPImmediate(APFloat(+0.0)); // FLD0
343 addLegalFPImmediate(APFloat(+1.0)); // FLD1
344 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
345 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
346
347 // SSE->x87 conversions go through memory.
348 setConvertAction(MVT::f32, MVT::f64, Expand);
349 setConvertAction(MVT::f32, MVT::f80, Expand);
350
351 // x87->SSE truncations need to go through memory.
352 setConvertAction(MVT::f80, MVT::f32, Expand);
353 setConvertAction(MVT::f64, MVT::f32, Expand);
354 // And x87->x87 truncations also.
355 setConvertAction(MVT::f80, MVT::f64, Expand);
356
357 if (!UnsafeFPMath) {
358 setOperationAction(ISD::FSIN , MVT::f64 , Expand);
359 setOperationAction(ISD::FCOS , MVT::f64 , Expand);
360 }
311361 } else {
362 // f32 and f64 in x87.
312363 // Set up the FP register classes.
313364 addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
314365 addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
334385 addLegalFPImmediate(APFloat(+1.0)); // FLD1
335386 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
336387 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
388 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
389 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
390 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
391 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
337392 }
338393
339394 // Long double always uses X87.
582637
583638 // If this is an FP return with ScalarSSE, we need to move the value from
584639 // an XMM register onto the fp-stack.
585 if (X86ScalarSSE) {
640 if ((X86ScalarSSEf32 && RVLocs[0].getValVT()==MVT::f32) ||
641 (X86ScalarSSEf64 && RVLocs[0].getValVT()==MVT::f64)) {
586642 SDOperand MemLoc;
587643
588644 // If this is a load into a scalarsse value, don't store the loaded value
658714
659715 // If we are using ScalarSSE, store ST(0) to the stack and reload it into
660716 // an XMM register.
661 if (X86ScalarSSE) {
717 if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
718 (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
662719 // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
663720 // shouldn't be necessary except that RFP cannot be live across
664721 // multiple blocks. When stackifier is fixed, they can be uncoupled.
33333390 StackSlot, NULL, 0);
33343391
33353392 // These are really Legal; caller falls through into that case.
3336 if (SrcVT==MVT::i32 && Op.getValueType() != MVT::f80 && X86ScalarSSE)
3393 if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f32 && X86ScalarSSEf32)
3394 return Result;
3395 if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f64 && X86ScalarSSEf64)
33373396 return Result;
33383397 if (SrcVT==MVT::i64 && Op.getValueType() != MVT::f80 &&
33393398 Subtarget->is64Bit())
33413400
33423401 // Build the FILD
33433402 SDVTList Tys;
3344 bool useSSE = X86ScalarSSE && Op.getValueType() != MVT::f80;
3403 bool useSSE = (X86ScalarSSEf32 && Op.getValueType() == MVT::f32) ||
3404 (X86ScalarSSEf64 && Op.getValueType() == MVT::f64);
33453405 if (useSSE)
33463406 Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
33473407 else
33893449 SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
33903450
33913451 // These are really Legal.
3392 if (Op.getValueType() == MVT::i32 && X86ScalarSSE &&
3393 Op.getOperand(0).getValueType() != MVT::f80)
3452 if (Op.getValueType() == MVT::i32 &&
3453 X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32)
3454 return Result;
3455 if (Op.getValueType() == MVT::i32 &&
3456 X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)
33943457 return Result;
33953458 if (Subtarget->is64Bit() &&
33963459 Op.getValueType() == MVT::i64 &&
34073470
34083471 SDOperand Chain = DAG.getEntryNode();
34093472 SDOperand Value = Op.getOperand(0);
3410 if (X86ScalarSSE && Op.getOperand(0).getValueType() != MVT::f80) {
3473 if ((X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) ||
3474 (X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)) {
34113475 assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
34123476 Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
34133477 SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
36193683 // pressure reason)?
36203684 SDOperand Cmp = Cond.getOperand(1);
36213685 unsigned Opc = Cmp.getOpcode();
3622 bool IllegalFPCMov = !X86ScalarSSE &&
3623 MVT::isFloatingPoint(Op.getValueType()) &&
3686 bool IllegalFPCMov =
3687 ! ((X86ScalarSSEf32 && Op.getValueType()==MVT::f32) ||
3688 (X86ScalarSSEf64 && Op.getValueType()==MVT::f64)) &&
36243689 !hasFPCMov(cast(CC)->getSignExtended());
36253690 if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) &&
36263691 !IllegalFPCMov) {
372372 /// X86StackPtr - X86 physical register used as stack ptr.
373373 unsigned X86StackPtr;
374374
375 /// X86ScalarSSE - Select between SSE2 or x87 floating point ops.
376 bool X86ScalarSSE;
375 /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
376 /// floating point ops.
377 /// When SSE is available, use it for f32 operations.
378 /// When SSE2 is available, use it for f64 operations.
379 bool X86ScalarSSEf32;
380 bool X86ScalarSSEf64;
377381
378382 SDNode *LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode*TheCall,
379383 unsigned CallingConv, SelectionDAG &DAG);
151151 [(X86fpset RFP80:$src)]>;// ST(0) = FPR
152152 }
153153
154 // FpI - Floating Point Pseudo Instruction template. Predicated on FPStack.
155 // Note that f80-only instructions are used even in SSE mode and use FpI_
156 // not this predicate.
157 class FpI pattern> :
158 FpI_, Requires<[FPStack]>;
154 // FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
155 // f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
156 // f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
157 // f80 instructions cannot use SSE and use neither of these.
158 class FpIf32 pattern> :
159 FpI_, Requires<[FPStackf32]>;
160 class FpIf64 pattern> :
161 FpI_, Requires<[FPStackf64]>;
159162
160163 // Register copies. Just copies, the shortening ones do not truncate.
161 def MOV_Fp3232 : FpI<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>;
162 def MOV_Fp3264 : FpI<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>;
163 def MOV_Fp6432 : FpI<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>;
164 def MOV_Fp6464 : FpI<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>;
165 def MOV_Fp8032 : FpI<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>;
166 def MOV_Fp3280 : FpI<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>;
167 def MOV_Fp8064 : FpI<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>;
168 def MOV_Fp6480 : FpI<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>;
164 def MOV_Fp3232 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>;
165 def MOV_Fp3264 : FpIf32<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>;
166 def MOV_Fp6432 : FpIf32<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>;
167 def MOV_Fp6464 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>;
168 def MOV_Fp8032 : FpIf32<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>;
169 def MOV_Fp3280 : FpIf32<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>;
170 def MOV_Fp8064 : FpIf64<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>;
171 def MOV_Fp6480 : FpIf64<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>;
169172 def MOV_Fp8080 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), SpecialFP, []>;
170173
171174 // Factoring for arithmetic.
172175 multiclass FPBinary_rr {
173176 // Register op register -> register
174177 // These are separated out because they have no reversed form.
175 def _Fp32 : FpI<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), TwoArgFP,
178 def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), TwoArgFP,
176179 [(set RFP32:$dst, (OpNode RFP32:$src1, RFP32:$src2))]>;
177 def _Fp64 : FpI<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), TwoArgFP,
180 def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), TwoArgFP,
178181 [(set RFP64:$dst, (OpNode RFP64:$src1, RFP64:$src2))]>;
179182 def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
180183 [(set RFP80:$dst, (OpNode RFP80:$src1, RFP80:$src2))]>;
184187 // These instructions cannot address 80-bit memory.
185188 multiclass FPBinary {
186189 // ST(0) = ST(0) + [mem]
187 def _Fp32m : FpI<(outs RFP32:$dst), (ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
190 def _Fp32m : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
188191 [(set RFP32:$dst,
189192 (OpNode RFP32:$src1, (loadf32 addr:$src2)))]>;
190 def _Fp64m : FpI<(outs RFP64:$dst), (ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
193 def _Fp64m : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
191194 [(set RFP64:$dst,
192195 (OpNode RFP64:$src1, (loadf64 addr:$src2)))]>;
193 def _Fp64m32: FpI<(outs RFP64:$dst), (ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
196 def _Fp64m32: FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
194197 [(set RFP64:$dst,
195198 (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2))))]>;
196199 def _Fp80m32: FpI_<(outs RFP80:$dst), (ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
204207 def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
205208 !strconcat("f", !strconcat(asmstring, "{l}\t$src"))>;
206209 // ST(0) = ST(0) + [memint]
207 def _FpI16m32 : FpI<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2), OneArgFPRW,
210 def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2), OneArgFPRW,
208211 [(set RFP32:$dst, (OpNode RFP32:$src1,
209212 (X86fild addr:$src2, i16)))]>;
210 def _FpI32m32 : FpI<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2), OneArgFPRW,
213 def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2), OneArgFPRW,
211214 [(set RFP32:$dst, (OpNode RFP32:$src1,
212215 (X86fild addr:$src2, i32)))]>;
213 def _FpI16m64 : FpI<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2), OneArgFPRW,
216 def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2), OneArgFPRW,
214217 [(set RFP64:$dst, (OpNode RFP64:$src1,
215218 (X86fild addr:$src2, i16)))]>;
216 def _FpI32m64 : FpI<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2), OneArgFPRW,
219 def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2), OneArgFPRW,
217220 [(set RFP64:$dst, (OpNode RFP64:$src1,
218221 (X86fild addr:$src2, i32)))]>;
219222 def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2), OneArgFPRW,
270273
271274 // Unary operations.
272275 multiclass FPUnary opcode, string asmstring> {
273 def _Fp32 : FpI<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
276 def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
274277 [(set RFP32:$dst, (OpNode RFP32:$src))]>;
275 def _Fp64 : FpI<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
278 def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
276279 [(set RFP64:$dst, (OpNode RFP64:$src))]>;
277280 def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
278281 [(set RFP80:$dst, (OpNode RFP80:$src))]>;
285288 defm SIN : FPUnary;
286289 defm COS : FPUnary;
287290
288 def TST_Fp32 : FpI<(outs), (ins RFP32:$src), OneArgFP,
291 def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP,
289292 []>;
290 def TST_Fp64 : FpI<(outs), (ins RFP64:$src), OneArgFP,
293 def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP,
291294 []>;
292295 def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP,
293296 []>;
295298
296299 // Floating point cmovs.
297300 multiclass FPCMov {
298 def _Fp32 : FpI<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), CondMovFP,
301 def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), CondMovFP,
299302 [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
300303 cc))]>;
301 def _Fp64 : FpI<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), CondMovFP,
304 def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), CondMovFP,
302305 [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
303306 cc))]>;
304307 def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), CondMovFP,
336339
337340 // Floating point loads & stores.
338341 let isLoad = 1 in {
339 def LD_Fp32m : FpI<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
342 def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
340343 [(set RFP32:$dst, (loadf32 addr:$src))]>;
341 def LD_Fp64m : FpI<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
344 def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
342345 [(set RFP64:$dst, (loadf64 addr:$src))]>;
343346 def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP,
344347 [(set RFP80:$dst, (loadf80 addr:$src))]>;
345348 }
346 def LD_Fp32m64 : FpI<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP,
349 def LD_Fp32m64 : FpIf64<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP,
347350 [(set RFP64:$dst, (f64 (extloadf32 addr:$src)))]>;
348351 def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP,
349352 [(set RFP80:$dst, (f80 (extloadf64 addr:$src)))]>;
350353 def LD_Fp32m80 : FpI_<(outs RFP80:$dst), (ins f32mem:$src), ZeroArgFP,
351354 [(set RFP80:$dst, (f80 (extloadf32 addr:$src)))]>;
352 def ILD_Fp16m32: FpI<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
355 def ILD_Fp16m32: FpIf32<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
353356 [(set RFP32:$dst, (X86fild addr:$src, i16))]>;
354 def ILD_Fp32m32: FpI<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
357 def ILD_Fp32m32: FpIf32<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
355358 [(set RFP32:$dst, (X86fild addr:$src, i32))]>;
356 def ILD_Fp64m32: FpI<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
359 def ILD_Fp64m32: FpIf32<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
357360 [(set RFP32:$dst, (X86fild addr:$src, i64))]>;
358 def ILD_Fp16m64: FpI<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
361 def ILD_Fp16m64: FpIf64<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
359362 [(set RFP64:$dst, (X86fild addr:$src, i16))]>;
360 def ILD_Fp32m64: FpI<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
363 def ILD_Fp32m64: FpIf64<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
361364 [(set RFP64:$dst, (X86fild addr:$src, i32))]>;
362 def ILD_Fp64m64: FpI<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
365 def ILD_Fp64m64: FpIf64<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
363366 [(set RFP64:$dst, (X86fild addr:$src, i64))]>;
364367 def ILD_Fp16m80: FpI_<(outs RFP80:$dst), (ins i16mem:$src), ZeroArgFP,
365368 [(set RFP80:$dst, (X86fild addr:$src, i16))]>;
368371 def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP,
369372 [(set RFP80:$dst, (X86fild addr:$src, i64))]>;
370373
371 def ST_Fp32m : FpI<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
374 def ST_Fp32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
372375 [(store RFP32:$src, addr:$op)]>;
373 def ST_Fp64m32 : FpI<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
376 def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
374377 [(truncstoref32 RFP64:$src, addr:$op)]>;
375 def ST_Fp64m : FpI<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP,
378 def ST_Fp64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP,
376379 [(store RFP64:$src, addr:$op)]>;
377380 def ST_Fp80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP,
378381 [(truncstoref32 RFP80:$src, addr:$op)]>;
380383 [(truncstoref64 RFP80:$src, addr:$op)]>;
381384 // FST does not support 80-bit memory target; FSTP must be used.
382385
383 def ST_FpP32m : FpI<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>;
384 def ST_FpP64m32 : FpI<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>;
385 def ST_FpP64m : FpI<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>;
386 def ST_FpP80m32 : FpI<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>;
387 def ST_FpP80m64 : FpI<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>;
386 def ST_FpP32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>;
387 def ST_FpP64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>;
388 def ST_FpP64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>;
389 def ST_FpP80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>;
390 def ST_FpP80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>;
388391 def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,
389392 [(store RFP80:$src, addr:$op)]>;
390 def IST_Fp16m32 : FpI<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
391 def IST_Fp32m32 : FpI<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
392 def IST_Fp64m32 : FpI<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
393 def IST_Fp16m64 : FpI<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
394 def IST_Fp32m64 : FpI<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
395 def IST_Fp64m64 : FpI<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
393 def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
394 def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
395 def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
396 def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
397 def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
398 def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
396399 def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
397400 def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
398401 def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
455458
456459 // Floating point constant loads.
457460 let isReMaterializable = 1 in {
458 def LD_Fp032 : FpI<(outs RFP32:$dst), (ins), ZeroArgFP,
461 def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
459462 [(set RFP32:$dst, fpimm0)]>;
460 def LD_Fp132 : FpI<(outs RFP32:$dst), (ins), ZeroArgFP,
463 def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
461464 [(set RFP32:$dst, fpimm1)]>;
462 def LD_Fp064 : FpI<(outs RFP64:$dst), (ins), ZeroArgFP,
465 def LD_Fp064 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
463466 [(set RFP64:$dst, fpimm0)]>;
464 def LD_Fp164 : FpI<(outs RFP64:$dst), (ins), ZeroArgFP,
467 def LD_Fp164 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
465468 [(set RFP64:$dst, fpimm1)]>;
466469 def LD_Fp080 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
467470 [(set RFP80:$dst, fpimm0)]>;
474477
475478
476479 // Floating point compares.
477 def UCOM_Fpr32 : FpI<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
480 def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
478481 []>; // FPSW = cmp ST(0) with ST(i)
479 def UCOM_FpIr32: FpI<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
482 def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
480483 [(X86cmp RFP32:$lhs, RFP32:$rhs)]>; // CC = ST(0) cmp ST(i)
481 def UCOM_Fpr64 : FpI<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
484 def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
482485 []>; // FPSW = cmp ST(0) with ST(i)
483 def UCOM_FpIr64: FpI<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
486 def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
484487 [(X86cmp RFP64:$lhs, RFP64:$rhs)]>; // CC = ST(0) cmp ST(i)
485488 def UCOM_Fpr80 : FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
486489 []>; // FPSW = cmp ST(0) with ST(i)
534537 def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op, RFP80:$src)>;
535538
536539 // Floating point constant -0.0 and -1.0
537 def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStack]>;
538 def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStack]>;
539 def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStack]>;
540 def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStack]>;
540 def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStackf32]>;
541 def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStackf32]>;
542 def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStackf64]>;
543 def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
541544 def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
542545 def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
543546
544547 // Used to convert i64 to f64 since there isn't an SSE version.
545548 def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
546549
547 def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>, Requires<[FPStack]>;
548 def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>, Requires<[FPStack]>;
549 def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>, Requires<[FPStack]>;
550 def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>, Requires<[FPStackf32]>;
551 def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>, Requires<[FPStackf32]>;
552 def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>, Requires<[FPStackf64]>;
159159 def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
160160 def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
161161 def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
162 def FPStack : Predicate<"!Subtarget->hasSSE2()">;
162 def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
163 def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
163164 def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
164165 def In64BitMode : Predicate<"Subtarget->is64Bit()">;
165166 def HasLow4G : Predicate<"Subtarget->hasLow4GUserSpaceAddress()">;
4949 Requires<[HasSSE1]>;
5050 def IMPLICIT_DEF_FR32 : I<0, Pseudo, (outs FR32:$dst), (ins),
5151 "#IMPLICIT_DEF $dst",
52 [(set FR32:$dst, (undef))]>, Requires<[HasSSE2]>;
52 [(set FR32:$dst, (undef))]>, Requires<[HasSSE1]>;
5353 def IMPLICIT_DEF_FR64 : I<0, Pseudo, (outs FR64:$dst), (ins),
5454 "#IMPLICIT_DEF $dst",
5555 [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;