Allow target to handle STRICT floating-point nodes

The ISD::STRICT_ nodes used to implement the constrained floating-point intrinsics are currently never passed to the target back-end, which makes it impossible to handle them correctly (e.g. mark instructions as depending on a floating-point status and control register, or mark instructions as possibly trapping).

This patch allows the target to use setOperationAction to switch the action on ISD::STRICT_ nodes to Legal. If this is done, the SelectionDAG common code will stop converting the STRICT nodes to regular floating-point nodes, and will instead pass the STRICT nodes to the target using normal SelectionDAG matching rules.

To avoid having the back-end duplicate all the floating-point instruction patterns to handle both strict and non-strict variants, we make the MI codegen explicitly aware of floating-point exceptions by introducing two new concepts:

- A new MCID flag "mayRaiseFPException" that the target should set on any instruction that can possibly raise an FP exception according to the architecture definition.

- A new MI flag "FPExcept" that CodeGen/SelectionDAG will set on any MI instruction resulting from expansion of any constrained FP intrinsic.

Any MI instruction that is *both* marked as mayRaiseFPException *and* FPExcept then needs to be considered as raising exceptions by MI-level codegen (e.g. scheduling).

Setting those two new flags is straightforward. The mayRaiseFPException flag is simply set via TableGen by marking all relevant instruction patterns in the .td files. The FPExcept flag is set in SDNodeFlags when creating the STRICT_ nodes in the SelectionDAG, and gets inherited by the MachineSDNode nodes created from them during instruction selection. The flag is then transferred to an MIFlag when creating the MI from the MachineSDNode, handled just like fast-math flags such as no-nans are handled today.

This patch includes both the common code changes required to implement the new features and the SystemZ implementation.

Reviewed By: andrew.w.kaylor

Differential Revision: https://reviews.llvm.org/D55506

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362663 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Ulrich Weigand
82 changed files with 5799 additions and 383 deletions.
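Taken together, the mechanism works as sketched below (illustrative only, for a hypothetical target; setOperationAction and mayRaiseFPException are the APIs this patch adds or builds on):

  // In the hypothetical target's TargetLowering constructor: mark STRICT_
  // nodes Legal so SelectionDAG common code keeps them instead of mutating
  // them back into plain FP nodes before instruction selection.
  for (MVT VT : {MVT::f32, MVT::f64}) {
    setOperationAction(ISD::STRICT_FADD, VT, Legal);
    setOperationAction(ISD::STRICT_FMUL, VT, Legal);
  }

  // In MI-level passes: an instruction counts as exception-raising only if
  // it carries *both* the MCID property (set in the .td files) and the
  // FPExcept MI flag (set when it was selected from a constrained
  // intrinsic).
  bool PinInPlace = MI.mayRaiseFPException();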
101101 // no unsigned wrap.
102102 NoSWrap = 1 << 12, // Instruction supports binary operator
103103 // no signed wrap.
104 IsExact = 1 << 13 // Instruction supports division that is
104 IsExact = 1 << 13, // Instruction supports division that is
105105 // known to be exact.
106 FPExcept = 1 << 14, // Instruction may raise floating-point
107 // exceptions.
106108 };
107109
108110 private:
829831 return mayLoad(Type) || mayStore(Type);
830832 }
831833
834 /// Return true if this instruction could possibly raise a floating-point
835 /// exception. This is the case if the instruction is a floating-point
836 /// instruction that can in principle raise an exception, as indicated
837 /// by the MCID::MayRaiseFPException property, *and* at the same time,
838 /// the instruction is used in a context where we expect floating-point
839 /// exceptions might be enabled, as indicated by the FPExcept MI flag.
840 bool mayRaiseFPException() const {
841 return hasProperty(MCID::MayRaiseFPException) &&
842 getFlag(MachineInstr::MIFlag::FPExcept);
843 }
844
832845 //===--------------------------------------------------------------------===//
833846 // Flags that indicate whether an instruction can be modified by a method.
834847 //===--------------------------------------------------------------------===//
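As a hedged illustration of how MI-level code is expected to consult the new predicate (the helper below is hypothetical; the real in-tree users are the MachineLICM, ImplicitNullChecks, scheduler, and rematerialization changes later in this diff):

  // An FP instruction selected from ordinary, non-strict IR carries the MCID
  // property but never the FPExcept MI flag, so mayRaiseFPException() stays
  // false and the instruction remains freely movable.
  static bool canSpeculate(const MachineInstr &MI) {
    return !MI.mayRaiseFPException() && !MI.hasUnmodeledSideEffects() &&
           !MI.mayLoadOrStore();
  }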
367367 bool ApproximateFuncs : 1;
368368 bool AllowReassociation : 1;
369369
370 // We assume instructions do not raise floating-point exceptions by default,
371 // and only those marked explicitly may do so. We could choose to represent
372 // this via a positive "FPExcept" flag, like on the MI level, but having a
373 // negative "NoFPExcept" flag here (that defaults to true) makes the flag
374 // intersection logic more straightforward.
375 bool NoFPExcept : 1;
376
370377 public:
371378 /// Default constructor turns off all optimization flags.
372379 SDNodeFlags()
374381 Exact(false), NoNaNs(false), NoInfs(false),
375382 NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false),
376383 AllowContract(false), ApproximateFuncs(false),
377 AllowReassociation(false) {}
384 AllowReassociation(false), NoFPExcept(true) {}
378385
379386 /// Propagate the fast-math-flags from an IR FPMathOperator.
380387 void copyFMF(const FPMathOperator &FPMO) {
436443 void setAllowReassociation(bool b) {
437444 setDefined();
438445 AllowReassociation = b;
446 }
447 void setFPExcept(bool b) {
448 setDefined();
449 NoFPExcept = !b;
439450 }
440451
441452 // These are accessors for each flag.
450461 bool hasAllowContract() const { return AllowContract; }
451462 bool hasApproximateFuncs() const { return ApproximateFuncs; }
452463 bool hasAllowReassociation() const { return AllowReassociation; }
464 bool hasFPExcept() const { return !NoFPExcept; }
453465
454466 bool isFast() const {
455 return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs &&
467 return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs && NoFPExcept &&
456468 AllowContract && ApproximateFuncs && AllowReassociation;
457469 }
458470
472484 AllowContract &= Flags.AllowContract;
473485 ApproximateFuncs &= Flags.ApproximateFuncs;
474486 AllowReassociation &= Flags.AllowReassociation;
487 NoFPExcept &= Flags.NoFPExcept;
475488 }
476489 };
477490
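A short worked example of why the negative encoding composes cleanly (illustrative snippet using only the accessors defined above):

  SDNodeFlags A, B;
  A.setFPExcept(true);     // A may raise exceptions: NoFPExcept = false
  B.setFPExcept(false);    // B cannot:               NoFPExcept = true
  A.intersectWith(B);      // NoFPExcept &= NoFPExcept, yielding false
  assert(A.hasFPExcept()); // the merged node conservatively may raise

A positive FPExcept flag would force intersectWith to use |= for this one flag while every other flag uses &=; the negative flag keeps the intersection uniform.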
133133 FoldableAsLoad,
134134 MayLoad,
135135 MayStore,
136 MayRaiseFPException,
136137 Predicable,
137138 NotDuplicable,
138139 UnmodeledSideEffects,
402403 /// may not actually modify anything, for example.
403404 bool mayStore() const { return Flags & (1ULL << MCID::MayStore); }
404405
406 /// Return true if this instruction may raise a floating-point exception.
407 bool mayRaiseFPException() const {
408 return Flags & (1ULL << MCID::MayRaiseFPException);
409 }
410
405411 /// Return true if this instruction has side
406412 /// effects that are not modeled by other flags. This does not return true
407413 /// for instructions whose effects are captured by:
455455 bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand?
456456 bit mayLoad = ?; // Is it possible for this inst to read memory?
457457 bit mayStore = ?; // Is it possible for this inst to write memory?
458 bit mayRaiseFPException = 0; // Can this raise a floating-point exception?
458459 bit isConvertibleToThreeAddress = 0; // Can this 2-addr instruction promote?
459460 bit isCommutable = 0; // Is this 3 operand instruction commutable?
460461 bit isTerminator = 0; // Is this part of the terminator for a basic block?
466466 def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
467467 def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
468468
469 def strict_fadd : SDNode<"ISD::STRICT_FADD",
470 SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>;
471 def strict_fsub : SDNode<"ISD::STRICT_FSUB",
472 SDTFPBinOp, [SDNPHasChain]>;
473 def strict_fmul : SDNode<"ISD::STRICT_FMUL",
474 SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>;
475 def strict_fdiv : SDNode<"ISD::STRICT_FDIV",
476 SDTFPBinOp, [SDNPHasChain]>;
477 def strict_frem : SDNode<"ISD::STRICT_FREM",
478 SDTFPBinOp, [SDNPHasChain]>;
479 def strict_fma : SDNode<"ISD::STRICT_FMA",
480 SDTFPTernaryOp, [SDNPHasChain]>;
481 def strict_fsqrt : SDNode<"ISD::STRICT_FSQRT",
482 SDTFPUnaryOp, [SDNPHasChain]>;
483 def strict_fsin : SDNode<"ISD::STRICT_FSIN",
484 SDTFPUnaryOp, [SDNPHasChain]>;
485 def strict_fcos : SDNode<"ISD::STRICT_FCOS",
486 SDTFPUnaryOp, [SDNPHasChain]>;
487 def strict_fexp2 : SDNode<"ISD::STRICT_FEXP2",
488 SDTFPUnaryOp, [SDNPHasChain]>;
489 def strict_fpow : SDNode<"ISD::STRICT_FPOW",
490 SDTFPBinOp, [SDNPHasChain]>;
491 def strict_flog2 : SDNode<"ISD::STRICT_FLOG2",
492 SDTFPUnaryOp, [SDNPHasChain]>;
493 def strict_frint : SDNode<"ISD::STRICT_FRINT",
494 SDTFPUnaryOp, [SDNPHasChain]>;
495 def strict_fnearbyint : SDNode<"ISD::STRICT_FNEARBYINT",
496 SDTFPUnaryOp, [SDNPHasChain]>;
497 def strict_fceil : SDNode<"ISD::STRICT_FCEIL",
498 SDTFPUnaryOp, [SDNPHasChain]>;
499 def strict_ffloor : SDNode<"ISD::STRICT_FFLOOR",
500 SDTFPUnaryOp, [SDNPHasChain]>;
501 def strict_fround : SDNode<"ISD::STRICT_FROUND",
502 SDTFPUnaryOp, [SDNPHasChain]>;
503 def strict_ftrunc : SDNode<"ISD::STRICT_FTRUNC",
504 SDTFPUnaryOp, [SDNPHasChain]>;
505 def strict_fminnum : SDNode<"ISD::STRICT_FMINNUM",
506 SDTFPBinOp, [SDNPHasChain,
507 SDNPCommutative, SDNPAssociative]>;
508 def strict_fmaxnum : SDNode<"ISD::STRICT_FMAXNUM",
509 SDTFPBinOp, [SDNPHasChain,
510 SDNPCommutative, SDNPAssociative]>;
511 def strict_fpround : SDNode<"ISD::STRICT_FP_ROUND",
512 SDTFPRoundOp, [SDNPHasChain]>;
513 def strict_fpextend : SDNode<"ISD::STRICT_FP_EXTEND",
514 SDTFPExtendOp, [SDNPHasChain]>;
515
469516 def setcc : SDNode<"ISD::SETCC" , SDTSetCC>;
470517 def select : SDNode<"ISD::SELECT" , SDTSelect>;
471518 def vselect : SDNode<"ISD::VSELECT" , SDTVSelect>;
11761223 def setne : PatFrag<(ops node:$lhs, node:$rhs),
11771224 (setcc node:$lhs, node:$rhs, SETNE)>;
11781225
1226 // Convenience fragments to match both strict and non-strict fp operations
1227 def any_fadd : PatFrags<(ops node:$lhs, node:$rhs),
1228 [(strict_fadd node:$lhs, node:$rhs),
1229 (fadd node:$lhs, node:$rhs)]>;
1230 def any_fsub : PatFrags<(ops node:$lhs, node:$rhs),
1231 [(strict_fsub node:$lhs, node:$rhs),
1232 (fsub node:$lhs, node:$rhs)]>;
1233 def any_fmul : PatFrags<(ops node:$lhs, node:$rhs),
1234 [(strict_fmul node:$lhs, node:$rhs),
1235 (fmul node:$lhs, node:$rhs)]>;
1236 def any_fdiv : PatFrags<(ops node:$lhs, node:$rhs),
1237 [(strict_fdiv node:$lhs, node:$rhs),
1238 (fdiv node:$lhs, node:$rhs)]>;
1239 def any_frem : PatFrags<(ops node:$lhs, node:$rhs),
1240 [(strict_frem node:$lhs, node:$rhs),
1241 (frem node:$lhs, node:$rhs)]>;
1242 def any_fma : PatFrags<(ops node:$src1, node:$src2, node:$src3),
1243 [(strict_fma node:$src1, node:$src2, node:$src3),
1244 (fma node:$src1, node:$src2, node:$src3)]>;
1245 def any_fsqrt : PatFrags<(ops node:$src),
1246 [(strict_fsqrt node:$src),
1247 (fsqrt node:$src)]>;
1248 def any_fsin : PatFrags<(ops node:$src),
1249 [(strict_fsin node:$src),
1250 (fsin node:$src)]>;
1251 def any_fcos : PatFrags<(ops node:$src),
1252 [(strict_fcos node:$src),
1253 (fcos node:$src)]>;
1254 def any_fexp2 : PatFrags<(ops node:$src),
1255 [(strict_fexp2 node:$src),
1256 (fexp2 node:$src)]>;
1257 def any_fpow : PatFrags<(ops node:$lhs, node:$rhs),
1258 [(strict_fpow node:$lhs, node:$rhs),
1259 (fpow node:$lhs, node:$rhs)]>;
1260 def any_flog2 : PatFrags<(ops node:$src),
1261 [(strict_flog2 node:$src),
1262 (flog2 node:$src)]>;
1263 def any_frint : PatFrags<(ops node:$src),
1264 [(strict_frint node:$src),
1265 (frint node:$src)]>;
1266 def any_fnearbyint : PatFrags<(ops node:$src),
1267 [(strict_fnearbyint node:$src),
1268 (fnearbyint node:$src)]>;
1269 def any_fceil : PatFrags<(ops node:$src),
1270 [(strict_fceil node:$src),
1271 (fceil node:$src)]>;
1272 def any_ffloor : PatFrags<(ops node:$src),
1273 [(strict_ffloor node:$src),
1274 (ffloor node:$src)]>;
1275 def any_fround : PatFrags<(ops node:$src),
1276 [(strict_fround node:$src),
1277 (fround node:$src)]>;
1278 def any_ftrunc : PatFrags<(ops node:$src),
1279 [(strict_ftrunc node:$src),
1280 (ftrunc node:$src)]>;
1281 def any_fmaxnum : PatFrags<(ops node:$lhs, node:$rhs),
1282 [(strict_fmaxnum node:$lhs, node:$rhs),
1283 (fmaxnum node:$lhs, node:$rhs)]>;
1284 def any_fminnum : PatFrags<(ops node:$lhs, node:$rhs),
1285 [(strict_fminnum node:$lhs, node:$rhs),
1286 (fminnum node:$lhs, node:$rhs)]>;
1287 def any_fpround : PatFrags<(ops node:$src),
1288 [(strict_fpround node:$src),
1289 (fpround node:$src)]>;
1290 def any_fpextend : PatFrags<(ops node:$src),
1291 [(strict_fpextend node:$src),
1292 (fpextend node:$src)]>;
1293
11791294 multiclass binary_atomic_op_ord {
11801295 def #NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),
11811296 (!cast<PatFrag>(#NAME) node:$ptr, node:$val)> {
7777 std::next(MI.getIterator()) == IntoMI.getIterator())
7878 return true;
7979
80 return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() &&
81 empty(MI.implicit_operands());
80 return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
81 !MI.hasUnmodeledSideEffects() && empty(MI.implicit_operands());
8282 }
228228 } // end anonymous namespace
229229
230230 bool ImplicitNullChecks::canHandle(const MachineInstr *MI) {
231 if (MI->isCall() || MI->hasUnmodeledSideEffects())
231 if (MI->isCall() || MI->mayRaiseFPException() ||
232 MI->hasUnmodeledSideEffects())
232233 return false;
233234 auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); };
234235 (void)IsRegMask;
203203 .Case("nuw" , MIToken::kw_nuw)
204204 .Case("nsw" , MIToken::kw_nsw)
205205 .Case("exact" , MIToken::kw_exact)
206 .Case("fpexcept", MIToken::kw_fpexcept)
206207 .Case("debug-location", MIToken::kw_debug_location)
207208 .Case("same_value", MIToken::kw_cfi_same_value)
208209 .Case("offset", MIToken::kw_cfi_offset)
7272 kw_nuw,
7373 kw_nsw,
7474 kw_exact,
75 kw_fpexcept,
7576 kw_debug_location,
7677 kw_cfi_same_value,
7778 kw_cfi_offset,
11351135 Token.is(MIToken::kw_reassoc) ||
11361136 Token.is(MIToken::kw_nuw) ||
11371137 Token.is(MIToken::kw_nsw) ||
1138 Token.is(MIToken::kw_exact)) {
1138 Token.is(MIToken::kw_exact) ||
1139 Token.is(MIToken::kw_fpexcept)) {
11391140 // Mine frame and fast math flags
11401141 if (Token.is(MIToken::kw_frame_setup))
11411142 Flags |= MachineInstr::FrameSetup;
11611162 Flags |= MachineInstr::NoSWrap;
11621163 if (Token.is(MIToken::kw_exact))
11631164 Flags |= MachineInstr::IsExact;
1165 if (Token.is(MIToken::kw_fpexcept))
1166 Flags |= MachineInstr::FPExcept;
11641167
11651168 lex();
11661169 }
712712 OS << "nsw ";
713713 if (MI.getFlag(MachineInstr::IsExact))
714714 OS << "exact ";
715 if (MI.getFlag(MachineInstr::FPExcept))
716 OS << "fpexcept ";
715717
716718 OS << TII->getName(MI.getOpcode());
717719 if (I < E)
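With the lexer, parser, and printer changes above, the flag round-trips through MIR: like the nuw/nsw/exact keywords it is printed immediately before the opcode name. A tagged SystemZ add would therefore look roughly like this (illustrative output, not a line from this patch):

  renamable $f0d = fpexcept ADBR killed renamable $f0d, renamable $f2d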
381381
382382 // Ignore stuff that we obviously can't move.
383383 if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
384 MI->hasUnmodeledSideEffects())
384 MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
385385 return false;
386386
387387 if (MI->mayLoad()) {
11771177 }
11781178
11791179 if (isPosition() || isDebugInstr() || isTerminator() ||
1180 hasUnmodeledSideEffects())
1180 mayRaiseFPException() || hasUnmodeledSideEffects())
11811181 return false;
11821182
11831183 // See if this instruction does a load. If so, we have to guarantee that the
15431543 OS << "nsw ";
15441544 if (getFlag(MachineInstr::IsExact))
15451545 OS << "exact ";
1546 if (getFlag(MachineInstr::FPExcept))
1547 OS << "fpexcept ";
15461548
15471549 // Print the opcode name.
15481550 if (TII)
578578 /// Return true if the instruction causes a chain between memory
579579 /// references before and after it.
580580 static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
581 return MI.isCall() || MI.hasUnmodeledSideEffects() ||
581 return MI.isCall() || MI.mayRaiseFPException() ||
582 MI.hasUnmodeledSideEffects() ||
582583 (MI.hasOrderedMemoryRef() &&
583584 (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
584585 }
32373238
32383239 // Assume ordered loads and stores may have a loop carried dependence.
32393240 if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
3241 SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
32403242 SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
32413243 return true;
32423244
18241824 assert(Def->isBitcast() && "Invalid definition");
18251825
18261826 // Bail if there are effects that a plain copy will not expose.
1827 if (Def->hasUnmodeledSideEffects())
1827 if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects())
18281828 return ValueTrackerResult();
18291829
18301830 // Bitcasts with more than one def are not supported.
711711 AAForDep = UseAA ? AA : nullptr;
712712
713713 BarrierChain = nullptr;
714 SUnit *FPBarrierChain = nullptr;
714715
715716 this->TrackLaneMasks = TrackLaneMasks;
716717 MISUnitMap.clear();
870871 addBarrierChain(NonAliasStores);
871872 addBarrierChain(NonAliasLoads);
872873
874 // Add dependency against previous FP barrier and reset FP barrier.
875 if (FPBarrierChain)
876 FPBarrierChain->addPredBarrier(BarrierChain);
877 FPBarrierChain = BarrierChain;
878
873879 continue;
880 }
881
882 // Instructions that may raise FP exceptions depend on each other.
883 if (MI.mayRaiseFPException()) {
884 if (FPBarrierChain)
885 FPBarrierChain->addPredBarrier(SU);
886 FPBarrierChain = SU;
874887 }
875888
876889 // If it's not a store or a variant load, we're done.
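An annotation on the scheduling logic above (descriptive comment, not part of the patch):

  // FPBarrierChain tracks the most recently visited SUnit that may raise an
  // FP exception.  Each new such SUnit gets a barrier edge to it, so all
  // exception-raising instructions stay ordered relative to one another and
  // to any full scheduling barrier, while instructions without the property
  // keep their existing scheduling freedom.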
882882
883883 if (Flags.hasExact())
884884 MI->setFlag(MachineInstr::MIFlag::IsExact);
885
886 if (Flags.hasFPExcept())
887 MI->setFlag(MachineInstr::MIFlag::FPExcept);
885888 }
886889
887890 // Emit all of the actual operands of this instruction, adding them to the
69536953 Result = DAG.getNode(Opcode, sdl, VTs,
69546954 { Chain, getValue(FPI.getArgOperand(0)),
69556955 getValue(FPI.getArgOperand(1)) });
6956
6957 if (FPI.getExceptionBehavior() !=
6958 ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) {
6959 SDNodeFlags Flags;
6960 Flags.setFPExcept(true);
6961 Result->setFlags(Flags);
6962 }
69566963
69576964 assert(Result.getNode()->getNumValues() == 2);
69586965 SDValue OutChain = Result.getValue(1);
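For reference, the IR shape that reaches this code (an illustrative constrained intrinsic call, not taken from this diff):

  // A call such as
  //   %r = call double @llvm.experimental.constrained.fadd.f64(
  //            double %a, double %b,
  //            metadata !"round.dynamic", metadata !"fpexcept.strict")
  // arrives here with an exception behavior other than ebIgnore, so the
  // STRICT_FADD node built above is tagged FPExcept; with
  // !"fpexcept.ignore" the node stays untagged and the resulting MI will
  // not be treated as exception-raising.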
11211121 #endif
11221122
11231123 // When we are using non-default rounding modes or FP exception behavior
1124 // FP operations are represented by StrictFP pseudo-operations. They
1125 // need to be simplified here so that the target-specific instruction
1126 // selectors know how to handle them.
1127 //
1128 // If the current node is a strict FP pseudo-op, the isStrictFPOp()
1129 // function will provide the corresponding normal FP opcode to which the
1130 // node should be mutated.
1131 //
1132 // FIXME: The backends need a way to handle FP constraints.
1133 if (Node->isStrictFPOpcode())
1124 // FP operations are represented by StrictFP pseudo-operations. For
1125 // targets that do not (yet) understand strict FP operations directly,
1126 // we convert them to normal FP opcodes instead at this point. This
1127 // will allow them to be handled by existing target-specific instruction
1128 // selectors.
1129 if (Node->isStrictFPOpcode() &&
1130 (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0))
1131 != TargetLowering::Legal))
11341132 Node = CurDAG->mutateStrictFPToFP(Node);
11351133
11361134 LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: ";
898898 return true;
899899
900900 // Avoid instructions obviously unsafe for remat.
901 if (MI.isNotDuplicable() || MI.mayStore() || MI.hasUnmodeledSideEffects())
901 if (MI.isNotDuplicable() || MI.mayStore() || MI.mayRaiseFPException() ||
902 MI.hasUnmodeledSideEffects())
902903 return false;
903904
904905 // Don't remat inline asm. We have no idea how expensive it is
661661 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand);
662662 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
663663 }
664
665 // Constrained floating-point operations default to expand.
666 setOperationAction(ISD::STRICT_FADD, VT, Expand);
667 setOperationAction(ISD::STRICT_FSUB, VT, Expand);
668 setOperationAction(ISD::STRICT_FMUL, VT, Expand);
669 setOperationAction(ISD::STRICT_FDIV, VT, Expand);
670 setOperationAction(ISD::STRICT_FREM, VT, Expand);
671 setOperationAction(ISD::STRICT_FMA, VT, Expand);
672 setOperationAction(ISD::STRICT_FSQRT, VT, Expand);
673 setOperationAction(ISD::STRICT_FPOW, VT, Expand);
674 setOperationAction(ISD::STRICT_FPOWI, VT, Expand);
675 setOperationAction(ISD::STRICT_FSIN, VT, Expand);
676 setOperationAction(ISD::STRICT_FCOS, VT, Expand);
677 setOperationAction(ISD::STRICT_FEXP, VT, Expand);
678 setOperationAction(ISD::STRICT_FEXP2, VT, Expand);
679 setOperationAction(ISD::STRICT_FLOG, VT, Expand);
680 setOperationAction(ISD::STRICT_FLOG10, VT, Expand);
681 setOperationAction(ISD::STRICT_FLOG2, VT, Expand);
682 setOperationAction(ISD::STRICT_FRINT, VT, Expand);
683 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand);
684 setOperationAction(ISD::STRICT_FCEIL, VT, Expand);
685 setOperationAction(ISD::STRICT_FFLOOR, VT, Expand);
686 setOperationAction(ISD::STRICT_FROUND, VT, Expand);
687 setOperationAction(ISD::STRICT_FTRUNC, VT, Expand);
688 setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand);
689 setOperationAction(ISD::STRICT_FMINNUM, VT, Expand);
690 setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand);
691 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand);
664692
665693 // For most targets @llvm.get.dynamic.area.offset just returns 0.
666694 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
400400 setOperationAction(ISD::FSINCOS, VT, Expand);
401401 setOperationAction(ISD::FREM, VT, Expand);
402402 setOperationAction(ISD::FPOW, VT, Expand);
403
404 // Handle constrained floating-point operations.
405 setOperationAction(ISD::STRICT_FADD, VT, Legal);
406 setOperationAction(ISD::STRICT_FSUB, VT, Legal);
407 setOperationAction(ISD::STRICT_FMUL, VT, Legal);
408 setOperationAction(ISD::STRICT_FDIV, VT, Legal);
409 setOperationAction(ISD::STRICT_FMA, VT, Legal);
410 setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
411 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
412 setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
413 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
414 if (Subtarget.hasFPExtension()) {
415 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
416 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
417 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
418 setOperationAction(ISD::STRICT_FROUND, VT, Legal);
419 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
420 }
403421 }
404422 }
405423
431449 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
432450 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
433451 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
452
453 // Handle constrained floating-point operations.
454 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
455 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
456 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
457 setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
458 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
459 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
460 setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
461 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
462 setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
463 setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
464 setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
465 setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
434466 }
435467
436468 // The vector enhancements facility 1 has instructions for these.
474506 setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
475507 setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
476508 setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
509
510 // Handle constrained floating-point operations.
511 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
512 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
513 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
514 setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
515 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
516 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
517 setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
518 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
519 setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
520 setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
521 setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
522 setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
523 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
524 MVT::v4f32, MVT::v2f64 }) {
525 setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
526 setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
527 }
477528 }
478529
479530 // We have fused multiply-addition for f32 and f64 but not f128.
5151
5252 // Moves between two floating-point registers that also set the condition
5353 // codes.
54 let Uses = [FPC], Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
54 let Uses = [FPC], mayRaiseFPException = 1,
55 Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
5556 defm LTEBR : LoadAndTestRRE<"ltebr", 0xB302, FP32>;
5657 defm LTDBR : LoadAndTestRRE<"ltdbr", 0xB312, FP64>;
5758 defm LTXBR : LoadAndTestRRE<"ltxbr", 0xB342, FP128>;
6768
6869 // Use a normal load-and-test for compare against zero in case of
6970 // vector support (via a pseudo to simplify instruction selection).
70 let Uses = [FPC], Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
71 let Uses = [FPC], mayRaiseFPException = 1,
72 Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
7173 def LTEBRCompare_VecPseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>;
7274 def LTDBRCompare_VecPseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>;
7375 def LTXBRCompare_VecPseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>;
172174 // Convert floating-point values to narrower representations, rounding
173175 // according to the current mode. The destination of LEXBR and LDXBR
174176 // is a 128-bit value, but only the first register of the pair is used.
175 let Uses = [FPC] in {
176 def LEDBR : UnaryRRE<"ledbr", 0xB344, fpround, FP32, FP64>;
177 let Uses = [FPC], mayRaiseFPException = 1 in {
178 def LEDBR : UnaryRRE<"ledbr", 0xB344, any_fpround, FP32, FP64>;
177179 def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>;
178180 def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>;
179181
186188 }
187189
188190 let Predicates = [FeatureNoVectorEnhancements1] in {
189 def : Pat<(f32 (fpround FP128:$src)),
191 def : Pat<(f32 (any_fpround FP128:$src)),
190192 (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>;
191 def : Pat<(f64 (fpround FP128:$src)),
193 def : Pat<(f64 (any_fpround FP128:$src)),
192194 (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
193195 }
194196
195197 // Extend register floating-point values to wider representations.
196 let Uses = [FPC] in {
197 def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>;
198 let Uses = [FPC], mayRaiseFPException = 1 in {
199 def LDEBR : UnaryRRE<"ldebr", 0xB304, any_fpextend, FP64, FP32>;
198200 def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>;
199201 def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>;
200202 }
201203 let Predicates = [FeatureNoVectorEnhancements1] in {
202 def : Pat<(f128 (fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>;
203 def : Pat<(f128 (fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>;
204 def : Pat<(f128 (any_fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>;
205 def : Pat<(f128 (any_fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>;
204206 }
205207
206208 // Extend memory floating-point values to wider representations.
207 let Uses = [FPC] in {
209 let Uses = [FPC], mayRaiseFPException = 1 in {
208210 def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>;
209211 def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>;
210212 def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>;
217219 }
218220
219221 // Convert a signed integer register value to a floating-point one.
220 let Uses = [FPC] in {
222 let Uses = [FPC], mayRaiseFPException = 1 in {
221223 def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>;
222224 def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>;
223225 def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>;
229231
230232 // The FP extension feature provides versions of the above that allow
231233 // specifying rounding mode and inexact-exception suppression flags.
232 let Uses = [FPC], Predicates = [FeatureFPExtension] in {
234 let Uses = [FPC], mayRaiseFPException = 1, Predicates = [FeatureFPExtension] in {
233235 def CEFBRA : TernaryRRFe<"cefbra", 0xB394, FP32, GR32>;
234236 def CDFBRA : TernaryRRFe<"cdfbra", 0xB395, FP64, GR32>;
235237 def CXFBRA : TernaryRRFe<"cxfbra", 0xB396, FP128, GR32>;
241243
242244 // Convert an unsigned integer register value to a floating-point one.
243245 let Predicates = [FeatureFPExtension] in {
244 let Uses = [FPC] in {
246 let Uses = [FPC], mayRaiseFPException = 1 in {
245247 def CELFBR : TernaryRRFe<"celfbr", 0xB390, FP32, GR32>;
246248 def CDLFBR : TernaryRRFe<"cdlfbr", 0xB391, FP64, GR32>;
247249 def CXLFBR : TernaryRRFe<"cxlfbr", 0xB392, FP128, GR32>;
262264
263265 // Convert a floating-point register value to a signed integer value,
264266 // with the second operand (modifier M3) specifying the rounding mode.
265 let Uses = [FPC], Defs = [CC] in {
267 let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
266268 def CFEBR : BinaryRRFe<"cfebr", 0xB398, GR32, FP32>;
267269 def CFDBR : BinaryRRFe<"cfdbr", 0xB399, GR32, FP64>;
268270 def CFXBR : BinaryRRFe<"cfxbr", 0xB39A, GR32, FP128>;
283285
284286 // The FP extension feature provides versions of the above that allow
285287 // also specifying the inexact-exception suppression flag.
286 let Uses = [FPC], Predicates = [FeatureFPExtension], Defs = [CC] in {
288 let Uses = [FPC], mayRaiseFPException = 1,
289 Predicates = [FeatureFPExtension], Defs = [CC] in {
287290 def CFEBRA : TernaryRRFe<"cfebra", 0xB398, GR32, FP32>;
288291 def CFDBRA : TernaryRRFe<"cfdbra", 0xB399, GR32, FP64>;
289292 def CFXBRA : TernaryRRFe<"cfxbra", 0xB39A, GR32, FP128>;
295298
296299 // Convert a floating-point register value to an unsigned integer value.
297300 let Predicates = [FeatureFPExtension] in {
298 let Uses = [FPC], Defs = [CC] in {
301 let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
299302 def CLFEBR : TernaryRRFe<"clfebr", 0xB39C, GR32, FP32>;
300303 def CLFDBR : TernaryRRFe<"clfdbr", 0xB39D, GR32, FP64>;
301304 def CLFXBR : TernaryRRFe<"clfxbr", 0xB39E, GR32, FP128>;
361364 def LNDFR_32 : UnaryRRE<"lndfr", 0xB371, fnabs, FP32, FP32>;
362365
363366 // Square root.
364 let Uses = [FPC] in {
365 def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32, FP32>;
366 def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64, FP64>;
367 def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>;
368
369 def SQEB : UnaryRXE<"sqeb", 0xED14, loadu, FP32, 4>;
370 def SQDB : UnaryRXE<"sqdb", 0xED15, loadu, FP64, 8>;
367 let Uses = [FPC], mayRaiseFPException = 1 in {
368 def SQEBR : UnaryRRE<"sqebr", 0xB314, any_fsqrt, FP32, FP32>;
369 def SQDBR : UnaryRRE<"sqdbr", 0xB315, any_fsqrt, FP64, FP64>;
370 def SQXBR : UnaryRRE<"sqxbr", 0xB316, any_fsqrt, FP128, FP128>;
371
372 def SQEB : UnaryRXE<"sqeb", 0xED14, loadu, FP32, 4>;
373 def SQDB : UnaryRXE<"sqdb", 0xED15, loadu, FP64, 8>;
371374 }
372375
373376 // Round to an integer, with the second operand (modifier M3) specifying
374377 // the rounding mode. These forms always check for inexact conditions.
375 let Uses = [FPC] in {
378 let Uses = [FPC], mayRaiseFPException = 1 in {
376379 def FIEBR : BinaryRRFe<"fiebr", 0xB357, FP32, FP32>;
377380 def FIDBR : BinaryRRFe<"fidbr", 0xB35F, FP64, FP64>;
378381 def FIXBR : BinaryRRFe<"fixbr", 0xB347, FP128, FP128>;
380383
381384 // frint rounds according to the current mode (modifier 0) and detects
382385 // inexact conditions.
383 def : Pat<(frint FP32:$src), (FIEBR 0, FP32:$src)>;
384 def : Pat<(frint FP64:$src), (FIDBR 0, FP64:$src)>;
385 def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>;
386 def : Pat<(any_frint FP32:$src), (FIEBR 0, FP32:$src)>;
387 def : Pat<(any_frint FP64:$src), (FIDBR 0, FP64:$src)>;
388 def : Pat<(any_frint FP128:$src), (FIXBR 0, FP128:$src)>;
386389
387390 let Predicates = [FeatureFPExtension] in {
388391 // Extended forms of the FIxBR instructions. M4 can be set to 4
389392 // to suppress detection of inexact conditions.
390 let Uses = [FPC] in {
393 let Uses = [FPC], mayRaiseFPException = 1 in {
391394 def FIEBRA : TernaryRRFe<"fiebra", 0xB357, FP32, FP32>;
392395 def FIDBRA : TernaryRRFe<"fidbra", 0xB35F, FP64, FP64>;
393396 def FIXBRA : TernaryRRFe<"fixbra", 0xB347, FP128, FP128>;
394397 }
395398
396399 // fnearbyint is like frint but does not detect inexact conditions.
397 def : Pat<(fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>;
398 def : Pat<(fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>;
399 def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>;
400 def : Pat<(any_fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>;
401 def : Pat<(any_fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>;
402 def : Pat<(any_fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>;
400403
401404 // floor is no longer allowed to raise an inexact condition,
402405 // so restrict it to the cases where the condition can be suppressed.
403406 // Mode 7 is round towards -inf.
404 def : Pat<(ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>;
405 def : Pat<(ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>;
406 def : Pat<(ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>;
407 def : Pat<(any_ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>;
408 def : Pat<(any_ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>;
409 def : Pat<(any_ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>;
407410
408411 // Same idea for ceil, where mode 6 is round towards +inf.
409 def : Pat<(fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>;
410 def : Pat<(fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>;
411 def : Pat<(fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>;
412 def : Pat<(any_fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>;
413 def : Pat<(any_fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>;
414 def : Pat<(any_fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>;
412415
413416 // Same idea for trunc, where mode 5 is round towards zero.
414 def : Pat<(ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>;
415 def : Pat<(ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>;
416 def : Pat<(ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>;
417 def : Pat<(any_ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>;
418 def : Pat<(any_ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>;
419 def : Pat<(any_ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>;
417420
418421 // Same idea for round, where mode 1 is round towards nearest with
419422 // ties away from zero.
420 def : Pat<(fround FP32:$src), (FIEBRA 1, FP32:$src, 4)>;
421 def : Pat<(fround FP64:$src), (FIDBRA 1, FP64:$src, 4)>;
422 def : Pat<(fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
423 def : Pat<(any_fround FP32:$src), (FIEBRA 1, FP32:$src, 4)>;
424 def : Pat<(any_fround FP64:$src), (FIDBRA 1, FP64:$src, 4)>;
425 def : Pat<(any_fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
423426 }
424427
425428 //===----------------------------------------------------------------------===//
427430 //===----------------------------------------------------------------------===//
428431
429432 // Addition.
430 let Uses = [FPC], Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
433 let Uses = [FPC], mayRaiseFPException = 1,
434 Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
431435 let isCommutable = 1 in {
432 def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32, FP32>;
433 def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64, FP64>;
434 def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>;
435 }
436 def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load, 4>;
437 def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load, 8>;
436 def AEBR : BinaryRRE<"aebr", 0xB30A, any_fadd, FP32, FP32>;
437 def ADBR : BinaryRRE<"adbr", 0xB31A, any_fadd, FP64, FP64>;
438 def AXBR : BinaryRRE<"axbr", 0xB34A, any_fadd, FP128, FP128>;
439 }
440 def AEB : BinaryRXE<"aeb", 0xED0A, any_fadd, FP32, load, 4>;
441 def ADB : BinaryRXE<"adb", 0xED1A, any_fadd, FP64, load, 8>;
438442 }
439443
440444 // Subtraction.
441 let Uses = [FPC], Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
442 def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32, FP32>;
443 def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64, FP64>;
444 def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>;
445
446 def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load, 4>;
447 def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load, 8>;
445 let Uses = [FPC], mayRaiseFPException = 1,
446 Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
447 def SEBR : BinaryRRE<"sebr", 0xB30B, any_fsub, FP32, FP32>;
448 def SDBR : BinaryRRE<"sdbr", 0xB31B, any_fsub, FP64, FP64>;
449 def SXBR : BinaryRRE<"sxbr", 0xB34B, any_fsub, FP128, FP128>;
450
451 def SEB : BinaryRXE<"seb", 0xED0B, any_fsub, FP32, load, 4>;
452 def SDB : BinaryRXE<"sdb", 0xED1B, any_fsub, FP64, load, 8>;
448453 }
449454
450455 // Multiplication.
451 let Uses = [FPC] in {
456 let Uses = [FPC], mayRaiseFPException = 1 in {
452457 let isCommutable = 1 in {
453 def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32, FP32>;
454 def MDBR : BinaryRRE<"mdbr", 0xB31C, fmul, FP64, FP64>;
455 def MXBR : BinaryRRE<"mxbr", 0xB34C, fmul, FP128, FP128>;
456 }
457 def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load, 4>;
458 def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>;
458 def MEEBR : BinaryRRE<"meebr", 0xB317, any_fmul, FP32, FP32>;
459 def MDBR : BinaryRRE<"mdbr", 0xB31C, any_fmul, FP64, FP64>;
460 def MXBR : BinaryRRE<"mxbr", 0xB34C, any_fmul, FP128, FP128>;
461 }
462 def MEEB : BinaryRXE<"meeb", 0xED17, any_fmul, FP32, load, 4>;
463 def MDB : BinaryRXE<"mdb", 0xED1C, any_fmul, FP64, load, 8>;
459464 }
460465
461466 // f64 multiplication of two FP32 registers.
462 let Uses = [FPC] in
467 let Uses = [FPC], mayRaiseFPException = 1 in
463468 def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
464 def : Pat<(fmul (f64 (fpextend FP32:$src1)), (f64 (fpextend FP32:$src2))),
469 def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
470 (f64 (fpextend FP32:$src2))),
465471 (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
466472 FP32:$src1, subreg_h32), FP32:$src2)>;
467473
468474 // f64 multiplication of an FP32 register and an f32 memory.
469 let Uses = [FPC] in
475 let Uses = [FPC], mayRaiseFPException = 1 in
470476 def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
471 def : Pat<(fmul (f64 (fpextend FP32:$src1)),
472 (f64 (extloadf32 bdxaddr12only:$addr))),
477 def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
478 (f64 (extloadf32 bdxaddr12only:$addr))),
473479 (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
474480 bdxaddr12only:$addr)>;
475481
476482 // f128 multiplication of two FP64 registers.
477 let Uses = [FPC] in
483 let Uses = [FPC], mayRaiseFPException = 1 in
478484 def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
479485 let Predicates = [FeatureNoVectorEnhancements1] in
480 def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))),
486 def : Pat<(any_fmul (f128 (fpextend FP64:$src1)),
487 (f128 (fpextend FP64:$src2))),
481488 (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
482489 FP64:$src1, subreg_h64), FP64:$src2)>;
483490
484491 // f128 multiplication of an FP64 register and an f64 memory.
485 let Uses = [FPC] in
492 let Uses = [FPC], mayRaiseFPException = 1 in
486493 def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
487494 let Predicates = [FeatureNoVectorEnhancements1] in
488 def : Pat<(fmul (f128 (fpextend FP64:$src1)),
489 (f128 (extloadf64 bdxaddr12only:$addr))),
495 def : Pat<(any_fmul (f128 (fpextend FP64:$src1)),
496 (f128 (extloadf64 bdxaddr12only:$addr))),
490497 (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
491498 bdxaddr12only:$addr)>;
492499
493500 // Fused multiply-add.
494 let Uses = [FPC] in {
495 def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>;
496 def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64, FP64>;
497
498 def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, FP32, load, 4>;
499 def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, FP64, load, 8>;
501 let Uses = [FPC], mayRaiseFPException = 1 in {
502 def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>;
503 def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>;
504
505 def MAEB : TernaryRXF<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>;
506 def MADB : TernaryRXF<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>;
500507 }
501508
502509 // Fused multiply-subtract.
503 let Uses = [FPC] in {
504 def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32, FP32>;
505 def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64, FP64>;
506
507 def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, FP32, load, 4>;
508 def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, FP64, load, 8>;
510 let Uses = [FPC], mayRaiseFPException = 1 in {
511 def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>;
512 def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>;
513
514 def MSEB : TernaryRXF<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>;
515 def MSDB : TernaryRXF<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>;
509516 }
510517
511518 // Division.
512 let Uses = [FPC] in {
513 def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>;
514 def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64, FP64>;
515 def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>;
516
517 def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>;
518 def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>;
519 let Uses = [FPC], mayRaiseFPException = 1 in {
520 def DEBR : BinaryRRE<"debr", 0xB30D, any_fdiv, FP32, FP32>;
521 def DDBR : BinaryRRE<"ddbr", 0xB31D, any_fdiv, FP64, FP64>;
522 def DXBR : BinaryRRE<"dxbr", 0xB34D, any_fdiv, FP128, FP128>;
523
524 def DEB : BinaryRXE<"deb", 0xED0D, any_fdiv, FP32, load, 4>;
525 def DDB : BinaryRXE<"ddb", 0xED1D, any_fdiv, FP64, load, 8>;
519526 }
520527
521528 // Divide to integer.
522 let Uses = [FPC], Defs = [CC] in {
529 let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
523530 def DIEBR : TernaryRRFb<"diebr", 0xB353, FP32, FP32, FP32>;
524531 def DIDBR : TernaryRRFb<"didbr", 0xB35B, FP64, FP64, FP64>;
525532 }
528535 // Comparisons
529536 //===----------------------------------------------------------------------===//
530537
531 let Uses = [FPC], Defs = [CC], CCValues = 0xF in {
538 let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC], CCValues = 0xF in {
532539 def CEBR : CompareRRE<"cebr", 0xB309, z_fcmp, FP32, FP32>;
533540 def CDBR : CompareRRE<"cdbr", 0xB319, z_fcmp, FP64, FP64>;
534541 def CXBR : CompareRRE<"cxbr", 0xB349, z_fcmp, FP128, FP128>;
569576 }
570577 }
571578
572 let Defs = [FPC] in {
579 let Defs = [FPC], mayRaiseFPException = 1 in {
573580 def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>;
574581 def LFAS : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>;
575582 }
923923 // See comments in SystemZInstrFP.td for the suppression flags and
924924 // rounding modes.
925925 multiclass VectorRounding<Instruction insn, TypedReg tr> {
926 def : FPConversion<insn, frint, tr, tr, 0, 0>;
927 def : FPConversion<insn, fnearbyint, tr, tr, 4, 0>;
928 def : FPConversion<insn, ffloor, tr, tr, 4, 7>;
929 def : FPConversion<insn, fceil, tr, tr, 4, 6>;
930 def : FPConversion<insn, ftrunc, tr, tr, 4, 5>;
931 def : FPConversion<insn, fround, tr, tr, 4, 1>;
926 def : FPConversion<insn, any_frint, tr, tr, 0, 0>;
927 def : FPConversion<insn, any_fnearbyint, tr, tr, 4, 0>;
928 def : FPConversion<insn, any_ffloor, tr, tr, 4, 7>;
929 def : FPConversion<insn, any_fceil, tr, tr, 4, 6>;
930 def : FPConversion<insn, any_ftrunc, tr, tr, 4, 5>;
931 def : FPConversion<insn, any_fround, tr, tr, 4, 1>;
932932 }
933933
934934 let Predicates = [FeatureVector] in {
935935 // Add.
936 let Uses = [FPC] in {
936 let Uses = [FPC], mayRaiseFPException = 1 in {
937937 def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>;
938 def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
939 def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
938 def VFADB : BinaryVRRc<"vfadb", 0xE7E3, any_fadd, v128db, v128db, 3, 0>;
939 def WFADB : BinaryVRRc<"wfadb", 0xE7E3, any_fadd, v64db, v64db, 3, 8>;
940940 let Predicates = [FeatureVectorEnhancements1] in {
941 def VFASB : BinaryVRRc<"vfasb", 0xE7E3, fadd, v128sb, v128sb, 2, 0>;
942 def WFASB : BinaryVRRc<"wfasb", 0xE7E3, fadd, v32sb, v32sb, 2, 8>;
943 def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, fadd, v128xb, v128xb, 4, 8>;
941 def VFASB : BinaryVRRc<"vfasb", 0xE7E3, any_fadd, v128sb, v128sb, 2, 0>;
942 def WFASB : BinaryVRRc<"wfasb", 0xE7E3, any_fadd, v32sb, v32sb, 2, 8>;
943 def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, any_fadd, v128xb, v128xb, 4, 8>;
944944 }
945945 }
946946
947947 // Convert from fixed 64-bit.
948 let Uses = [FPC] in {
948 let Uses = [FPC], mayRaiseFPException = 1 in {
949949 def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>;
950950 def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
951951 def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
953953 def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>;
954954
955955 // Convert from logical 64-bit.
956 let Uses = [FPC] in {
956 let Uses = [FPC], mayRaiseFPException = 1 in {
957957 def VCDLG : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>;
958958 def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
959959 def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
961961 def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>;
962962
963963 // Convert to fixed 64-bit.
964 let Uses = [FPC] in {
964 let Uses = [FPC], mayRaiseFPException = 1 in {
965965 def VCGD : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>;
966966 def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
967967 def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>;
970970 def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>;
971971
972972 // Convert to logical 64-bit.
973 let Uses = [FPC] in {
973 let Uses = [FPC], mayRaiseFPException = 1 in {
974974 def VCLGD : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>;
975975 def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;
976976 def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>;
979979 def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>;
980980
981981 // Divide.
982 let Uses = [FPC] in {
982 let Uses = [FPC], mayRaiseFPException = 1 in {
983983 def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>;
984 def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
985 def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
984 def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, any_fdiv, v128db, v128db, 3, 0>;
985 def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, any_fdiv, v64db, v64db, 3, 8>;
986986 let Predicates = [FeatureVectorEnhancements1] in {
987 def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, fdiv, v128sb, v128sb, 2, 0>;
988 def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, fdiv, v32sb, v32sb, 2, 8>;
989 def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, fdiv, v128xb, v128xb, 4, 8>;
987 def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, any_fdiv, v128sb, v128sb, 2, 0>;
988 def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, any_fdiv, v32sb, v32sb, 2, 8>;
989 def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, any_fdiv, v128xb, v128xb, 4, 8>;
990990 }
991991 }
992992
993993 // Load FP integer.
994 let Uses = [FPC] in {
994 let Uses = [FPC], mayRaiseFPException = 1 in {
995995 def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>;
996996 def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>;
997997 def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
999999 defm : VectorRounding<VFIDB, v128db>;
10001000 defm : VectorRounding<WFIDB, v64db>;
10011001 let Predicates = [FeatureVectorEnhancements1] in {
1002 let Uses = [FPC] in {
1002 let Uses = [FPC], mayRaiseFPException = 1 in {
10031003 def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>;
10041004 def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>;
10051005 def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>;
10101010 }
10111011
10121012 // Load lengthened.
1013 let Uses = [FPC] in {
1013 let Uses = [FPC], mayRaiseFPException = 1 in {
10141014 def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
10151015 def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>;
1016 def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32sb, 2, 8>;
1016 def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8>;
10171017 }
10181018 let Predicates = [FeatureVectorEnhancements1] in {
1019 let Uses = [FPC] in {
1019 let Uses = [FPC], mayRaiseFPException = 1 in {
10201020 let isAsmParserOnly = 1 in {
10211021 def VFLL : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>;
10221022 def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>;
10231023 def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>;
10241024 }
1025 def WFLLD : UnaryVRRa<"wflld", 0xE7C4, fpextend, v128xb, v64db, 3, 8>;
1026 }
1027 def : Pat<(f128 (fpextend (f32 VR32:$src))),
1025 def WFLLD : UnaryVRRa<"wflld", 0xE7C4, any_fpextend, v128xb, v64db, 3, 8>;
1026 }
1027 def : Pat<(f128 (any_fpextend (f32 VR32:$src))),
10281028 (WFLLD (WLDEB VR32:$src))>;
10291029 }
10301030
10311031 // Load rounded.
1032 let Uses = [FPC] in {
1032 let Uses = [FPC], mayRaiseFPException = 1 in {
10331033 def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>;
10341034 def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
10351035 def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
10361036 }
10371037 def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
1038 def : FPConversion<WLEDB, fpround, v32sb, v64db, 0, 0>;
1038 def : FPConversion<WLEDB, any_fpround, v32sb, v64db, 0, 0>;
10391039 let Predicates = [FeatureVectorEnhancements1] in {
1040 let Uses = [FPC] in {
1040 let Uses = [FPC], mayRaiseFPException = 1 in {
10411041 let isAsmParserOnly = 1 in {
10421042 def VFLR : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>;
10431043 def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
10451045 }
10461046 def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>;
10471047 }
1048 def : FPConversion<WFLRX, fpround, v64db, v128xb, 0, 0>;
1049 def : Pat<(f32 (fpround (f128 VR128:$src))),
1048 def : FPConversion<WFLRX, any_fpround, v64db, v128xb, 0, 0>;
1049 def : Pat<(f32 (any_fpround (f128 VR128:$src))),
10501050 (WLEDB (WFLRX VR128:$src, 0, 3), 0, 0)>;
10511051 }
10521052
10531053 // Maximum.
10541054 multiclass VectorMax<Instruction insn, TypedReg tr> {
1055 def : FPMinMax<insn, fmaxnum, tr, 4>;
1055 def : FPMinMax<insn, any_fmaxnum, tr, 4>;
10561056 def : FPMinMax<insn, fmaximum, tr, 1>;
10571057 }
10581058 let Predicates = [FeatureVectorEnhancements1] in {
1059 let Uses = [FPC] in {
1059 let Uses = [FPC], mayRaiseFPException = 1 in {
10601060 def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>;
10611061 def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb,
10621062 v128db, v128db, 3, 0>;
10781078
10791079 // Minimum.
10801080 multiclass VectorMin<Instruction insn, TypedReg tr> {
1081 def : FPMinMax<insn, fminnum, tr, 4>;
1081 def : FPMinMax<insn, any_fminnum, tr, 4>;
10821082 def : FPMinMax<insn, fminimum, tr, 1>;
10831083 }
10841084 let Predicates = [FeatureVectorEnhancements1] in {
1085 let Uses = [FPC] in {
1085 let Uses = [FPC], mayRaiseFPException = 1 in {
10861086 def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>;
10871087 def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb,
10881088 v128db, v128db, 3, 0>;
11031103 }
11041104
11051105 // Multiply.
1106 let Uses = [FPC] in {
1106 let Uses = [FPC], mayRaiseFPException = 1 in {
11071107 def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>;
1108 def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
1109 def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
1108 def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, any_fmul, v128db, v128db, 3, 0>;
1109 def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, any_fmul, v64db, v64db, 3, 8>;
11101110 let Predicates = [FeatureVectorEnhancements1] in {
1111 def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, fmul, v128sb, v128sb, 2, 0>;
1112 def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, fmul, v32sb, v32sb, 2, 8>;
1113 def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, fmul, v128xb, v128xb, 4, 8>;
1111 def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, any_fmul, v128sb, v128sb, 2, 0>;
1112 def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, any_fmul, v32sb, v32sb, 2, 8>;
1113 def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, any_fmul, v128xb, v128xb, 4, 8>;
11141114 }
11151115 }
11161116
11171117 // Multiply and add.
1118 let Uses = [FPC] in {
1118 let Uses = [FPC], mayRaiseFPException = 1 in {
11191119 def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>;
1120 def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
1121 def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
1120 def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, any_fma, v128db, v128db, 0, 3>;
1121 def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, any_fma, v64db, v64db, 8, 3>;
11221122 let Predicates = [FeatureVectorEnhancements1] in {
1123 def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, fma, v128sb, v128sb, 0, 2>;
1124 def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, fma, v32sb, v32sb, 8, 2>;
1125 def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, fma, v128xb, v128xb, 8, 4>;
1123 def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, any_fma, v128sb, v128sb, 0, 2>;
1124 def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, any_fma, v32sb, v32sb, 8, 2>;
1125 def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, any_fma, v128xb, v128xb, 8, 4>;
11261126 }
11271127 }
11281128
11291129 // Multiply and subtract.
1130 let Uses = [FPC] in {
1130 let Uses = [FPC], mayRaiseFPException = 1 in {
11311131 def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>;
1132 def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
1133 def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>;
1132 def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, any_fms, v128db, v128db, 0, 3>;
1133 def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, any_fms, v64db, v64db, 8, 3>;
11341134 let Predicates = [FeatureVectorEnhancements1] in {
1135 def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, fms, v128sb, v128sb, 0, 2>;
1136 def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, fms, v32sb, v32sb, 8, 2>;
1137 def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, fms, v128xb, v128xb, 8, 4>;
1135 def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, any_fms, v128sb, v128sb, 0, 2>;
1136 def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, any_fms, v32sb, v32sb, 8, 2>;
1137 def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, any_fms, v128xb, v128xb, 8, 4>;
11381138 }
11391139 }
11401140
11411141 // Negative multiply and add.
1142 let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in {
1142 let Uses = [FPC], mayRaiseFPException = 1,
1143 Predicates = [FeatureVectorEnhancements1] in {
11431144 def VFNMA : TernaryVRReFloatGeneric<"vfnma", 0xE79F>;
1144 def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, fnma, v128db, v128db, 0, 3>;
1145 def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, fnma, v64db, v64db, 8, 3>;
1146 def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, fnma, v128sb, v128sb, 0, 2>;
1147 def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, fnma, v32sb, v32sb, 8, 2>;
1148 def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, fnma, v128xb, v128xb, 8, 4>;
1145 def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, any_fnma, v128db, v128db, 0, 3>;
1146 def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, any_fnma, v64db, v64db, 8, 3>;
1147 def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, any_fnma, v128sb, v128sb, 0, 2>;
1148 def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, any_fnma, v32sb, v32sb, 8, 2>;
1149 def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, any_fnma, v128xb, v128xb, 8, 4>;
11491150 }
11501151
11511152 // Negative multiply and subtract.
1152 let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in {
1153 let Uses = [FPC], mayRaiseFPException = 1,
1154 Predicates = [FeatureVectorEnhancements1] in {
11531155 def VFNMS : TernaryVRReFloatGeneric<"vfnms", 0xE79E>;
1154 def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, fnms, v128db, v128db, 0, 3>;
1155 def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, fnms, v64db, v64db, 8, 3>;
1156 def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, fnms, v128sb, v128sb, 0, 2>;
1157 def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, fnms, v32sb, v32sb, 8, 2>;
1158 def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, fnms, v128xb, v128xb, 8, 4>;
1156 def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, any_fnms, v128db, v128db, 0, 3>;
1157 def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, any_fnms, v64db, v64db, 8, 3>;
1158 def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, any_fnms, v128sb, v128sb, 0, 2>;
1159 def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, any_fnms, v32sb, v32sb, 8, 2>;
1160 def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, any_fnms, v128xb, v128xb, 8, 4>;
11591161 }
11601162
11611163 // Perform sign operation.
11961198 }
11971199
11981200 // Square root.
1199 let Uses = [FPC] in {
1201 let Uses = [FPC], mayRaiseFPException = 1 in {
12001202 def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>;
1201 def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
1202 def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>;
1203 def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, any_fsqrt, v128db, v128db, 3, 0>;
1204 def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, any_fsqrt, v64db, v64db, 3, 8>;
12031205 let Predicates = [FeatureVectorEnhancements1] in {
1204 def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, fsqrt, v128sb, v128sb, 2, 0>;
1205 def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, fsqrt, v32sb, v32sb, 2, 8>;
1206 def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, fsqrt, v128xb, v128xb, 4, 8>;
1206 def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, any_fsqrt, v128sb, v128sb, 2, 0>;
1207 def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, any_fsqrt, v32sb, v32sb, 2, 8>;
1208 def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, any_fsqrt, v128xb, v128xb, 4, 8>;
12071209 }
12081210 }
12091211
12101212 // Subtract.
1211 let Uses = [FPC] in {
1213 let Uses = [FPC], mayRaiseFPException = 1 in {
12121214 def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>;
1213 def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
1214 def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>;
1215 def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, any_fsub, v128db, v128db, 3, 0>;
1216 def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, any_fsub, v64db, v64db, 3, 8>;
12151217 let Predicates = [FeatureVectorEnhancements1] in {
1216 def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, fsub, v128sb, v128sb, 2, 0>;
1217 def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, fsub, v32sb, v32sb, 2, 8>;
1218 def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, fsub, v128xb, v128xb, 4, 8>;
1218 def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, any_fsub, v128sb, v128sb, 2, 0>;
1219 def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, any_fsub, v32sb, v32sb, 2, 8>;
1220 def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, any_fsub, v128xb, v128xb, 4, 8>;
12191221 }
12201222 }
12211223
12381240
12391241 let Predicates = [FeatureVector] in {
12401242 // Compare scalar.
1241 let Uses = [FPC], Defs = [CC] in {
1243 let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
12421244 def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>;
12431245 def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
12441246 let Predicates = [FeatureVectorEnhancements1] in {
12481250 }
12491251
12501252 // Compare and signal scalar.
1251 let Uses = [FPC], Defs = [CC] in {
1253 let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
12521254 def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>;
12531255 def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
12541256 let Predicates = [FeatureVectorEnhancements1] in {
12581260 }
12591261
12601262 // Compare equal.
1261 let Uses = [FPC] in {
1263 let Uses = [FPC], mayRaiseFPException = 1 in {
12621264 def VFCE : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>;
12631265 defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes,
12641266 v128g, v128db, 3, 0>;
12751277 }
12761278
12771279 // Compare and signal equal.
1278 let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in {
1280 let Uses = [FPC], mayRaiseFPException = 1,
1281 Predicates = [FeatureVectorEnhancements1] in {
12791282 defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, null_frag, null_frag,
12801283 v128g, v128db, 3, 4>;
12811284 defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag,
12891292 }
12901293
12911294 // Compare high.
1292 let Uses = [FPC] in {
1295 let Uses = [FPC], mayRaiseFPException = 1 in {
12931296 def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>;
12941297 defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs,
12951298 v128g, v128db, 3, 0>;
13061309 }
13071310
13081311 // Compare and signal high.
1309 let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in {
1312 let Uses = [FPC], mayRaiseFPException = 1,
1313 Predicates = [FeatureVectorEnhancements1] in {
13101314 defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, null_frag, null_frag,
13111315 v128g, v128db, 3, 4>;
13121316 defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag,
13201324 }
13211325
13221326 // Compare high or equal.
1323 let Uses = [FPC] in {
1327 let Uses = [FPC], mayRaiseFPException = 1 in {
13241328 def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>;
13251329 defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
13261330 v128g, v128db, 3, 0>;
13371341 }
13381342
13391343 // Compare and signal high or equal.
1340 let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in {
1344 let Uses = [FPC], mayRaiseFPException = 1,
1345 Predicates = [FeatureVectorEnhancements1] in {
13411346 defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, null_frag, null_frag,
13421347 v128g, v128db, 3, 4>;
13431348 defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag,
661661 (sub node:$src1, node:$src2)]>;
662662
663663 // Fused multiply-subtract, using the natural operand order.
664 def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
665 (fma node:$src1, node:$src2, (fneg node:$src3))>;
664 def any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
665 (any_fma node:$src1, node:$src2, (fneg node:$src3))>;
666666
667667 // Fused multiply-add and multiply-subtract, but with the order of the
668668 // operands matching SystemZ's MA and MS instructions.
669 def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
670 (fma node:$src2, node:$src3, node:$src1)>;
671 def z_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
672 (fma node:$src2, node:$src3, (fneg node:$src1))>;
669 def z_any_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
670 (any_fma node:$src2, node:$src3, node:$src1)>;
671 def z_any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
672 (any_fma node:$src2, node:$src3, (fneg node:$src1))>;
673673
674674 // Negative fused multiply-add and multiply-subtract.
675 def fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
676 (fneg (fma node:$src1, node:$src2, node:$src3))>;
677 def fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
678 (fneg (fms node:$src1, node:$src2, node:$src3))>;
675 def any_fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
676 (fneg (any_fma node:$src1, node:$src2, node:$src3))>;
677 def any_fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
678 (fneg (any_fms node:$src1, node:$src2, node:$src3))>;
679679
680680 // Floating-point negative absolute.
681681 def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>;
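The any_* fragments these definitions are rewritten on top of are what let a single .td pattern match both the regular and the STRICT_ form of an operation. A minimal sketch of the idea, assuming the PatFrags-based definitions in TargetSelectionDAG.td (reproduced from memory here, so treat the exact spelling as illustrative):

    // Illustrative only: an "any" fragment is a PatFrags that matches either
    // the strict or the non-strict SelectionDAG node, so a pattern written
    // against any_fma covers both ISD::FMA and ISD::STRICT_FMA.
    def any_fma : PatFrags<(ops node:$src1, node:$src2, node:$src3),
                           [(strict_fma node:$src1, node:$src2, node:$src3),
                            (fma node:$src1, node:$src2, node:$src3)]>;

Targets on which the STRICT_ nodes never reach instruction selection simply never match the strict alternative, so their existing patterns are unaffected by the rewrite.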
0 ; Test strict 32-bit floating-point addition.
1 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
3 ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
4 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
5
6 declare float @foo()
7 declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
8
9 ; Check register addition.
10 define float @f1(float %f1, float %f2) {
11 ; CHECK-LABEL: f1:
12 ; CHECK: aebr %f0, %f2
13 ; CHECK: br %r14
14 %res = call float @llvm.experimental.constrained.fadd.f32(
15 float %f1, float %f2,
16 metadata !"round.dynamic",
17 metadata !"fpexcept.strict")
18 ret float %res
19 }
20
21 ; Check the low end of the AEB range.
22 define float @f2(float %f1, float *%ptr) {
23 ; CHECK-LABEL: f2:
24 ; CHECK: aeb %f0, 0(%r2)
25 ; CHECK: br %r14
26 %f2 = load float, float *%ptr
27 %res = call float @llvm.experimental.constrained.fadd.f32(
28 float %f1, float %f2,
29 metadata !"round.dynamic",
30 metadata !"fpexcept.strict")
31 ret float %res
32 }
33
34 ; Check the high end of the aligned AEB range.
35 define float @f3(float %f1, float *%base) {
36 ; CHECK-LABEL: f3:
37 ; CHECK: aeb %f0, 4092(%r2)
38 ; CHECK: br %r14
39 %ptr = getelementptr float, float *%base, i64 1023
40 %f2 = load float, float *%ptr
41 %res = call float @llvm.experimental.constrained.fadd.f32(
42 float %f1, float %f2,
43 metadata !"round.dynamic",
44 metadata !"fpexcept.strict")
45 ret float %res
46 }
47
48 ; Check the next word up, which needs separate address logic.
49 ; Other sequences besides this one would be OK.
50 define float @f4(float %f1, float *%base) {
51 ; CHECK-LABEL: f4:
52 ; CHECK: aghi %r2, 4096
53 ; CHECK: aeb %f0, 0(%r2)
54 ; CHECK: br %r14
55 %ptr = getelementptr float, float *%base, i64 1024
56 %f2 = load float, float *%ptr
57 %res = call float @llvm.experimental.constrained.fadd.f32(
58 float %f1, float %f2,
59 metadata !"round.dynamic",
60 metadata !"fpexcept.strict")
61 ret float %res
62 }
63
64 ; Check negative displacements, which also need separate address logic.
65 define float @f5(float %f1, float *%base) {
66 ; CHECK-LABEL: f5:
67 ; CHECK: aghi %r2, -4
68 ; CHECK: aeb %f0, 0(%r2)
69 ; CHECK: br %r14
70 %ptr = getelementptr float, float *%base, i64 -1
71 %f2 = load float, float *%ptr
72 %res = call float @llvm.experimental.constrained.fadd.f32(
73 float %f1, float %f2,
74 metadata !"round.dynamic",
75 metadata !"fpexcept.strict")
76 ret float %res
77 }
78
79 ; Check that AEB allows indices.
80 define float @f6(float %f1, float *%base, i64 %index) {
81 ; CHECK-LABEL: f6:
82 ; CHECK: sllg %r1, %r3, 2
83 ; CHECK: aeb %f0, 400(%r1,%r2)
84 ; CHECK: br %r14
85 %ptr1 = getelementptr float, float *%base, i64 %index
86 %ptr2 = getelementptr float, float *%ptr1, i64 100
87 %f2 = load float, float *%ptr2
88 %res = call float @llvm.experimental.constrained.fadd.f32(
89 float %f1, float %f2,
90 metadata !"round.dynamic",
91 metadata !"fpexcept.strict")
92 ret float %res
93 }
94
95 ; Check that additions of spilled values can use AEB rather than AEBR.
96 define float @f7(float *%ptr0) {
97 ; CHECK-LABEL: f7:
98 ; CHECK: brasl %r14, foo@PLT
99 ; CHECK-SCALAR: aeb %f0, 16{{[04]}}(%r15)
100 ; CHECK: br %r14
101 %ptr1 = getelementptr float, float *%ptr0, i64 2
102 %ptr2 = getelementptr float, float *%ptr0, i64 4
103 %ptr3 = getelementptr float, float *%ptr0, i64 6
104 %ptr4 = getelementptr float, float *%ptr0, i64 8
105 %ptr5 = getelementptr float, float *%ptr0, i64 10
106 %ptr6 = getelementptr float, float *%ptr0, i64 12
107 %ptr7 = getelementptr float, float *%ptr0, i64 14
108 %ptr8 = getelementptr float, float *%ptr0, i64 16
109 %ptr9 = getelementptr float, float *%ptr0, i64 18
110 %ptr10 = getelementptr float, float *%ptr0, i64 20
111
112 %val0 = load float, float *%ptr0
113 %val1 = load float, float *%ptr1
114 %val2 = load float, float *%ptr2
115 %val3 = load float, float *%ptr3
116 %val4 = load float, float *%ptr4
117 %val5 = load float, float *%ptr5
118 %val6 = load float, float *%ptr6
119 %val7 = load float, float *%ptr7
120 %val8 = load float, float *%ptr8
121 %val9 = load float, float *%ptr9
122 %val10 = load float, float *%ptr10
123
124 %ret = call float @foo()
125
126 %add0 = call float @llvm.experimental.constrained.fadd.f32(
127 float %ret, float %val0,
128 metadata !"round.dynamic",
129 metadata !"fpexcept.strict")
130 %add1 = call float @llvm.experimental.constrained.fadd.f32(
131 float %add0, float %val1,
132 metadata !"round.dynamic",
133 metadata !"fpexcept.strict")
134 %add2 = call float @llvm.experimental.constrained.fadd.f32(
135 float %add1, float %val2,
136 metadata !"round.dynamic",
137 metadata !"fpexcept.strict")
138 %add3 = call float @llvm.experimental.constrained.fadd.f32(
139 float %add2, float %val3,
140 metadata !"round.dynamic",
141 metadata !"fpexcept.strict")
142 %add4 = call float @llvm.experimental.constrained.fadd.f32(
143 float %add3, float %val4,
144 metadata !"round.dynamic",
145 metadata !"fpexcept.strict")
146 %add5 = call float @llvm.experimental.constrained.fadd.f32(
147 float %add4, float %val5,
148 metadata !"round.dynamic",
149 metadata !"fpexcept.strict")
150 %add6 = call float @llvm.experimental.constrained.fadd.f32(
151 float %add5, float %val6,
152 metadata !"round.dynamic",
153 metadata !"fpexcept.strict")
154 %add7 = call float @llvm.experimental.constrained.fadd.f32(
155 float %add6, float %val7,
156 metadata !"round.dynamic",
157 metadata !"fpexcept.strict")
158 %add8 = call float @llvm.experimental.constrained.fadd.f32(
159 float %add7, float %val8,
160 metadata !"round.dynamic",
161 metadata !"fpexcept.strict")
162 %add9 = call float @llvm.experimental.constrained.fadd.f32(
163 float %add8, float %val9,
164 metadata !"round.dynamic",
165 metadata !"fpexcept.strict")
166 %add10 = call float @llvm.experimental.constrained.fadd.f32(
167 float %add9, float %val10,
168 metadata !"round.dynamic",
169 metadata !"fpexcept.strict")
170
171 ret float %add10
172 }
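A note on the 4092/4096 boundary that f3 and f4 above probe: AEB is an RXE-format instruction whose displacement field is 12 bits unsigned, so only byte offsets 0..4095 are directly encodable. With 4-byte floats:

    ;   1023 * 4 = 4092   ; largest in-range 4-aligned offset -> aeb %f0, 4092(%r2)
    ;   1024 * 4 = 4096   ; one element past the field        -> aghi %r2, 4096 first

The f64 tests that follow probe the same boundary with 8-byte strides (511 * 8 = 4088 fits, 512 * 8 = 4096 does not).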
0 ; Test strict 64-bit floating-point addition.
1 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
3 ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
4 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
5 declare double @foo()
6 declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
7
8 ; Check register addition.
9 define double @f1(double %f1, double %f2) {
10 ; CHECK-LABEL: f1:
11 ; CHECK: adbr %f0, %f2
12 ; CHECK: br %r14
13 %res = call double @llvm.experimental.constrained.fadd.f64(
14 double %f1, double %f2,
15 metadata !"round.dynamic",
16 metadata !"fpexcept.strict")
17 ret double %res
18 }
19
20 ; Check the low end of the ADB range.
21 define double @f2(double %f1, double *%ptr) {
22 ; CHECK-LABEL: f2:
23 ; CHECK: adb %f0, 0(%r2)
24 ; CHECK: br %r14
25 %f2 = load double, double *%ptr
26 %res = call double @llvm.experimental.constrained.fadd.f64(
27 double %f1, double %f2,
28 metadata !"round.dynamic",
29 metadata !"fpexcept.strict")
30 ret double %res
31 }
32
33 ; Check the high end of the aligned ADB range.
34 define double @f3(double %f1, double *%base) {
35 ; CHECK-LABEL: f3:
36 ; CHECK: adb %f0, 4088(%r2)
37 ; CHECK: br %r14
38 %ptr = getelementptr double, double *%base, i64 511
39 %f2 = load double, double *%ptr
40 %res = call double @llvm.experimental.constrained.fadd.f64(
41 double %f1, double %f2,
42 metadata !"round.dynamic",
43 metadata !"fpexcept.strict")
44 ret double %res
45 }
46
47 ; Check the next doubleword up, which needs separate address logic.
48 ; Other sequences besides this one would be OK.
49 define double @f4(double %f1, double *%base) {
50 ; CHECK-LABEL: f4:
51 ; CHECK: aghi %r2, 4096
52 ; CHECK: adb %f0, 0(%r2)
53 ; CHECK: br %r14
54 %ptr = getelementptr double, double *%base, i64 512
55 %f2 = load double, double *%ptr
56 %res = call double @llvm.experimental.constrained.fadd.f64(
57 double %f1, double %f2,
58 metadata !"round.dynamic",
59 metadata !"fpexcept.strict")
60 ret double %res
61 }
62
63 ; Check negative displacements, which also need separate address logic.
64 define double @f5(double %f1, double *%base) {
65 ; CHECK-LABEL: f5:
66 ; CHECK: aghi %r2, -8
67 ; CHECK: adb %f0, 0(%r2)
68 ; CHECK: br %r14
69 %ptr = getelementptr double, double *%base, i64 -1
70 %f2 = load double, double *%ptr
71 %res = call double @llvm.experimental.constrained.fadd.f64(
72 double %f1, double %f2,
73 metadata !"round.dynamic",
74 metadata !"fpexcept.strict")
75 ret double %res
76 }
77
78 ; Check that ADB allows indices.
79 define double @f6(double %f1, double *%base, i64 %index) {
80 ; CHECK-LABEL: f6:
81 ; CHECK: sllg %r1, %r3, 3
82 ; CHECK: adb %f0, 800(%r1,%r2)
83 ; CHECK: br %r14
84 %ptr1 = getelementptr double, double *%base, i64 %index
85 %ptr2 = getelementptr double, double *%ptr1, i64 100
86 %f2 = load double, double *%ptr2
87 %res = call double @llvm.experimental.constrained.fadd.f64(
88 double %f1, double %f2,
89 metadata !"round.dynamic",
90 metadata !"fpexcept.strict")
91 ret double %res
92 }
93
94 ; Check that additions of spilled values can use ADB rather than ADBR.
95 define double @f7(double *%ptr0) {
96 ; CHECK-LABEL: f7:
97 ; CHECK: brasl %r14, foo@PLT
98 ; CHECK-SCALAR: adb %f0, 160(%r15)
99 ; CHECK: br %r14
100 %ptr1 = getelementptr double, double *%ptr0, i64 2
101 %ptr2 = getelementptr double, double *%ptr0, i64 4
102 %ptr3 = getelementptr double, double *%ptr0, i64 6
103 %ptr4 = getelementptr double, double *%ptr0, i64 8
104 %ptr5 = getelementptr double, double *%ptr0, i64 10
105 %ptr6 = getelementptr double, double *%ptr0, i64 12
106 %ptr7 = getelementptr double, double *%ptr0, i64 14
107 %ptr8 = getelementptr double, double *%ptr0, i64 16
108 %ptr9 = getelementptr double, double *%ptr0, i64 18
109 %ptr10 = getelementptr double, double *%ptr0, i64 20
110
111 %val0 = load double, double *%ptr0
112 %val1 = load double, double *%ptr1
113 %val2 = load double, double *%ptr2
114 %val3 = load double, double *%ptr3
115 %val4 = load double, double *%ptr4
116 %val5 = load double, double *%ptr5
117 %val6 = load double, double *%ptr6
118 %val7 = load double, double *%ptr7
119 %val8 = load double, double *%ptr8
120 %val9 = load double, double *%ptr9
121 %val10 = load double, double *%ptr10
122
123 %ret = call double @foo()
124
125 %add0 = call double @llvm.experimental.constrained.fadd.f64(
126 double %ret, double %val0,
127 metadata !"round.dynamic",
128 metadata !"fpexcept.strict")
129 %add1 = call double @llvm.experimental.constrained.fadd.f64(
130 double %add0, double %val1,
131 metadata !"round.dynamic",
132 metadata !"fpexcept.strict")
133 %add2 = call double @llvm.experimental.constrained.fadd.f64(
134 double %add1, double %val2,
135 metadata !"round.dynamic",
136 metadata !"fpexcept.strict")
137 %add3 = call double @llvm.experimental.constrained.fadd.f64(
138 double %add2, double %val3,
139 metadata !"round.dynamic",
140 metadata !"fpexcept.strict")
141 %add4 = call double @llvm.experimental.constrained.fadd.f64(
142 double %add3, double %val4,
143 metadata !"round.dynamic",
144 metadata !"fpexcept.strict")
145 %add5 = call double @llvm.experimental.constrained.fadd.f64(
146 double %add4, double %val5,
147 metadata !"round.dynamic",
148 metadata !"fpexcept.strict")
149 %add6 = call double @llvm.experimental.constrained.fadd.f64(
150 double %add5, double %val6,
151 metadata !"round.dynamic",
152 metadata !"fpexcept.strict")
153 %add7 = call double @llvm.experimental.constrained.fadd.f64(
154 double %add6, double %val7,
155 metadata !"round.dynamic",
156 metadata !"fpexcept.strict")
157 %add8 = call double @llvm.experimental.constrained.fadd.f64(
158 double %add7, double %val8,
159 metadata !"round.dynamic",
160 metadata !"fpexcept.strict")
161 %add9 = call double @llvm.experimental.constrained.fadd.f64(
162 double %add8, double %val9,
163 metadata !"round.dynamic",
164 metadata !"fpexcept.strict")
165 %add10 = call double @llvm.experimental.constrained.fadd.f64(
166 double %add9, double %val10,
167 metadata !"round.dynamic",
168 metadata !"fpexcept.strict")
169
170 ret double %add10
171 }
0 ; Test strict 128-bit floating-point addition.
1 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
3
4 declare fp128 @llvm.experimental.constrained.fadd.f128(fp128, fp128, metadata, metadata)
5
6 ; There is no memory form of 128-bit addition.
7 define void @f1(fp128 *%ptr, float %f2) {
8 ; CHECK-LABEL: f1:
9 ; CHECK-DAG: lxebr %f0, %f0
10 ; CHECK-DAG: ld %f1, 0(%r2)
11 ; CHECK-DAG: ld %f3, 8(%r2)
12 ; CHECK: axbr %f0, %f1
13 ; CHECK: std %f0, 0(%r2)
14 ; CHECK: std %f2, 8(%r2)
15 ; CHECK: br %r14
16 %f1 = load fp128, fp128 *%ptr
17 %f2x = fpext float %f2 to fp128
18 %sum = call fp128 @llvm.experimental.constrained.fadd.f128(
19 fp128 %f1, fp128 %f2x,
20 metadata !"round.dynamic",
21 metadata !"fpexcept.strict")
22 store fp128 %sum, fp128 *%ptr
23 ret void
24 }
0 ; Test strict 128-bit floating-point addition on z14.
1 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
3
4 declare fp128 @llvm.experimental.constrained.fadd.f128(fp128, fp128, metadata, metadata)
5
6 define void @f1(fp128 *%ptr1, fp128 *%ptr2) {
7 ; CHECK-LABEL: f1:
8 ; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
9 ; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
10 ; CHECK: wfaxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]]
11 ; CHECK: vst [[RES]], 0(%r2)
12 ; CHECK: br %r14
13 %f1 = load fp128, fp128 *%ptr1
14 %f2 = load fp128, fp128 *%ptr2
15 %sum = call fp128 @llvm.experimental.constrained.fadd.f128(
16 fp128 %f1, fp128 %f2,
17 metadata !"round.dynamic",
18 metadata !"fpexcept.strict")
19 store fp128 %sum, fp128 *%ptr1
20 ret void
21 }
0 ; Verify that strict FP operations are not rescheduled
1 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
3
4 declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
5 declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
6 declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
7 declare float @llvm.sqrt.f32(float)
8 declare void @llvm.s390.sfpc(i32)
9
10 ; For non-strict operations, we expect the post-RA scheduler to
11 ; separate the two square root instructions on z13.
12 define void @f1(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
13 ; CHECK-LABEL: f1:
14 ; CHECK: sqebr
15 ; CHECK: {{aebr|sebr}}
16 ; CHECK: sqebr
17 ; CHECK: br %r14
18
19 %add = fadd float %f1, %f2
20 %sub = fsub float %f3, %f4
21 %sqrt1 = call float @llvm.sqrt.f32(float %f2)
22 %sqrt2 = call float @llvm.sqrt.f32(float %f4)
23
24 %ptr1 = getelementptr float, float *%ptr0, i64 1
25 %ptr2 = getelementptr float, float *%ptr0, i64 2
26 %ptr3 = getelementptr float, float *%ptr0, i64 3
27
28 store float %add, float *%ptr0
29 store float %sub, float *%ptr1
30 store float %sqrt1, float *%ptr2
31 store float %sqrt2, float *%ptr3
32
33 ret void
34 }
35
36 ; But for strict operations, this must not happen.
37 define void @f2(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
38 ; CHECK-LABEL: f2:
39 ; CHECK: {{aebr|sebr}}
40 ; CHECK: {{aebr|sebr}}
41 ; CHECK: sqebr
42 ; CHECK: sqebr
43 ; CHECK: br %r14
44
45 %add = call float @llvm.experimental.constrained.fadd.f32(
46 float %f1, float %f2,
47 metadata !"round.dynamic",
48 metadata !"fpexcept.strict")
49 %sub = call float @llvm.experimental.constrained.fsub.f32(
50 float %f3, float %f4,
51 metadata !"round.dynamic",
52 metadata !"fpexcept.strict")
53 %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
54 float %f2,
55 metadata !"round.dynamic",
56 metadata !"fpexcept.strict")
57 %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
58 float %f4,
59 metadata !"round.dynamic",
60 metadata !"fpexcept.strict")
61
62 %ptr1 = getelementptr float, float *%ptr0, i64 1
63 %ptr2 = getelementptr float, float *%ptr0, i64 2
64 %ptr3 = getelementptr float, float *%ptr0, i64 3
65
66 store float %add, float *%ptr0
67 store float %sub, float *%ptr1
68 store float %sqrt1, float *%ptr2
69 store float %sqrt2, float *%ptr3
70
71 ret void
72 }
73
74 ; On the other hand, strict operations that use the fpexcept.ignore
75 ; exception behaviour should be scheduled freely.
76 define void @f3(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
77 ; CHECK-LABEL: f3:
78 ; CHECK: sqebr
79 ; CHECK: {{aebr|sebr}}
80 ; CHECK: sqebr
81 ; CHECK: br %r14
82
83 %add = call float @llvm.experimental.constrained.fadd.f32(
84 float %f1, float %f2,
85 metadata !"round.dynamic",
86 metadata !"fpexcept.ignore")
87 %sub = call float @llvm.experimental.constrained.fsub.f32(
88 float %f3, float %f4,
89 metadata !"round.dynamic",
90 metadata !"fpexcept.ignore")
91 %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
92 float %f2,
93 metadata !"round.dynamic",
94 metadata !"fpexcept.ignore")
95 %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
96 float %f4,
97 metadata !"round.dynamic",
98 metadata !"fpexcept.ignore")
99
100 %ptr1 = getelementptr float, float *%ptr0, i64 1
101 %ptr2 = getelementptr float, float *%ptr0, i64 2
102 %ptr3 = getelementptr float, float *%ptr0, i64 3
103
104 store float %add, float *%ptr0
105 store float %sub, float *%ptr1
106 store float %sqrt1, float *%ptr2
107 store float %sqrt2, float *%ptr3
108
109 ret void
110 }
111
112 ; However, even non-strict operations must not be scheduled across an SFPC.
113 define void @f4(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
114 ; CHECK-LABEL: f4:
115 ; CHECK: {{aebr|sebr}}
116 ; CHECK: {{aebr|sebr}}
117 ; CHECK: sfpc
118 ; CHECK: sqebr
119 ; CHECK: sqebr
120 ; CHECK: br %r14
121
122 %add = fadd float %f1, %f2
123 %sub = fsub float %f3, %f4
124 call void @llvm.s390.sfpc(i32 0)
125 %sqrt1 = call float @llvm.sqrt.f32(float %f2)
126 %sqrt2 = call float @llvm.sqrt.f32(float %f4)
127
128 %ptr1 = getelementptr float, float *%ptr0, i64 1
129 %ptr2 = getelementptr float, float *%ptr0, i64 2
130 %ptr3 = getelementptr float, float *%ptr0, i64 3
131
132 store float %add, float *%ptr0
133 store float %sub, float *%ptr1
134 store float %sqrt1, float *%ptr2
135 store float %sqrt2, float *%ptr3
136
137 ret void
138 }
139
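The scheduling test above exercises two of the three exception-behaviour modes the constrained intrinsics accept: only !"fpexcept.strict" pins the operations in place, while !"fpexcept.ignore" leaves the scheduler free, as f3 demonstrates. The third mode, !"fpexcept.maytrap", also permits reordering so long as no new traps are introduced. A hypothetical side-by-side (function name and structure invented for illustration; the intrinsic and metadata strings are the ones used throughout these tests):

    declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)

    define float @modes(float %a, float %b) {
      ; strict: exception status must be observable exactly as written
      %s = call float @llvm.experimental.constrained.fadd.f32(
                  float %a, float %b,
                  metadata !"round.dynamic", metadata !"fpexcept.strict")
      ; maytrap: no spurious exceptions may be introduced, ordering is free
      %m = call float @llvm.experimental.constrained.fadd.f32(
                  float %s, float %b,
                  metadata !"round.dynamic", metadata !"fpexcept.maytrap")
      ; ignore: exceptions are not observed; behaves like a plain fadd
      %i = call float @llvm.experimental.constrained.fadd.f32(
                  float %m, float %b,
                  metadata !"round.dynamic", metadata !"fpexcept.ignore")
      ret float %i
    }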
0 ; Test strict floating-point truncations.
1 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
3 ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
4 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
5 ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
6
7 declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
8 declare float @llvm.experimental.constrained.fptrunc.f32.f128(fp128, metadata, metadata)
9 declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata, metadata)
10
11 declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
12 declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
13
14 ; Test f64->f32.
15 define float @f1(double %d1, double %d2) {
16 ; CHECK-LABEL: f1:
17 ; CHECK-SCALAR: ledbr %f0, %f2
18 ; CHECK-VECTOR: ledbra %f0, 0, %f2, 0
19 ; CHECK: br %r14
20 %res = call float @llvm.experimental.constrained.fptrunc.f32.f64(
21 double %d2,
22 metadata !"round.dynamic",
23 metadata !"fpexcept.strict")
24 ret float %res
25 }
26
27 ; Test f128->f32.
28 define float @f2(fp128 *%ptr) {
29 ; CHECK-LABEL: f2:
30 ; CHECK: lexbr %f0, %f0
31 ; CHECK: br %r14
32 %val = load fp128, fp128 *%ptr
33 %res = call float @llvm.experimental.constrained.fptrunc.f32.f128(
34 fp128 %val,
35 metadata !"round.dynamic",
36 metadata !"fpexcept.strict")
37 ret float %res
38 }
39
40 ; Make sure that we don't use %f0 as the destination of LEXBR when %f2
41 ; is still live.
42 define void @f3(float *%dst, fp128 *%ptr, float %d1, float %d2) {
43 ; CHECK-LABEL: f3:
44 ; CHECK: lexbr %f1, %f1
45 ; CHECK: aebr %f1, %f2
46 ; CHECK: ste %f1, 0(%r2)
47 ; CHECK: br %r14
48 %val = load fp128, fp128 *%ptr
49 %conv = call float @llvm.experimental.constrained.fptrunc.f32.f128(
50 fp128 %val,
51 metadata !"round.dynamic",
52 metadata !"fpexcept.strict")
53 %res = call float @llvm.experimental.constrained.fadd.f32(
54 float %conv, float %d2,
55 metadata !"round.dynamic",
56 metadata !"fpexcept.strict")
57 store float %res, float *%dst
58 ret void
59 }
60
61 ; Test f128->f64.
62 define double @f4(fp128 *%ptr) {
63 ; CHECK-LABEL: f4:
64 ; CHECK: ldxbr %f0, %f0
65 ; CHECK: br %r14
66 %val = load fp128, fp128 *%ptr
67 %res = call double @llvm.experimental.constrained.fptrunc.f64.f128(
68 fp128 %val,
69 metadata !"round.dynamic",
70 metadata !"fpexcept.strict")
71 ret double %res
72 }
73
74 ; Like f3, but for f128->f64.
75 define void @f5(double *%dst, fp128 *%ptr, double %d1, double %d2) {
76 ; CHECK-LABEL: f5:
77 ; CHECK: ldxbr %f1, %f1
78 ; CHECK-SCALAR: adbr %f1, %f2
79 ; CHECK-SCALAR: std %f1, 0(%r2)
80 ; CHECK-VECTOR: wfadb [[REG:%f[0-9]+]], %f1, %f2
81 ; CHECK-VECTOR: std [[REG]], 0(%r2)
82 ; CHECK: br %r14
83 %val = load fp128, fp128 *%ptr
84 %conv = call double @llvm.experimental.constrained.fptrunc.f64.f128(
85 fp128 %val,
86 metadata !"round.dynamic",
87 metadata !"fpexcept.strict")
88 %res = call double @llvm.experimental.constrained.fadd.f64(
89 double %conv, double %d2,
90 metadata !"round.dynamic",
91 metadata !"fpexcept.strict")
92 store double %res, double *%dst
93 ret void
94 }
0 ; Test strict extensions of f32 to f64.
1 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
3 ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
4 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
5 ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
6
7 declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
8
9 ; Check register extension.
10 define double @f1(float %val) {
11 ; CHECK-LABEL: f1:
12 ; CHECK: ldebr %f0, %f0
13 ; CHECK: br %r14
14 %res = call double @llvm.experimental.constrained.fpext.f64.f32(float %val,
15 metadata !"fpexcept.strict")
16 ret double %res
17 }
18
19 ; Check extension from memory.
20 ; FIXME: This should really use LDEB, but there is no strict "extload" yet.
21 define double @f2(float *%ptr) {
22 ; CHECK-LABEL: f2:
23 ; CHECK-SCALAR: le %f0, 0(%r2)
24 ; CHECK-VECTOR: lde %f0, 0(%r2)
25 ; CHECK: ldebr %f0, %f0
26 ; CHECK: br %r14
27 %val = load float, float *%ptr
28 %res = call double @llvm.experimental.constrained.fpext.f64.f32(float %val,
29 metadata !"fpexcept.strict")
30 ret double %res
31 }
32
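One detail worth calling out in the declarations used by this and the following extension tests: widening a value is exact, so the constrained fpext intrinsic takes only the exception-behaviour metadata, whereas fptrunc, which can round, also takes a rounding-mode operand. Both declarations appear verbatim in the tests in this patch:

    ; fpext cannot round: value plus exception metadata only.
    declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
    ; fptrunc can round: rounding-mode metadata first, then exception metadata.
    declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)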
0 ; Test strict extensions of f32 to f128.
1 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
3
4 declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata)
5
6 ; Check register extension.
7 define void @f1(fp128 *%dst, float %val) {
8 ; CHECK-LABEL: f1:
9 ; CHECK: lxebr %f0, %f0
10 ; CHECK: std %f0, 0(%r2)
11 ; CHECK: std %f2, 8(%r2)
12 ; CHECK: br %r14
13 %res = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %val,
14 metadata !"fpexcept.strict")
15 store fp128 %res, fp128 *%dst
16 ret void
17 }
18
19 ; Check extension from memory.
20 ; FIXME: This should really use LXEB, but there is no strict "extload" yet.
21 define void @f2(fp128 *%dst, float *%ptr) {
22 ; CHECK-LABEL: f2:
23 ; CHECK: le %f0, 0(%r3)
24 ; CHECK: lxebr %f0, %f0
25 ; CHECK: std %f0, 0(%r2)
26 ; CHECK: std %f2, 8(%r2)
27 ; CHECK: br %r14
28 %val = load float, float *%ptr
29 %res = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %val,
30 metadata !"fpexcept.strict")
31 store fp128 %res, fp128 *%dst
32 ret void
33 }
34
0 ; Test strict extensions of f64 to f128.
1 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
3
4 declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata)
5
6 ; Check register extension.
7 define void @f1(fp128 *%dst, double %val) {
8 ; CHECK-LABEL: f1:
9 ; CHECK: lxdbr %f0, %f0
10 ; CHECK: std %f0, 0(%r2)
11 ; CHECK: std %f2, 8(%r2)
12 ; CHECK: br %r14
13 %res = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %val,
14 metadata !"fpexcept.strict")
15 store fp128 %res, fp128 *%dst
16 ret void
17 }
18
19 ; Check extension from memory.
20 ; FIXME: This should really use LXDB, but there is no strict "extload" yet.
21 define void @f2(fp128 *%dst, double *%ptr) {
22 ; CHECK-LABEL: f2:
23 ; CHECK: ld %f0, 0(%r3)
24 ; CHECK: lxdbr %f0, %f0
25 ; CHECK: std %f0, 0(%r2)
26 ; CHECK: std %f2, 8(%r2)
27 ; CHECK: br %r14
28 %val = load double, double *%ptr
29 %res = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %val,
30 metadata !"fpexcept.strict")
31 store fp128 %res, fp128 *%dst
32 ret void
33 }
34
0 ; Test strict f128 floating-point truncations/extensions on z14.
1 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
3
4 declare float @llvm.experimental.constrained.fptrunc.f32.f128(fp128, metadata, metadata)
5 declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata, metadata)
6
7 declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata)
8 declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata)
9
10 ; Test f128->f64.
11 define double @f1(fp128 *%ptr) {
12 ; CHECK-LABEL: f1:
13 ; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
14 ; CHECK: wflrx %f0, [[REG]], 0, 0
15 ; CHECK: br %r14
16 %val = load fp128, fp128 *%ptr
17 %res = call double @llvm.experimental.constrained.fptrunc.f64.f128(
18 fp128 %val,
19 metadata !"round.dynamic",
20 metadata !"fpexcept.strict")
21 ret double %res
22 }
23
24 ; Test f128->f32.
25 define float @f2(fp128 *%ptr) {
26 ; CHECK-LABEL: f2:
27 ; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
28 ; CHECK: wflrx %f0, [[REG]], 0, 3
29 ; CHECK: ledbra %f0, 0, %f0, 0
30 ; CHECK: br %r14
31 %val = load fp128, fp128 *%ptr
32 %res = call float @llvm.experimental.constrained.fptrunc.f32.f128(
33 fp128 %val,
34 metadata !"round.dynamic",
35 metadata !"fpexcept.strict")
36 ret float %res
37 }
38
39 ; Test f64->f128.
40 define void @f3(fp128 *%dst, double %val) {
41 ; CHECK-LABEL: f3:
42 ; CHECK: wflld [[RES:%v[0-9]+]], %f0
43 ; CHECK: vst [[RES]], 0(%r2)
44 ; CHECK: br %r14
45 %res = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %val,
46 metadata !"fpexcept.strict")
47 store fp128 %res, fp128 *%dst
48 ret void
49 }
50
51 ; Test f32->f128.
52 define void @f4(fp128 *%dst, float %val) {
53 ; CHECK-LABEL: f4:
54 ; CHECK: ldebr %f0, %f0
55 ; CHECK: wflld [[RES:%v[0-9]+]], %f0
56 ; CHECK: vst [[RES]], 0(%r2)
57 ; CHECK: br %r14
58 %res = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %val,
59 metadata !"fpexcept.strict")
60 store fp128 %res, fp128 *%dst
61 ret void
62 }
63
0 ; Test strict 32-bit floating-point division.
1 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
3 ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
4 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
5
6 declare float @foo()
7 declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
8
9 ; Check register division.
10 define float @f1(float %f1, float %f2) {
11 ; CHECK-LABEL: f1:
12 ; CHECK: debr %f0, %f2
13 ; CHECK: br %r14
14 %res = call float @llvm.experimental.constrained.fdiv.f32(
15 float %f1, float %f2,
16 metadata !"round.dynamic",
17 metadata !"fpexcept.strict")
18 ret float %res
19 }
20
21 ; Check the low end of the DEB range.
22 define float @f2(float %f1, float *%ptr) {
23 ; CHECK-LABEL: f2:
24 ; CHECK: deb %f0, 0(%r2)
25 ; CHECK: br %r14
26 %f2 = load float, float *%ptr
27 %res = call float @llvm.experimental.constrained.fdiv.f32(
28 float %f1, float %f2,
29 metadata !"round.dynamic",
30 metadata !"fpexcept.strict")
31 ret float %res
32 }
33
34 ; Check the high end of the aligned DEB range.
35 define float @f3(float %f1, float *%base) {
36 ; CHECK-LABEL: f3:
37 ; CHECK: deb %f0, 4092(%r2)
38 ; CHECK: br %r14
39 %ptr = getelementptr float, float *%base, i64 1023
40 %f2 = load float, float *%ptr
41 %res = call float @llvm.experimental.constrained.fdiv.f32(
42 float %f1, float %f2,
43 metadata !"round.dynamic",
44 metadata !"fpexcept.strict")
45 ret float %res
46 }
47
48 ; Check the next word up, which needs separate address logic.
49 ; Other sequences besides this one would be OK.
50 define float @f4(float %f1, float *%base) {
51 ; CHECK-LABEL: f4:
52 ; CHECK: aghi %r2, 4096
53 ; CHECK: deb %f0, 0(%r2)
54 ; CHECK: br %r14
55 %ptr = getelementptr float, float *%base, i64 1024
56 %f2 = load float, float *%ptr
57 %res = call float @llvm.experimental.constrained.fdiv.f32(
58 float %f1, float %f2,
59 metadata !"round.dynamic",
60 metadata !"fpexcept.strict")
61 ret float %res
62 }
63
64 ; Check negative displacements, which also need separate address logic.
65 define float @f5(float %f1, float *%base) {
66 ; CHECK-LABEL: f5:
67 ; CHECK: aghi %r2, -4
68 ; CHECK: deb %f0, 0(%r2)
69 ; CHECK: br %r14
70 %ptr = getelementptr float, float *%base, i64 -1
71 %f2 = load float, float *%ptr
72 %res = call float @llvm.experimental.constrained.fdiv.f32(
73 float %f1, float %f2,
74 metadata !"round.dynamic",
75 metadata !"fpexcept.strict")
76 ret float %res
77 }
78
79 ; Check that DEB allows indices.
80 define float @f6(float %f1, float *%base, i64 %index) {
81 ; CHECK-LABEL: f6:
82 ; CHECK: sllg %r1, %r3, 2
83 ; CHECK: deb %f0, 400(%r1,%r2)
84 ; CHECK: br %r14
85 %ptr1 = getelementptr float, float *%base, i64 %index
86 %ptr2 = getelementptr float, float *%ptr1, i64 100
87 %f2 = load float, float *%ptr2
88 %res = call float @llvm.experimental.constrained.fdiv.f32(
89 float %f1, float %f2,
90 metadata !"round.dynamic",
91 metadata !"fpexcept.strict")
92 ret float %res
93 }
94
95 ; Check that divisions of spilled values can use DEB rather than DEBR.
96 define float @f7(float *%ptr0) {
97 ; CHECK-LABEL: f7:
98 ; CHECK: brasl %r14, foo@PLT
99 ; CHECK-SCALAR: deb %f0, 16{{[04]}}(%r15)
100 ; CHECK: br %r14
101 %ptr1 = getelementptr float, float *%ptr0, i64 2
102 %ptr2 = getelementptr float, float *%ptr0, i64 4
103 %ptr3 = getelementptr float, float *%ptr0, i64 6
104 %ptr4 = getelementptr float, float *%ptr0, i64 8
105 %ptr5 = getelementptr float, float *%ptr0, i64 10
106 %ptr6 = getelementptr float, float *%ptr0, i64 12
107 %ptr7 = getelementptr float, float *%ptr0, i64 14
108 %ptr8 = getelementptr float, float *%ptr0, i64 16
109 %ptr9 = getelementptr float, float *%ptr0, i64 18
110 %ptr10 = getelementptr float, float *%ptr0, i64 20
111
112 %val0 = load float, float *%ptr0
113 %val1 = load float, float *%ptr1
114 %val2 = load float, float *%ptr2
115 %val3 = load float, float *%ptr3
116 %val4 = load float, float *%ptr4
117 %val5 = load float, float *%ptr5
118 %val6 = load float, float *%ptr6
119 %val7 = load float, float *%ptr7
120 %val8 = load float, float *%ptr8
121 %val9 = load float, float *%ptr9
122 %val10 = load float, float *%ptr10
123
124 %ret = call float @foo()
125
126 %div0 = call float @llvm.experimental.constrained.fdiv.f32(
127 float %ret, float %val0,
128 metadata !"round.dynamic",
129 metadata !"fpexcept.strict")
130 %div1 = call float @llvm.experimental.constrained.fdiv.f32(
131 float %div0, float %val1,
132 metadata !"round.dynamic",
133 metadata !"fpexcept.strict")
134 %div2 = call float @llvm.experimental.constrained.fdiv.f32(
135 float %div1, float %val2,
136 metadata !"round.dynamic",
137 metadata !"fpexcept.strict")
138 %div3 = call float @llvm.experimental.constrained.fdiv.f32(
139 float %div2, float %val3,
140 metadata !"round.dynamic",
141 metadata !"fpexcept.strict")
142 %div4 = call float @llvm.experimental.constrained.fdiv.f32(
143 float %div3, float %val4,
144 metadata !"round.dynamic",
145 metadata !"fpexcept.strict")
146 %div5 = call float @llvm.experimental.constrained.fdiv.f32(
147 float %div4, float %val5,
148 metadata !"round.dynamic",
149 metadata !"fpexcept.strict")
150 %div6 = call float @llvm.experimental.constrained.fdiv.f32(
151 float %div5, float %val6,
152 metadata !"round.dynamic",
153 metadata !"fpexcept.strict")
154 %div7 = call float @llvm.experimental.constrained.fdiv.f32(
155 float %div6, float %val7,
156 metadata !"round.dynamic",
157 metadata !"fpexcept.strict")
158 %div8 = call float @llvm.experimental.constrained.fdiv.f32(
159 float %div7, float %val8,
160 metadata !"round.dynamic",
161 metadata !"fpexcept.strict")
162 %div9 = call float @llvm.experimental.constrained.fdiv.f32(
163 float %div8, float %val9,
164 metadata !"round.dynamic",
165 metadata !"fpexcept.strict")
166 %div10 = call float @llvm.experimental.constrained.fdiv.f32(
167 float %div9, float %val10,
168 metadata !"round.dynamic",
169 metadata !"fpexcept.strict")
170
171 ret float %div10
172 }
0 ; Test strict 64-bit floating-point division.
1 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
3 ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
4 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
5
6 declare double @foo()
7 declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
8
9 ; Check register division.
10 define double @f1(double %f1, double %f2) {
11 ; CHECK-LABEL: f1:
12 ; CHECK: ddbr %f0, %f2
13 ; CHECK: br %r14
14 %res = call double @llvm.experimental.constrained.fdiv.f64(
15 double %f1, double %f2,
16 metadata !"round.dynamic",
17 metadata !"fpexcept.strict")
18 ret double %res
19 }
20
21 ; Check the low end of the DDB range.
22 define double @f2(double %f1, double *%ptr) {
23 ; CHECK-LABEL: f2:
24 ; CHECK: ddb %f0, 0(%r2)
25 ; CHECK: br %r14
26 %f2 = load double, double *%ptr
27 %res = call double @llvm.experimental.constrained.fdiv.f64(
28 double %f1, double %f2,
29 metadata !"round.dynamic",
30 metadata !"fpexcept.strict")
31 ret double %res
32 }
33
34 ; Check the high end of the aligned DDB range.
35 define double @f3(double %f1, double *%base) {
36 ; CHECK-LABEL: f3:
37 ; CHECK: ddb %f0, 4088(%r2)
38 ; CHECK: br %r14
39 %ptr = getelementptr double, double *%base, i64 511
40 %f2 = load double, double *%ptr
41 %res = call double @llvm.experimental.constrained.fdiv.f64(
42 double %f1, double %f2,
43 metadata !"round.dynamic",
44 metadata !"fpexcept.strict")
45 ret double %res
46 }
47
48 ; Check the next doubleword up, which needs separate address logic.
49 ; Other sequences besides this one would be OK.
50 define double @f4(double %f1, double *%base) {
51 ; CHECK-LABEL: f4:
52 ; CHECK: aghi %r2, 4096
53 ; CHECK: ddb %f0, 0(%r2)
54 ; CHECK: br %r14
55 %ptr = getelementptr double, double *%base, i64 512
56 %f2 = load double, double *%ptr
57 %res = call double @llvm.experimental.constrained.fdiv.f64(
58 double %f1, double %f2,
59 metadata !"round.dynamic",
60 metadata !"fpexcept.strict")
61 ret double %res
62 }
63
64 ; Check negative displacements, which also need separate address logic.
65 define double @f5(double %f1, double *%base) {
66 ; CHECK-LABEL: f5:
67 ; CHECK: aghi %r2, -8
68 ; CHECK: ddb %f0, 0(%r2)
69 ; CHECK: br %r14
70 %ptr = getelementptr double, double *%base, i64 -1
71 %f2 = load double, double *%ptr
72 %res = call double @llvm.experimental.constrained.fdiv.f64(
73 double %f1, double %f2,
74 metadata !"round.dynamic",
75 metadata !"fpexcept.strict")
76 ret double %res
77 }
78
79 ; Check that DDB allows indices.
80 define double @f6(double %f1, double *%base, i64 %index) {
81 ; CHECK-LABEL: f6:
82 ; CHECK: sllg %r1, %r3, 3
83 ; CHECK: ddb %f0, 800(%r1,%r2)
84 ; CHECK: br %r14
85 %ptr1 = getelementptr double, double *%base, i64 %index
86 %ptr2 = getelementptr double, double *%ptr1, i64 100
87 %f2 = load double, double *%ptr2
88 %res = call double @llvm.experimental.constrained.fdiv.f64(
89 double %f1, double %f2,
90 metadata !"round.dynamic",
91 metadata !"fpexcept.strict")
92 ret double %res
93 }
94
95 ; Check that divisions of spilled values can use DDB rather than DDBR.
96 define double @f7(double *%ptr0) {
97 ; CHECK-LABEL: f7:
98 ; CHECK: brasl %r14, foo@PLT
99 ; CHECK-SCALAR: ddb %f0, 160(%r15)
100 ; CHECK: br %r14
101 %ptr1 = getelementptr double, double *%ptr0, i64 2
102 %ptr2 = getelementptr double, double *%ptr0, i64 4
103 %ptr3 = getelementptr double, double *%ptr0, i64 6
104 %ptr4 = getelementptr double, double *%ptr0, i64 8
105 %ptr5 = getelementptr double, double *%ptr0, i64 10
106 %ptr6 = getelementptr double, double *%ptr0, i64 12
107 %ptr7 = getelementptr double, double *%ptr0, i64 14
108 %ptr8 = getelementptr double, double *%ptr0, i64 16
109 %ptr9 = getelementptr double, double *%ptr0, i64 18
110 %ptr10 = getelementptr double, double *%ptr0, i64 20
111
112 %val0 = load double, double *%ptr0
113 %val1 = load double, double *%ptr1
114 %val2 = load double, double *%ptr2
115 %val3 = load double, double *%ptr3
116 %val4 = load double, double *%ptr4
117 %val5 = load double, double *%ptr5
118 %val6 = load double, double *%ptr6
119 %val7 = load double, double *%ptr7
120 %val8 = load double, double *%ptr8
121 %val9 = load double, double *%ptr9
122 %val10 = load double, double *%ptr10
123
124 %ret = call double @foo()
125
126 %div0 = call double @llvm.experimental.constrained.fdiv.f64(
127 double %ret, double %val0,
128 metadata !"round.dynamic",
129 metadata !"fpexcept.strict")
130 %div1 = call double @llvm.experimental.constrained.fdiv.f64(
131 double %div0, double %val1,
132 metadata !"round.dynamic",
133 metadata !"fpexcept.strict")
134 %div2 = call double @llvm.experimental.constrained.fdiv.f64(
135 double %div1, double %val2,
136 metadata !"round.dynamic",
137 metadata !"fpexcept.strict")
138 %div3 = call double @llvm.experimental.constrained.fdiv.f64(
139 double %div2, double %val3,
140 metadata !"round.dynamic",
141 metadata !"fpexcept.strict")
142 %div4 = call double @llvm.experimental.constrained.fdiv.f64(
143 double %div3, double %val4,
144 metadata !"round.dynamic",
145 metadata !"fpexcept.strict")
146 %div5 = call double @llvm.experimental.constrained.fdiv.f64(
147 double %div4, double %val5,
148 metadata !"round.dynamic",
149 metadata !"fpexcept.strict")
150 %div6 = call double @llvm.experimental.constrained.fdiv.f64(
151 double %div5, double %val6,
152 metadata !"round.dynamic",
153 metadata !"fpexcept.strict")
154 %div7 = call double @llvm.experimental.constrained.fdiv.f64(
155 double %div6, double %val7,
156 metadata !"round.dynamic",
157 metadata !"fpexcept.strict")
158 %div8 = call double @llvm.experimental.constrained.fdiv.f64(
159 double %div7, double %val8,
160 metadata !"round.dynamic",
161 metadata !"fpexcept.strict")
162 %div9 = call double @llvm.experimental.constrained.fdiv.f64(
163 double %div8, double %val9,
164 metadata !"round.dynamic",
165 metadata !"fpexcept.strict")
166 %div10 = call double @llvm.experimental.constrained.fdiv.f64(
167 double %div9, double %val10,
168 metadata !"round.dynamic",
169 metadata !"fpexcept.strict")
170
171 ret double %div10
172 }
0 ; Test strict 128-bit floating-point division.
1 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
3
4 declare fp128 @llvm.experimental.constrained.fdiv.f128(fp128, fp128, metadata, metadata)
5
6 ; There is no memory form of 128-bit division.
7 define void @f1(fp128 *%ptr, float %f2) {
8 ; CHECK-LABEL: f1:
9 ; CHECK-DAG: lxebr %f0, %f0
10 ; CHECK-DAG: ld %f1, 0(%r2)
11 ; CHECK-DAG: ld %f3, 8(%r2)
12 ; CHECK: dxbr %f1, %f0
13 ; CHECK: std %f1, 0(%r2)
14 ; CHECK: std %f3, 8(%r2)
15 ; CHECK: br %r14
16 %f1 = load fp128, fp128 *%ptr
17 %f2x = fpext float %f2 to fp128
18 %sum = call fp128 @llvm.experimental.constrained.fdiv.f128(
19 fp128 %f1, fp128 %f2x,
20 metadata !"round.dynamic",
21 metadata !"fpexcept.strict")
22 store fp128 %sum, fp128 *%ptr
23 ret void
24 }
0 ; Test strict 128-bit floating-point division on z14.
1 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
3
4 declare fp128 @llvm.experimental.constrained.fdiv.f128(fp128, fp128, metadata, metadata)
5
6 define void @f1(fp128 *%ptr1, fp128 *%ptr2) {
7 ; CHECK-LABEL: f1:
8 ; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
9 ; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
10 ; CHECK: wfdxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]]
11 ; CHECK: vst [[RES]], 0(%r2)
12 ; CHECK: br %r14
13 %f1 = load fp128, fp128 *%ptr1
14 %f2 = load fp128, fp128 *%ptr2
15 %sum = call fp128 @llvm.experimental.constrained.fdiv.f128(
16 fp128 %f1, fp128 %f2,
17 metadata !"round.dynamic",
18 metadata !"fpexcept.strict")
19 store fp128 %sum, fp128 *%ptr1
20 ret void
21 }
0 ; Test strict multiplication of two f32s, producing an f32 result.
1 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
3 ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
4 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
5
6 declare float @foo()
7 declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
8
9 ; Check register multiplication.
10 define float @f1(float %f1, float %f2) {
11 ; CHECK-LABEL: f1:
12 ; CHECK: meebr %f0, %f2
13 ; CHECK: br %r14
14 %res = call float @llvm.experimental.constrained.fmul.f32(
15 float %f1, float %f2,
16 metadata !"round.dynamic",
17 metadata !"fpexcept.strict")
18 ret float %res
19 }
20
21 ; Check the low end of the MEEB range.
22 define float @f2(float %f1, float *%ptr) {
23 ; CHECK-LABEL: f2:
24 ; CHECK: meeb %f0, 0(%r2)
25 ; CHECK: br %r14
26 %f2 = load float, float *%ptr
27 %res = call float @llvm.experimental.constrained.fmul.f32(
28 float %f1, float %f2,
29 metadata !"round.dynamic",
30 metadata !"fpexcept.strict")
31 ret float %res
32 }
33
34 ; Check the high end of the aligned MEEB range.
35 define float @f3(float %f1, float *%base) {
36 ; CHECK-LABEL: f3:
37 ; CHECK: meeb %f0, 4092(%r2)
38 ; CHECK: br %r14
39 %ptr = getelementptr float, float *%base, i64 1023
40 %f2 = load float, float *%ptr
41 %res = call float @llvm.experimental.constrained.fmul.f32(
42 float %f1, float %f2,
43 metadata !"round.dynamic",
44 metadata !"fpexcept.strict")
45 ret float %res
46 }
47
48 ; Check the next word up, which needs separate address logic.
49 ; Other sequences besides this one would be OK.
50 define float @f4(float %f1, float *%base) {
51 ; CHECK-LABEL: f4:
52 ; CHECK: aghi %r2, 4096
53 ; CHECK: meeb %f0, 0(%r2)
54 ; CHECK: br %r14
55 %ptr = getelementptr float, float *%base, i64 1024
56 %f2 = load float, float *%ptr
57 %res = call float @llvm.experimental.constrained.fmul.f32(
58 float %f1, float %f2,
59 metadata !"round.dynamic",
60 metadata !"fpexcept.strict")
61 ret float %res
62 }
63
64 ; Check negative displacements, which also need separate address logic.
65 define float @f5(float %f1, float *%base) {
66 ; CHECK-LABEL: f5:
67 ; CHECK: aghi %r2, -4
68 ; CHECK: meeb %f0, 0(%r2)
69 ; CHECK: br %r14
70 %ptr = getelementptr float, float *%base, i64 -1
71 %f2 = load float, float *%ptr
72 %res = call float @llvm.experimental.constrained.fmul.f32(
73 float %f1, float %f2,
74 metadata !"round.dynamic",
75 metadata !"fpexcept.strict")
76 ret float %res
77 }
78
79 ; Check that MEEB allows indices.
80 define float @f6(float %f1, float *%base, i64 %index) {
81 ; CHECK-LABEL: f6:
82 ; CHECK: sllg %r1, %r3, 2
83 ; CHECK: meeb %f0, 400(%r1,%r2)
84 ; CHECK: br %r14
85 %ptr1 = getelementptr float, float *%base, i64 %index
86 %ptr2 = getelementptr float, float *%ptr1, i64 100
87 %f2 = load float, float *%ptr2
88 %res = call float @llvm.experimental.constrained.fmul.f32(
89 float %f1, float %f2,
90 metadata !"round.dynamic",
91 metadata !"fpexcept.strict")
92 ret float %res
93 }
94
95 ; Check that multiplications of spilled values can use MEEB rather than MEEBR.
96 define float @f7(float *%ptr0) {
97 ; CHECK-LABEL: f7:
98 ; CHECK: brasl %r14, foo@PLT
99 ; CHECK-SCALAR: meeb %f0, 16{{[04]}}(%r15)
100 ; CHECK: br %r14
101 %ptr1 = getelementptr float, float *%ptr0, i64 2
102 %ptr2 = getelementptr float, float *%ptr0, i64 4
103 %ptr3 = getelementptr float, float *%ptr0, i64 6
104 %ptr4 = getelementptr float, float *%ptr0, i64 8
105 %ptr5 = getelementptr float, float *%ptr0, i64 10
106 %ptr6 = getelementptr float, float *%ptr0, i64 12
107 %ptr7 = getelementptr float, float *%ptr0, i64 14
108 %ptr8 = getelementptr float, float *%ptr0, i64 16
109 %ptr9 = getelementptr float, float *%ptr0, i64 18
110 %ptr10 = getelementptr float, float *%ptr0, i64 20
111
112 %val0 = load float, float *%ptr0
113 %val1 = load float, float *%ptr1
114 %val2 = load float, float *%ptr2
115 %val3 = load float, float *%ptr3
116 %val4 = load float, float *%ptr4
117 %val5 = load float, float *%ptr5
118 %val6 = load float, float *%ptr6
119 %val7 = load float, float *%ptr7
120 %val8 = load float, float *%ptr8
121 %val9 = load float, float *%ptr9
122 %val10 = load float, float *%ptr10
123
124 %ret = call float @foo()
125
126 %mul0 = call float @llvm.experimental.constrained.fmul.f32(
127 float %ret, float %val0,
128 metadata !"round.dynamic",
129 metadata !"fpexcept.strict")
130 %mul1 = call float @llvm.experimental.constrained.fmul.f32(
131 float %mul0, float %val1,
132 metadata !"round.dynamic",
133 metadata !"fpexcept.strict")
134 %mul2 = call float @llvm.experimental.constrained.fmul.f32(
135 float %mul1, float %val2,
136 metadata !"round.dynamic",
137 metadata !"fpexcept.strict")
138 %mul3 = call float @llvm.experimental.constrained.fmul.f32(
139 float %mul2, float %val3,
140 metadata !"round.dynamic",
141 metadata !"fpexcept.strict")
142 %mul4 = call float @llvm.experimental.constrained.fmul.f32(
143 float %mul3, float %val4,
144 metadata !"round.dynamic",
145 metadata !"fpexcept.strict")
146 %mul5 = call float @llvm.experimental.constrained.fmul.f32(
147 float %mul4, float %val5,
148 metadata !"round.dynamic",
149 metadata !"fpexcept.strict")
150 %mul6 = call float @llvm.experimental.constrained.fmul.f32(
151 float %mul5, float %val6,
152 metadata !"round.dynamic",
153 metadata !"fpexcept.strict")
154 %mul7 = call float @llvm.experimental.constrained.fmul.f32(
155 float %mul6, float %val7,
156 metadata !"round.dynamic",
157 metadata !"fpexcept.strict")
158 %mul8 = call float @llvm.experimental.constrained.fmul.f32(
159 float %mul7, float %val8,
160 metadata !"round.dynamic",
161 metadata !"fpexcept.strict")
162 %mul9 = call float @llvm.experimental.constrained.fmul.f32(
163 float %mul8, float %val9,
164 metadata !"round.dynamic",
165 metadata !"fpexcept.strict")
166 %mul10 = call float @llvm.experimental.constrained.fmul.f32(
167 float %mul9, float %val10,
168 metadata !"round.dynamic",
169 metadata !"fpexcept.strict")
170
171 ret float %mul10
172 }
0 ; Test strict multiplication of two f32s, producing an f64 result.
1 ; FIXME: we do not have a strict version of fpext yet
2 ;
3 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
4
5 declare float @foo()
6 declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
7
8 ; Check register multiplication.
9 define double @f1(float %f1, float %f2) {
10 ; CHECK-LABEL: f1:
11 ; CHECK: mdebr %f0, %f2
12 ; CHECK: br %r14
13 %f1x = fpext float %f1 to double
14 %f2x = fpext float %f2 to double
15 %res = call double @llvm.experimental.constrained.fmul.f64(
16 double %f1x, double %f2x,
17 metadata !"round.dynamic",
18 metadata !"fpexcept.strict")
19 ret double %res
20 }
21
22 ; Check the low end of the MDEB range.
23 define double @f2(float %f1, float *%ptr) {
24 ; CHECK-LABEL: f2:
25 ; CHECK: mdeb %f0, 0(%r2)
26 ; CHECK: br %r14
27 %f2 = load float, float *%ptr
28 %f1x = fpext float %f1 to double
29 %f2x = fpext float %f2 to double
30 %res = call double @llvm.experimental.constrained.fmul.f64(
31 double %f1x, double %f2x,
32 metadata !"round.dynamic",
33 metadata !"fpexcept.strict")
34 ret double %res
35 }
36
37 ; Check the high end of the aligned MDEB range.
38 define double @f3(float %f1, float *%base) {
39 ; CHECK-LABEL: f3:
40 ; CHECK: mdeb %f0, 4092(%r2)
41 ; CHECK: br %r14
42 %ptr = getelementptr float, float *%base, i64 1023
43 %f2 = load float, float *%ptr
44 %f1x = fpext float %f1 to double
45 %f2x = fpext float %f2 to double
46 %res = call double @llvm.experimental.constrained.fmul.f64(
47 double %f1x, double %f2x,
48 metadata !"round.dynamic",
49 metadata !"fpexcept.strict")
50 ret double %res
51 }
52
53 ; Check the next word up, which needs separate address logic.
54 ; Other sequences besides this one would be OK.
55 define double @f4(float %f1, float *%base) {
56 ; CHECK-LABEL: f4:
57 ; CHECK: aghi %r2, 4096
58 ; CHECK: mdeb %f0, 0(%r2)
59 ; CHECK: br %r14
60 %ptr = getelementptr float, float *%base, i64 1024
61 %f2 = load float, float *%ptr
62 %f1x = fpext float %f1 to double
63 %f2x = fpext float %f2 to double
64 %res = call double @llvm.experimental.constrained.fmul.f64(
65 double %f1x, double %f2x,
66 metadata !"round.dynamic",
67 metadata !"fpexcept.strict")
68 ret double %res
69 }
70
71 ; Check negative displacements, which also need separate address logic.
72 define double @f5(float %f1, float *%base) {
73 ; CHECK-LABEL: f5:
74 ; CHECK: aghi %r2, -4
75 ; CHECK: mdeb %f0, 0(%r2)
76 ; CHECK: br %r14
77 %ptr = getelementptr float, float *%base, i64 -1
78 %f2 = load float, float *%ptr
79 %f1x = fpext float %f1 to double
80 %f2x = fpext float %f2 to double
81 %res = call double @llvm.experimental.constrained.fmul.f64(
82 double %f1x, double %f2x,
83 metadata !"round.dynamic",
84 metadata !"fpexcept.strict")
85 ret double %res
86 }
87
88 ; Check that MDEB allows indices.
89 define double @f6(float %f1, float *%base, i64 %index) {
90 ; CHECK-LABEL: f6:
91 ; CHECK: sllg %r1, %r3, 2
92 ; CHECK: mdeb %f0, 400(%r1,%r2)
93 ; CHECK: br %r14
94 %ptr1 = getelementptr float, float *%base, i64 %index
95 %ptr2 = getelementptr float, float *%ptr1, i64 100
96 %f2 = load float, float *%ptr2
97 %f1x = fpext float %f1 to double
98 %f2x = fpext float %f2 to double
99 %res = call double @llvm.experimental.constrained.fmul.f64(
100 double %f1x, double %f2x,
101 metadata !"round.dynamic",
102 metadata !"fpexcept.strict")
103 ret double %res
104 }
105
106 ; Check that multiplications of spilled values can use MDEB rather than MDEBR.
107 define float @f7(float *%ptr0) {
108 ; CHECK-LABEL: f7:
109 ; CHECK: brasl %r14, foo@PLT
110 ; CHECK: mdeb %f0, 16{{[04]}}(%r15)
111 ; CHECK: br %r14
112 %ptr1 = getelementptr float, float *%ptr0, i64 2
113 %ptr2 = getelementptr float, float *%ptr0, i64 4
114 %ptr3 = getelementptr float, float *%ptr0, i64 6
115 %ptr4 = getelementptr float, float *%ptr0, i64 8
116 %ptr5 = getelementptr float, float *%ptr0, i64 10
117 %ptr6 = getelementptr float, float *%ptr0, i64 12
118 %ptr7 = getelementptr float, float *%ptr0, i64 14
119 %ptr8 = getelementptr float, float *%ptr0, i64 16
120 %ptr9 = getelementptr float, float *%ptr0, i64 18
121 %ptr10 = getelementptr float, float *%ptr0, i64 20
122
123 %val0 = load float, float *%ptr0
124 %val1 = load float, float *%ptr1
125 %val2 = load float, float *%ptr2
126 %val3 = load float, float *%ptr3
127 %val4 = load float, float *%ptr4
128 %val5 = load float, float *%ptr5
129 %val6 = load float, float *%ptr6
130 %val7 = load float, float *%ptr7
131 %val8 = load float, float *%ptr8
132 %val9 = load float, float *%ptr9
133 %val10 = load float, float *%ptr10
134
135 %frob0 = fadd float %val0, %val0
136 %frob1 = fadd float %val1, %val1
137 %frob2 = fadd float %val2, %val2
138 %frob3 = fadd float %val3, %val3
139 %frob4 = fadd float %val4, %val4
140 %frob5 = fadd float %val5, %val5
141 %frob6 = fadd float %val6, %val6
142 %frob7 = fadd float %val7, %val7
143 %frob8 = fadd float %val8, %val8
144 %frob9 = fadd float %val9, %val9
145 %frob10 = fadd float %val10, %val10
146
147 store float %frob0, float *%ptr0
148 store float %frob1, float *%ptr1
149 store float %frob2, float *%ptr2
150 store float %frob3, float *%ptr3
151 store float %frob4, float *%ptr4
152 store float %frob5, float *%ptr5
153 store float %frob6, float *%ptr6
154 store float %frob7, float *%ptr7
155 store float %frob8, float *%ptr8
156 store float %frob9, float *%ptr9
157 store float %frob10, float *%ptr10
158
159 %ret = call float @foo()
160
161 %accext0 = fpext float %ret to double
162 %ext0 = fpext float %frob0 to double
163 %mul0 = call double @llvm.experimental.constrained.fmul.f64(
164 double %accext0, double %ext0,
165 metadata !"round.dynamic",
166 metadata !"fpexcept.strict")
167 %extra0 = call double @llvm.experimental.constrained.fmul.f64(
168 double %mul0, double 1.01,
169 metadata !"round.dynamic",
170 metadata !"fpexcept.strict")
171 %trunc0 = fptrunc double %extra0 to float
172
173 %accext1 = fpext float %trunc0 to double
174 %ext1 = fpext float %frob1 to double
175 %mul1 = call double @llvm.experimental.constrained.fmul.f64(
176 double %accext1, double %ext1,
177 metadata !"round.dynamic",
178 metadata !"fpexcept.strict")
179 %extra1 = call double @llvm.experimental.constrained.fmul.f64(
180 double %mul1, double 1.11,
181 metadata !"round.dynamic",
182 metadata !"fpexcept.strict")
183 %trunc1 = fptrunc double %extra1 to float
184
185 %accext2 = fpext float %trunc1 to double
186 %ext2 = fpext float %frob2 to double
187 %mul2 = call double @llvm.experimental.constrained.fmul.f64(
188 double %accext2, double %ext2,
189 metadata !"round.dynamic",
190 metadata !"fpexcept.strict")
191 %extra2 = call double @llvm.experimental.constrained.fmul.f64(
192 double %mul2, double 1.21,
193 metadata !"round.dynamic",
194 metadata !"fpexcept.strict")
195 %trunc2 = fptrunc double %extra2 to float
196
197 %accext3 = fpext float %trunc2 to double
198 %ext3 = fpext float %frob3 to double
199 %mul3 = call double @llvm.experimental.constrained.fmul.f64(
200 double %accext3, double %ext3,
201 metadata !"round.dynamic",
202 metadata !"fpexcept.strict")
203 %extra3 = call double @llvm.experimental.constrained.fmul.f64(
204 double %mul3, double 1.31,
205 metadata !"round.dynamic",
206 metadata !"fpexcept.strict")
207 %trunc3 = fptrunc double %extra3 to float
208
209 %accext4 = fpext float %trunc3 to double
210 %ext4 = fpext float %frob4 to double
211 %mul4 = call double @llvm.experimental.constrained.fmul.f64(
212 double %accext4, double %ext4,
213 metadata !"round.dynamic",
214 metadata !"fpexcept.strict")
215 %extra4 = call double @llvm.experimental.constrained.fmul.f64(
216 double %mul4, double 1.41,
217 metadata !"round.dynamic",
218 metadata !"fpexcept.strict")
219 %trunc4 = fptrunc double %extra4 to float
220
221 %accext5 = fpext float %trunc4 to double
222 %ext5 = fpext float %frob5 to double
223 %mul5 = call double @llvm.experimental.constrained.fmul.f64(
224 double %accext5, double %ext5,
225 metadata !"round.dynamic",
226 metadata !"fpexcept.strict")
227 %extra5 = call double @llvm.experimental.constrained.fmul.f64(
228 double %mul5, double 1.51,
229 metadata !"round.dynamic",
230 metadata !"fpexcept.strict")
231 %trunc5 = fptrunc double %extra5 to float
232
233 %accext6 = fpext float %trunc5 to double
234 %ext6 = fpext float %frob6 to double
235 %mul6 = call double @llvm.experimental.constrained.fmul.f64(
236 double %accext6, double %ext6,
237 metadata !"round.dynamic",
238 metadata !"fpexcept.strict")
239 %extra6 = call double @llvm.experimental.constrained.fmul.f64(
240 double %mul6, double 1.61,
241 metadata !"round.dynamic",
242 metadata !"fpexcept.strict")
243 %trunc6 = fptrunc double %extra6 to float
244
245 %accext7 = fpext float %trunc6 to double
246 %ext7 = fpext float %frob7 to double
247 %mul7 = call double @llvm.experimental.constrained.fmul.f64(
248 double %accext7, double %ext7,
249 metadata !"round.dynamic",
250 metadata !"fpexcept.strict")
251 %extra7 = call double @llvm.experimental.constrained.fmul.f64(
252 double %mul7, double 1.71,
253 metadata !"round.dynamic",
254 metadata !"fpexcept.strict")
255 %trunc7 = fptrunc double %extra7 to float
256
257 %accext8 = fpext float %trunc7 to double
258 %ext8 = fpext float %frob8 to double
259 %mul8 = call double @llvm.experimental.constrained.fmul.f64(
260 double %accext8, double %ext8,
261 metadata !"round.dynamic",
262 metadata !"fpexcept.strict")
263 %extra8 = call double @llvm.experimental.constrained.fmul.f64(
264 double %mul8, double 1.81,
265 metadata !"round.dynamic",
266 metadata !"fpexcept.strict")
267 %trunc8 = fptrunc double %extra8 to float
268
269 %accext9 = fpext float %trunc8 to double
270 %ext9 = fpext float %frob9 to double
271 %mul9 = call double @llvm.experimental.constrained.fmul.f64(
272 double %accext9, double %ext9,
273 metadata !"round.dynamic",
274 metadata !"fpexcept.strict")
275 %extra9 = call double @llvm.experimental.constrained.fmul.f64(
276 double %mul9, double 1.91,
277 metadata !"round.dynamic",
278 metadata !"fpexcept.strict")
279 %trunc9 = fptrunc double %extra9 to float
280
281 ret float %trunc9
282 }
0 ; Test strict multiplication of two f64s, producing an f64 result.
1 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
3 ; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
4 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
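; (Two RUN lines: z10 is a scalar FP target, while z13 adds the vector
; facility and many more FP-capable registers, so it is presumably not
; forced to spill in the same way; the CHECK-SCALAR lines are therefore
; only enforced for the z10 run.)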
5
6 declare double @foo()
7 declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
8
9 ; Check register multiplication.
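; (MDBR is the two-operand register form: %f1 arrives in %f0 and is
; overwritten by the product of %f0 and %f2.)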
10 define double @f1(double %f1, double %f2) {
11 ; CHECK-LABEL: f1:
12 ; CHECK: mdbr %f0, %f2
13 ; CHECK: br %r14
14 %res = call double @llvm.experimental.constrained.fmul.f64(
15 double %f1, double %f2,
16 metadata !"round.dynamic",
17 metadata !"fpexcept.strict")
18 ret double %res
19 }
20
21 ; Check the low end of the MDB range.
22 define double @f2(double %f1, double *%ptr) {
23 ; CHECK-LABEL: f2:
24 ; CHECK: mdb %f0, 0(%r2)
25 ; CHECK: br %r14
26 %f2 = load double, double *%ptr
27 %res = call double @llvm.experimental.constrained.fmul.f64(
28 double %f1, double %f2,
29 metadata !"round.dynamic",
30 metadata !"fpexcept.strict")
31 ret double %res
32 }
33
34 ; Check the high end of the aligned MDB range.
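; (511 * 8 = 4088 is the largest doubleword-aligned offset that still
; fits the 12-bit unsigned displacement range of 0..4095.)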
35 define double @f3(double %f1, double *%base) {
36 ; CHECK-LABEL: f3:
37 ; CHECK: mdb %f0, 4088(%r2)
38 ; CHECK: br %r14
39 %ptr = getelementptr double, double *%base, i64 511
40 %f2 = load double, double *%ptr
41 %res = call double @llvm.experimental.constrained.fmul.f64(
42 double %f1, double %f2,
43 metadata !"round.dynamic",
44 metadata !"fpexcept.strict")
45 ret double %res
46 }
47
48 ; Check the next doubleword up, which needs separate address logic.
49 ; Other sequences besides this one would be OK.
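; (512 * 8 = 4096 is just past the encodable 0..4095 range, hence the
; AGHI.)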
50 define double @f4(double %f1, double *%base) {
51 ; CHECK-LABEL: f4:
52 ; CHECK: aghi %r2, 4096
53 ; CHECK: mdb %f0, 0(%r2)
54 ; CHECK: br %r14
55 %ptr = getelementptr double, double *%base, i64 512
56 %f2 = load double, double *%ptr
57 %res = call double @llvm.experimental.constrained.fmul.f64(
58 double %f1, double %f2,
59 metadata !"round.dynamic",
60 metadata !"fpexcept.strict")
61 ret double %res
62 }
63
64 ; Check negative displacements, which also need separate address logic.
65 define double @f5(double %f1, double *%base) {
66 ; CHECK-LABEL: f5:
67 ; CHECK: aghi %r2, -8
68 ; CHECK: mdb %f0, 0(%r2)
69 ; CHECK: br %r14
70 %ptr = getelementptr double, double *%base, i64 -1
71 %f2 = load double, double *%ptr
72 %res = call double @llvm.experimental.constrained.fmul.f64(
73 double %f1, double %f2,
74 metadata !"round.dynamic",
75 metadata !"fpexcept.strict")
76 ret double %res
77 }
78
79 ; Check that MDB allows indices.
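; (As with the MDEB version, but the SLLG now scales the index by 8 for
; double, and the folded constant displacement is 100 * 8 = 800.)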
80 define double @f6(double %f1, double *%base, i64 %index) {
81 ; CHECK-LABEL: f6:
82 ; CHECK: sllg %r1, %r3, 3
83 ; CHECK: mdb %f0, 800(%r1,%r2)
84 ; CHECK: br %r14
85 %ptr1 = getelementptr double, double *%base, i64 %index
86 %ptr2 = getelementptr double, double *%ptr1, i64 100
87 %f2 = load double, double *%ptr2
88 %res = call double @llvm.experimental.constrained.fmul.f64(
89 double %f1, double %f2,
90 metadata !"round.dynamic",
91 metadata !"fpexcept.strict")
92 ret double %res
93 }
94
95 ; Check that multiplications of spilled values can use MDB rather than MDBR.
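; (CHECK-SCALAR can pin the reload to 160(%r15) because the first 160
; bytes of the s390x stack frame are the ABI-defined register save area,
; so spill slots start at offset 160.)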
96 define double @f7(double *%ptr0) {
97 ; CHECK-LABEL: f7:
98 ; CHECK: brasl %r14, foo@PLT
99 ; CHECK-SCALAR: mdb %f0, 160(%r15)
100 ; CHECK: br %r14
101 %ptr1 = getelementptr double, double *%ptr0, i64 2
102 %ptr2 = getelementptr double, double *%ptr0, i64 4
103 %ptr3 = getelementptr double, double *%ptr0, i64 6
104 %ptr4 = getelementptr double, double *%ptr0, i64 8
105 %ptr5 = getelementptr double, double *%ptr0, i64 10
106 %ptr6 = getelementptr double, double *%ptr0, i64 12
107 %ptr7 = getelementptr double, double *%ptr0, i64 14
108 %ptr8 = getelementptr double, double *%ptr0, i64 16
109 %ptr9 = getelementptr double, double *%ptr0, i64 18
110 %ptr10 = getelementptr double, double *%ptr0, i64 20
111
112 %val0 = load double, double *%ptr0
113 %val1 = load double, double *%ptr1
114 %val2 = load double, double *%ptr2
115 %val3 = load double, double *%ptr3
116 %val4 = load double, double *%ptr4
117 %val5 = load double, double *%ptr5
118 %val6 = load double, double *%ptr6
119 %val7 = load double, double *%ptr7
120 %val8 = load double, double *%ptr8
121 %val9 = load double, double *%ptr9
122 %val10 = load double, double *%ptr10
123
124 %ret = call double @foo()
125
126 %mul0 = call double @llvm.experimental.constrained.fmul.f64(
127 double %ret, double %val0,
128 metadata !"round.dynamic",
129 metadata !"fpexcept.strict")
130 %mul1 = call double @llvm.experimental.constrained.fmul.f64(
131 double %mul0, double %val1,
132 metadata !"round.dynamic",
133 metadata !"fpexcept.strict")
134 %mul2 = call double @llvm.experimental.constrained.fmul.f64(
135 double %mul1, double %val2,
136 metadata !"round.dynamic",
137 metadata !"fpexcept.strict")
138 %mul3 = call double @llvm.experimental.constrained.fmul.f64(
139 double %mul2, double %val3,
140 metadata !"round.dynamic",
141 metadata !"fpexcept.strict")
142 %mul4 = call double @llvm.experimental.constrained.fmul.f64(
143 double %mul3, double %val4,
144 metadata !"round.dynamic",
145 metadata !"fpexcept.strict")
146 %mul5 = call double @llvm.experimental.constrained.fmul.f64(
147 double %mul4, double %val5,
148 metadata !"round.dynamic",
149 metadata !"fpexcept.strict")
150 %mul6 = call double @llvm.experimental.constrained.fmul.f64(
151 double %mul5, double %val6,
152 metadata !"round.dynamic",
153 metadata !"fpexcept.strict")
154 %mul7 = call double @llvm.experimental.constrained.fmul.f64(
155 double %mul6, double %val7,
156 metadata !"round.dynamic",
157 metadata !"fpexcept.strict")
158 %mul8 = call double @llvm.experimental.constrained.fmul.f64(
159 double %mul7, double %val8,
160 metadata !"round.dynamic",
161 metadata !"fpexcept.strict")
162 %mul9 = call double @llvm.experimental.constrained.fmul.f64(
163 double %mul8, double %val9,
164 metadata !"round.dynamic",
165 metadata !"fpexcept.strict")
166 %mul10 = call double @llvm.experimental.constrained.fmul.f64(
167 double %mul9, double %val10,
168 metadata !"round.dynamic",
169 metadata !"fpexcept.strict")
170
171 ret double %mul10
172 }
0 ; Test strict multiplication of two f64s, producing an f128 result.
1 ; FIXME: we do not have a strict version of fpext yet
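; (Until then, the extensions below are ordinary fpext operations and only
; the multiplication itself is strict. A constrained version would
; presumably take an extra fpexcept metadata operand, purely as a sketch:
;   %x = call fp128 @llvm.experimental.constrained.fpext.f128.f64(
;                     double %d, metadata !"fpexcept.strict")
; No such intrinsic exists at this point.)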
2 ;
3 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
4
5 declare fp128 @llvm.experimental.constrained.fmul.f128(fp128, fp128, metadata, metadata)
6
7 declare double @foo()
8
9 ; Check register multiplication. "mxdbr %f0, %f2" is not valid from LLVM's
10 ; point of view, because %f2 is the low register of the FP128 %f0. Pass the
11 ; multiplier in %f4 instead.
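; (An fp128 value occupies an FP register pair on SystemZ, e.g. %f0/%f2,
; so a multiplier in %f2 would be clobbered by the 128-bit result.)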
12 define void @f1(double %f1, double %dummy, double %f2, fp128 *%dst) {
13 ; CHECK-LABEL: f1:
14 ; CHECK: mxdbr %f0, %f4
15 ; CHECK: std %f0, 0(%r2)
16 ; CHECK: std %f2, 8(%r2)
17 ; CHECK: br %r14
18 %f1x = fpext double %f1 to fp128
19 %f2x = fpext double %f2 to fp128
20 %res = call fp128 @llvm.experimental.constrained.fmul.f128(
21 fp128 %f1x, fp128 %f2x,
22 metadata !"round.dynamic",
23 metadata !"fpexcept.strict")
24 store fp128 %res, fp128 *%dst
25 ret void
26 }
27
28 ; Check the low end of the MXDB range.
29 define void @f2(double %f1, double *%ptr, fp128 *%dst) {
30 ; CHECK-LABEL: f2:
31 ; CHECK: mxdb %f0, 0(%r2)
32 ; CHECK: std %f0, 0(%r3)
33 ; CHECK: std %f2, 8(%r3)
34 ; CHECK: br %r14
35 %f2 = load double, double *%ptr
36 %f1x = fpext double %f1 to fp128
37 %f2x = fpext double %f2 to fp128
38 %res = call fp128 @llvm.experimental.constrained.fmul.f128(
39 fp128 %f1x, fp128 %f2x,
40 metadata !"round.dynamic",
41 metadata !"fpexcept.strict")
42 store fp128 %res, fp128 *%dst
43 ret void
44 }
45
46 ; Check the high end of the aligned MXDB range.
47 define void @f3(double %f1, double *%base, fp128 *%dst) {
48 ; CHECK-LABEL: f3:
49 ; CHECK: mxdb %f0, 4088(%r2)
50 ; CHECK: std %f0, 0(%r3)
51 ; CHECK: std %f2, 8(%r3)
52 ; CHECK: br %r14
53 %ptr = getelementptr double, double *%base, i64 511
54 %f2 = load double, double *%ptr
55 %f1x = fpext double %f1 to fp128
56 %f2x = fpext double %f2 to fp128
57 %res = call fp128 @llvm.experimental.constrained.fmul.f128(
58 fp128 %f1x, fp128 %f2x,
59 metadata !"round.dynamic",
60 metadata !"fpexcept.strict")
61 store fp128 %res, fp128 *%dst
62 ret void
63 }
64
65 ; Check the next doubleword up, which needs separate address logic.
66 ; Other sequences besides this one would be OK.
67 define void @f4(double %f1, double *%base, fp128 *%dst) {
68 ; CHECK-LABEL: f4:
69 ; CHECK: aghi %r2, 4096
70 ; CHECK: mxdb %f0, 0(%r2)
71 ; CHECK: std %f0, 0(%r3)
72 ; CHECK: std %f2, 8(%r3)
73 ; CHECK: br %r14
74 %ptr = getelementptr double, double *%base, i64 512
75 %f2 = load double, double *%ptr
76 %f1x = fpext double %f1 to fp128
77 %f2x = fpext double %f2 to fp128
78 %res = call fp128 @llvm.experimental.constrained.fmul.f128(
79 fp128 %f1x, fp128 %f2x,
80 metadata !"round.dynamic",
81 metadata !"fpexcept.strict")
82 store fp128 %res, fp128 *%dst
83 ret void
84 }
85
86 ; Check negative displacements, which also need separate address logic.