llvm.org GIT mirror llvm / ee0d992
Add support for fast-math flags to the FCmp instruction. FCmp behaves a lot like a floating-point binary operator in many ways, and can benefit from fast-math information. Flags such as nsz and nnan can affect whether this fcmp (in combination with a select) can be treated as a fminnum/fmaxnum operation. This adds backwards-compatible bitcode support, IR parsing and writing, LangRef changes and IRBuilder changes. I'll need to audit InstSimplify and InstCombine in a followup to find places where flags should be copied. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241901 91177308-0d34-0410-b5e6-96231b3b80d8 James Molloy 5 years ago
8 changed file(s) with 162 addition(s) and 67 deletion(s). Raw diff Collapse all Expand all
18361836
18371837 LLVM IR floating-point binary ops (:ref:`fadd <i_fadd>`,
18381838 :ref:`fsub <i_fsub>`, :ref:`fmul <i_fmul>`, :ref:`fdiv <i_fdiv>`,
1839 :ref:`frem <i_frem>`) have the following flags that can be set to enable
1840 otherwise unsafe floating point operations
1839 :ref:`frem <i_frem>`, :ref:`fcmp <i_fcmp>`) have the following flags that can
1840 be set to enable otherwise unsafe floating point operations
18411841
18421842 ``nnan``
18431843 No NaNs - Allow optimizations to assume the arguments and result are not
75727572
75737573 ::
75747574
7575 <result> = fcmp <cond> <ty> <op1>, <op2> ; yields i1 or <N x i1>:result
7575 <result> = fcmp [fast-math flags]* <cond> <ty> <op1>, <op2> ; yields i1 or <N x i1>:result
75767576
75777577 Overview:
75787578 """""""""
76557655 #. ``uno``: yields ``true`` if either operand is a QNAN.
76567656 #. ``true``: always yields ``true``, regardless of operands.
76577657
7658 The ``fcmp`` instruction can also optionally take any number of
7659 :ref:`fast-math flags <fastmath>`, which are optimization hints to enable
7660 otherwise unsafe floating point optimizations.
7661
7662 Any set of fast-math flags is legal on an ``fcmp`` instruction, but the
7663 only flags that have any effect on its semantics are those that allow
7664 assumptions to be made about the values of input arguments; namely
7665 ``nnan``, ``ninf``, and ``nsz``. See :ref:`fastmath` for more information.
7666
76587667 Example:
76597668 """"""""
76607669
13811381 return CreateICmp(ICmpInst::ICMP_SLE, LHS, RHS, Name);
13821382 }
13831383
1384 Value *CreateFCmpOEQ(Value *LHS, Value *RHS, const Twine &Name = "") {
1385 return CreateFCmp(FCmpInst::FCMP_OEQ, LHS, RHS, Name);
1386 }
1387 Value *CreateFCmpOGT(Value *LHS, Value *RHS, const Twine &Name = "") {
1388 return CreateFCmp(FCmpInst::FCMP_OGT, LHS, RHS, Name);
1389 }
1390 Value *CreateFCmpOGE(Value *LHS, Value *RHS, const Twine &Name = "") {
1391 return CreateFCmp(FCmpInst::FCMP_OGE, LHS, RHS, Name);
1392 }
1393 Value *CreateFCmpOLT(Value *LHS, Value *RHS, const Twine &Name = "") {
1394 return CreateFCmp(FCmpInst::FCMP_OLT, LHS, RHS, Name);
1395 }
1396 Value *CreateFCmpOLE(Value *LHS, Value *RHS, const Twine &Name = "") {
1397 return CreateFCmp(FCmpInst::FCMP_OLE, LHS, RHS, Name);
1398 }
1399 Value *CreateFCmpONE(Value *LHS, Value *RHS, const Twine &Name = "") {
1400 return CreateFCmp(FCmpInst::FCMP_ONE, LHS, RHS, Name);
1401 }
1402 Value *CreateFCmpORD(Value *LHS, Value *RHS, const Twine &Name = "") {
1403 return CreateFCmp(FCmpInst::FCMP_ORD, LHS, RHS, Name);
1404 }
1405 Value *CreateFCmpUNO(Value *LHS, Value *RHS, const Twine &Name = "") {
1406 return CreateFCmp(FCmpInst::FCMP_UNO, LHS, RHS, Name);
1407 }
1408 Value *CreateFCmpUEQ(Value *LHS, Value *RHS, const Twine &Name = "") {
1409 return CreateFCmp(FCmpInst::FCMP_UEQ, LHS, RHS, Name);
1410 }
1411 Value *CreateFCmpUGT(Value *LHS, Value *RHS, const Twine &Name = "") {
1412 return CreateFCmp(FCmpInst::FCMP_UGT, LHS, RHS, Name);
1413 }
1414 Value *CreateFCmpUGE(Value *LHS, Value *RHS, const Twine &Name = "") {
1415 return CreateFCmp(FCmpInst::FCMP_UGE, LHS, RHS, Name);
1416 }
1417 Value *CreateFCmpULT(Value *LHS, Value *RHS, const Twine &Name = "") {
1418 return CreateFCmp(FCmpInst::FCMP_ULT, LHS, RHS, Name);
1419 }
1420 Value *CreateFCmpULE(Value *LHS, Value *RHS, const Twine &Name = "") {
1421 return CreateFCmp(FCmpInst::FCMP_ULE, LHS, RHS, Name);
1422 }
1423 Value *CreateFCmpUNE(Value *LHS, Value *RHS, const Twine &Name = "") {
1424 return CreateFCmp(FCmpInst::FCMP_UNE, LHS, RHS, Name);
1384 Value *CreateFCmpOEQ(Value *LHS, Value *RHS, const Twine &Name = "",
1385 MDNode *FPMathTag = nullptr) {
1386 return CreateFCmp(FCmpInst::FCMP_OEQ, LHS, RHS, Name, FPMathTag);
1387 }
1388 Value *CreateFCmpOGT(Value *LHS, Value *RHS, const Twine &Name = "",
1389 MDNode *FPMathTag = nullptr) {
1390 return CreateFCmp(FCmpInst::FCMP_OGT, LHS, RHS, Name, FPMathTag);
1391 }
1392 Value *CreateFCmpOGE(Value *LHS, Value *RHS, const Twine &Name = "",
1393 MDNode *FPMathTag = nullptr) {
1394 return CreateFCmp(FCmpInst::FCMP_OGE, LHS, RHS, Name, FPMathTag);
1395 }
1396 Value *CreateFCmpOLT(Value *LHS, Value *RHS, const Twine &Name = "",
1397 MDNode *FPMathTag = nullptr) {
1398 return CreateFCmp(FCmpInst::FCMP_OLT, LHS, RHS, Name, FPMathTag);
1399 }
1400 Value *CreateFCmpOLE(Value *LHS, Value *RHS, const Twine &Name = "",
1401 MDNode *FPMathTag = nullptr) {
1402 return CreateFCmp(FCmpInst::FCMP_OLE, LHS, RHS, Name, FPMathTag);
1403 }
1404 Value *CreateFCmpONE(Value *LHS, Value *RHS, const Twine &Name = "",
1405 MDNode *FPMathTag = nullptr) {
1406 return CreateFCmp(FCmpInst::FCMP_ONE, LHS, RHS, Name, FPMathTag);
1407 }
1408 Value *CreateFCmpORD(Value *LHS, Value *RHS, const Twine &Name = "",
1409 MDNode *FPMathTag = nullptr) {
1410 return CreateFCmp(FCmpInst::FCMP_ORD, LHS, RHS, Name, FPMathTag);
1411 }
1412 Value *CreateFCmpUNO(Value *LHS, Value *RHS, const Twine &Name = "",
1413 MDNode *FPMathTag = nullptr) {
1414 return CreateFCmp(FCmpInst::FCMP_UNO, LHS, RHS, Name, FPMathTag);
1415 }
1416 Value *CreateFCmpUEQ(Value *LHS, Value *RHS, const Twine &Name = "",
1417 MDNode *FPMathTag = nullptr) {
1418 return CreateFCmp(FCmpInst::FCMP_UEQ, LHS, RHS, Name, FPMathTag);
1419 }
1420 Value *CreateFCmpUGT(Value *LHS, Value *RHS, const Twine &Name = "",
1421 MDNode *FPMathTag = nullptr) {
1422 return CreateFCmp(FCmpInst::FCMP_UGT, LHS, RHS, Name, FPMathTag);
1423 }
1424 Value *CreateFCmpUGE(Value *LHS, Value *RHS, const Twine &Name = "",
1425 MDNode *FPMathTag = nullptr) {
1426 return CreateFCmp(FCmpInst::FCMP_UGE, LHS, RHS, Name, FPMathTag);
1427 }
1428 Value *CreateFCmpULT(Value *LHS, Value *RHS, const Twine &Name = "",
1429 MDNode *FPMathTag = nullptr) {
1430 return CreateFCmp(FCmpInst::FCMP_ULT, LHS, RHS, Name, FPMathTag);
1431 }
1432 Value *CreateFCmpULE(Value *LHS, Value *RHS, const Twine &Name = "",
1433 MDNode *FPMathTag = nullptr) {
1434 return CreateFCmp(FCmpInst::FCMP_ULE, LHS, RHS, Name, FPMathTag);
1435 }
1436 Value *CreateFCmpUNE(Value *LHS, Value *RHS, const Twine &Name = "",
1437 MDNode *FPMathTag = nullptr) {
1438 return CreateFCmp(FCmpInst::FCMP_UNE, LHS, RHS, Name, FPMathTag);
14251439 }
14261440
14271441 Value *CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
14321446 return Insert(new ICmpInst(P, LHS, RHS), Name);
14331447 }
14341448 Value *CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
1435 const Twine &Name = "") {
1449 const Twine &Name = "", MDNode *FPMathTag = nullptr) {
14361450 if (Constant *LC = dyn_cast(LHS))
14371451 if (Constant *RC = dyn_cast(RHS))
14381452 return Insert(Folder.CreateFCmp(P, LC, RC), Name);
1439 return Insert(new FCmpInst(P, LHS, RHS), Name);
1453 return Insert(AddFPMathAttributes(new FCmpInst(P, LHS, RHS),
1454 FPMathTag, FMF), Name);
14401455 }
14411456
14421457 //===--------------------------------------------------------------------===//
304304 float getFPAccuracy() const;
305305
306306 static inline bool classof(const Instruction *I) {
307 return I->getType()->isFPOrFPVectorTy();
307 return I->getType()->isFPOrFPVectorTy() ||
308 I->getOpcode() == Instruction::FCmp;
308309 }
309310 static inline bool classof(const Value *V) {
310311 return isa(V) && classof(cast(V));
45334533 case lltok::kw_and:
45344534 case lltok::kw_or:
45354535 case lltok::kw_xor: return ParseLogical(Inst, PFS, KeywordVal);
4536 case lltok::kw_icmp:
4537 case lltok::kw_fcmp: return ParseCompare(Inst, PFS, KeywordVal);
4536 case lltok::kw_icmp: return ParseCompare(Inst, PFS, KeywordVal);
4537 case lltok::kw_fcmp: {
4538 FastMathFlags FMF = EatFastMathFlagsIfPresent();
4539 int Res = ParseCompare(Inst, PFS, KeywordVal);
4540 if (Res != 0)
4541 return Res;
4542 if (FMF.any())
4543 Inst->setFastMathFlags(FMF);
4544 return 0;
4545 }
4546
45384547 // Casts.
45394548 case lltok::kw_trunc:
45404549 case lltok::kw_zext:
694694 case bitc::COMDAT_SELECTION_KIND_SAME_SIZE:
695695 return Comdat::SameSize;
696696 }
697 }
698
699 static FastMathFlags getDecodedFastMathFlags(unsigned Val) {
700 FastMathFlags FMF;
701 if (0 != (Val & FastMathFlags::UnsafeAlgebra))
702 FMF.setUnsafeAlgebra();
703 if (0 != (Val & FastMathFlags::NoNaNs))
704 FMF.setNoNaNs();
705 if (0 != (Val & FastMathFlags::NoInfs))
706 FMF.setNoInfs();
707 if (0 != (Val & FastMathFlags::NoSignedZeros))
708 FMF.setNoSignedZeros();
709 if (0 != (Val & FastMathFlags::AllowReciprocal))
710 FMF.setAllowReciprocal();
711 return FMF;
697712 }
698713
699714 static void upgradeDLLImportExportLinkage(llvm::GlobalValue *GV, unsigned Val) {
34713486 if (Record[OpNum] & (1 << bitc::PEO_EXACT))
34723487 cast(I)->setIsExact(true);
34733488 } else if (isa(I)) {
3474 FastMathFlags FMF;
3475 if (0 != (Record[OpNum] & FastMathFlags::UnsafeAlgebra))
3476 FMF.setUnsafeAlgebra();
3477 if (0 != (Record[OpNum] & FastMathFlags::NoNaNs))
3478 FMF.setNoNaNs();
3479 if (0 != (Record[OpNum] & FastMathFlags::NoInfs))
3480 FMF.setNoInfs();
3481 if (0 != (Record[OpNum] & FastMathFlags::NoSignedZeros))
3482 FMF.setNoSignedZeros();
3483 if (0 != (Record[OpNum] & FastMathFlags::AllowReciprocal))
3484 FMF.setAllowReciprocal();
3489 FastMathFlags FMF = getDecodedFastMathFlags(Record[OpNum]);
34853490 if (FMF.any())
34863491 I->setFastMathFlags(FMF);
34873492 }
37383743 unsigned OpNum = 0;
37393744 Value *LHS, *RHS;
37403745 if (getValueTypePair(Record, OpNum, NextValueNo, LHS) ||
3741 popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) ||
3742 OpNum+1 != Record.size())
3746 popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS))
3747 return error("Invalid record");
3748
3749 unsigned PredVal = Record[OpNum];
3750 bool IsFP = LHS->getType()->isFPOrFPVectorTy();
3751 FastMathFlags FMF;
3752 if (IsFP && Record.size() > OpNum+1)
3753 FMF = getDecodedFastMathFlags(Record[++OpNum]);
3754
3755 if (OpNum+1 != Record.size())
37433756 return error("Invalid record");
37443757
37453758 if (LHS->getType()->isFPOrFPVectorTy())
3746 I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS);
3759 I = new FCmpInst((FCmpInst::Predicate)PredVal, LHS, RHS);
37473760 else
3748 I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS);
3761 I = new ICmpInst((ICmpInst::Predicate)PredVal, LHS, RHS);
3762
3763 if (FMF.any())
3764 I->setFastMathFlags(FMF);
37493765 InstructionList.push_back(I);
37503766 break;
37513767 }
17581758 pushValue(I.getOperand(2), InstID, Vals, VE);
17591759 break;
17601760 case Instruction::ICmp:
1761 case Instruction::FCmp:
1761 case Instruction::FCmp: {
17621762 // compare returning Int1Ty or vector of Int1Ty
17631763 Code = bitc::FUNC_CODE_INST_CMP2;
17641764 PushValueAndType(I.getOperand(0), InstID, Vals, VE);
17651765 pushValue(I.getOperand(1), InstID, Vals, VE);
17661766 Vals.push_back(cast(I).getPredicate());
1767 uint64_t Flags = GetOptimizationFlags(&I);
1768 if (Flags != 0)
1769 Vals.push_back(Flags);
17671770 break;
1771 }
17681772
17691773 case Instruction::Ret:
17701774 {
0 ; RUN: llvm-as < %s | llvm-dis > %t0
1 ; RUN: opt -S < %s > %t1
2 ; RUN: diff %t0 %t1
3 ; RUN: FileCheck < %t1 %s
4
5 ; Make sure flags on fcmp instructions are serialized/deserialized properly.
6
7 define i1 @foo(float %a, float %b, double %c, double %d) {
8 ; CHECK: %plain = fcmp ueq float %a, %b
9 %plain = fcmp ueq float %a, %b
10 ; CHECK: %fast = fcmp fast olt float %a, %b
11 %fast = fcmp fast olt float %a, %b
12 ; CHECK: %nsz = fcmp nsz uge float %a, %b
13 %nsz = fcmp nsz uge float %a, %b
14 ; CHECK: %nnan = fcmp nnan nsz oge double %c, %d
15 %nnan = fcmp nnan nsz oge double %c, %d
16
17 %dce1 = or i1 %plain, %fast
18 %dce2 = or i1 %dce1, %nsz
19 %dce3 = or i1 %dce2, %nnan
20
21 ret i1 %dce3
22 }
129129
130130 TEST_F(IRBuilderTest, FastMathFlags) {
131131 IRBuilder<> Builder(BB);
132 Value *F;
133 Instruction *FDiv, *FAdd;
132 Value *F, *FC;
133 Instruction *FDiv, *FAdd, *FCmp;
134134
135135 F = Builder.CreateLoad(GV);
136136 F = Builder.CreateFAdd(F, F);
186186 ASSERT_TRUE(isa(F));
187187 FDiv = cast(F);
188188 EXPECT_TRUE(FDiv->hasAllowReciprocal());
189
190 Builder.clearFastMathFlags();
191
192 FC = Builder.CreateFCmpOEQ(F, F);
193 ASSERT_TRUE(isa(FC));
194 FCmp = cast(FC);
195 EXPECT_FALSE(FCmp->hasAllowReciprocal());
196
197 FMF.clear();
198 FMF.setAllowReciprocal();
199 Builder.SetFastMathFlags(FMF);
200
201 FC = Builder.CreateFCmpOEQ(F, F);
202 EXPECT_TRUE(Builder.getFastMathFlags().any());
203 EXPECT_TRUE(Builder.getFastMathFlags().AllowReciprocal);
204 ASSERT_TRUE(isa(FC));
205 FCmp = cast(FC);
206 EXPECT_TRUE(FCmp->hasAllowReciprocal());
189207
190208 Builder.clearFastMathFlags();
191209