llvm.org GIT mirror llvm / 25ed974
Revert "Allow X86::COND_NE_OR_P and X86::COND_NP_OR_E to be reversed." and "Add a missing test case for r258847." This reverts commit r258847, r258848. Causes miscompilations and backend errors. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@258927 91177308-0d34-0410-b5e6-96231b3b80d8 Benjamin Kramer 4 years ago
7 changed file(s) with 71 addition(s) and 183 deletion(s). Raw diff Collapse all Expand all
38043804 case X86::COND_NP: return X86::COND_P;
38053805 case X86::COND_O: return X86::COND_NO;
38063806 case X86::COND_NO: return X86::COND_O;
3807 case X86::COND_NE_OR_P: return X86::COND_E_AND_NP;
3808 case X86::COND_NP_OR_E: return X86::COND_P_AND_NE;
3809 case X86::COND_E_AND_NP: return X86::COND_NE_OR_P;
3810 case X86::COND_P_AND_NE: return X86::COND_NP_OR_E;
38113807 }
38123808 }
38133809
40013997 MachineBasicBlock::iterator OldInst = I;
40023998
40033999 BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
4004 .addMBB(UnCondBrIter->getOperand(0).getMBB());
4000 .addMBB(UnCondBrIter->getOperand(0).getMBB());
40054001 BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_1))
4006 .addMBB(TargetBB);
4002 .addMBB(TargetBB);
40074003
40084004 OldInst->eraseFromParent();
40094005 UnCondBrIter->eraseFromParent();
40274023 assert(Cond.size() == 1);
40284024 assert(TBB);
40294025
4026 // Only handle the case where all conditional branches branch to the same
4027 // destination.
4028 if (TBB != I->getOperand(0).getMBB())
4029 return true;
4030
40304031 // If the conditions are the same, we can leave them alone.
40314032 X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
40324033 if (OldBranchCode == BranchCode)
40354036 // If they differ, see if they fit one of the known patterns. Theoretically,
40364037 // we could handle more patterns here, but we shouldn't expect to see them
40374038 // if instruction selection has done a reasonable job.
4038 auto NewTBB = I->getOperand(0).getMBB();
4039 if (TBB == NewTBB &&
4040 ((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_E) ||
4041 (OldBranchCode == X86::COND_E && BranchCode == X86::COND_NP))) {
4039 if ((OldBranchCode == X86::COND_NP &&
4040 BranchCode == X86::COND_E) ||
4041 (OldBranchCode == X86::COND_E &&
4042 BranchCode == X86::COND_NP))
40424043 BranchCode = X86::COND_NP_OR_E;
4043 } else if (TBB == NewTBB &&
4044 ((OldBranchCode == X86::COND_P && BranchCode == X86::COND_NE) ||
4045 (OldBranchCode == X86::COND_NE && BranchCode == X86::COND_P))) {
4044 else if ((OldBranchCode == X86::COND_P &&
4045 BranchCode == X86::COND_NE) ||
4046 (OldBranchCode == X86::COND_NE &&
4047 BranchCode == X86::COND_P))
40464048 BranchCode = X86::COND_NE_OR_P;
4047 } else if ((OldBranchCode == X86::COND_NE && BranchCode == X86::COND_NP) ||
4048 (OldBranchCode == X86::COND_P && BranchCode == X86::COND_E)) {
4049 // X86::COND_P_AND_NE usually has two different branch destinations.
4050 //
4051 // JNP B1
4052 // JNE B2
4053 // B1: (fall-through)
4054 // B2:
4055 //
4056 // Here this condition branches to B2 only if P && NE. It has another
4057 // equivalent form:
4058 //
4059 // JE B1
4060 // JP B2
4061 // B1: (fall-through)
4062 // B2:
4063 //
4064 // Similarly it branches to B2 only if NE && P. That is why this condition
4065 // is named COND_P_AND_NE.
4066 BranchCode = X86::COND_P_AND_NE;
4067 } else if ((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_NE) ||
4068 (OldBranchCode == X86::COND_E && BranchCode == X86::COND_P)) {
4069 // See comments above for X86::COND_P_AND_NE.
4070 BranchCode = X86::COND_E_AND_NP;
4071 } else
4049 else
40724050 return true;
40734051
40744052 // Update the MachineOperand.
41774155 return Count;
41784156 }
41794157
4180 static MachineBasicBlock *getFallThroughMBB(MachineBasicBlock *MBB) {
4181 auto I = std::next(MBB->getIterator());
4182 if (I == MBB->getParent()->end())
4183 return nullptr;
4184 return &*I;
4185 }
4186
41874158 unsigned
41884159 X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
41894160 MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
41994170 BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(TBB);
42004171 return 1;
42014172 }
4202
4203 // If FBB is null, it is implied to be a fall-through block.
4204 bool FallThru = FBB == nullptr;
42054173
42064174 // Conditional branch.
42074175 unsigned Count = 0;
42214189 BuildMI(&MBB, DL, get(X86::JP_1)).addMBB(TBB);
42224190 ++Count;
42234191 break;
4224 case X86::COND_P_AND_NE:
4225 // Use the next block of MBB as FBB if it is null.
4226 if (FBB == nullptr) {
4227 FBB = getFallThroughMBB(&MBB);
4228 assert(FBB && "MBB cannot be the last block in function when the false "
4229 "body is a fall-through.");
4230 }
4231 // Synthesize NEG_NP_OR_E with two branches.
4232 BuildMI(&MBB, DL, get(X86::JNP_1)).addMBB(FBB);
4233 ++Count;
4234 BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(TBB);
4235 ++Count;
4236 break;
4237 case X86::COND_E_AND_NP:
4238 // Use the next block of MBB as FBB if it is null.
4239 if (FBB == nullptr) {
4240 FBB = getFallThroughMBB(&MBB);
4241 assert(FBB && "MBB cannot be the last block in function when the false "
4242 "body is a fall-through.");
4243 }
4244 // Synthesize NEG_NE_OR_P with two branches.
4245 BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(FBB);
4246 ++Count;
4247 BuildMI(&MBB, DL, get(X86::JNP_1)).addMBB(TBB);
4248 ++Count;
4249 break;
42504192 default: {
42514193 unsigned Opc = GetCondBranchFromCond(CC);
42524194 BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
42534195 ++Count;
42544196 }
42554197 }
4256 if (!FallThru) {
4198 if (FBB) {
42574199 // Two-way Conditional branch. Insert the second branch.
42584200 BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(FBB);
42594201 ++Count;
67746716 ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
67756717 assert(Cond.size() == 1 && "Invalid X86 branch condition!");
67766718 X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
6719 if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E)
6720 return true;
67776721 Cond[0].setImm(GetOppositeBranchCondition(CC));
67786722 return false;
67796723 }
2828 namespace X86 {
2929 // X86 specific condition code. These correspond to X86_*_COND in
3030 // X86InstrInfo.td. They must be kept in synch.
31 enum CondCode {
32 COND_A = 0,
33 COND_AE = 1,
34 COND_B = 2,
35 COND_BE = 3,
36 COND_E = 4,
37 COND_G = 5,
38 COND_GE = 6,
39 COND_L = 7,
40 COND_LE = 8,
41 COND_NE = 9,
42 COND_NO = 10,
43 COND_NP = 11,
44 COND_NS = 12,
45 COND_O = 13,
46 COND_P = 14,
47 COND_S = 15,
48 LAST_VALID_COND = COND_S,
49
50 // Artificial condition codes. These are used by AnalyzeBranch
51 // to indicate a block terminated with two conditional branches to
52 // the same location. This occurs in code using FCMP_OEQ or FCMP_UNE,
53 // which can't be represented on x86 with a single condition. These
54 // are never used in MachineInstrs.
55 COND_NE_OR_P,
56 COND_NP_OR_E,
57
58 // Artificial condition codes. These are used to represent the negation of
59 // above two conditions. The only scenario we need these two conditions is
60 // when we try to reverse above two conditions in order to remove redundant
61 // unconditional jumps. Note that both true and false bodies need to be
62 // available in order to correctly synthesize instructions for them. These are
63 // never used in MachineInstrs.
64 COND_E_AND_NP, // negate of COND_NE_OR_P
65 COND_P_AND_NE, // negate of COND_NP_OR_E
66
67 COND_INVALID
68 };
31 enum CondCode {
32 COND_A = 0,
33 COND_AE = 1,
34 COND_B = 2,
35 COND_BE = 3,
36 COND_E = 4,
37 COND_G = 5,
38 COND_GE = 6,
39 COND_L = 7,
40 COND_LE = 8,
41 COND_NE = 9,
42 COND_NO = 10,
43 COND_NP = 11,
44 COND_NS = 12,
45 COND_O = 13,
46 COND_P = 14,
47 COND_S = 15,
48 LAST_VALID_COND = COND_S,
49
50 // Artificial condition codes. These are used by AnalyzeBranch
51 // to indicate a block terminated with two conditional branches to
52 // the same location. This occurs in code using FCMP_OEQ or FCMP_UNE,
53 // which can't be represented on x86 with a single condition. These
54 // are never used in MachineInstrs.
55 COND_NE_OR_P,
56 COND_NP_OR_E,
57
58 COND_INVALID
59 };
6960
7061 // Turn condition code into conditional branch opcode.
7162 unsigned GetCondBranchFromCond(CondCode CC);
462462 }
463463
464464 define void @fpcmp_unanalyzable_branch(i1 %cond) {
465 ; This function's CFG contains a once-unanalyzable branch (une on floating
466 ; points). As now it becomes analyzable, we should get best layout in which each
467 ; edge in 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end' is
468 ; fall-through.
465 ; This function's CFG contains an unanalyzable branch that is likely to be
466 ; split due to having a different high-probability predecessor.
469467 ; CHECK: fpcmp_unanalyzable_branch
470468 ; CHECK: %entry
471 ; CHECK: %entry.if.then_crit_edge
472 ; CHECK: %if.then
473 ; CHECK: %if.end
474 ; CHECK: %exit
469 ; CHECK: %exit
470 ; CHECK-NOT: %if.then
471 ; CHECK-NOT: %if.end
472 ; CHECK-NOT: jne
473 ; CHECK-NOT: jnp
475474 ; CHECK: jne
476475 ; CHECK-NEXT: jnp
476 ; CHECK-NEXT: %if.then
477477
478478 entry:
479479 ; Note that this branch must be strongly biased toward
480480 ; 'entry.if.then_crit_edge' to ensure that we would try to form a chain for
481 ; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end'.
481 ; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then'. It is the last edge in that
482 ; chain which would violate the unanalyzable branch in 'exit', but we won't even
483 ; try this trick unless 'if.then' is believed to almost always be reached from
484 ; 'entry.if.then_crit_edge'.
482485 br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1
483486
484487 entry.if.then_crit_edge:
490493
491494 exit:
492495 %cmp.i = fcmp une double 0.000000e+00, undef
493 br i1 %cmp.i, label %if.then, label %if.end, !prof !3
496 br i1 %cmp.i, label %if.then, label %if.end
494497
495498 if.then:
496499 %0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ]
503506 }
504507
505508 !1 = !{!"branch_weights", i32 1000, i32 1}
506 !3 = !{!"branch_weights", i32 1, i32 1000}
507509
508510 declare i32 @f()
509511 declare i32 @g()
662664 ; Ensure that we can handle unanalyzable branches where the destination block
663665 ; gets selected as the optimal successor to merge.
664666 ;
665 ; This branch is now analyzable and hence the destination block becomes the
666 ; hotter one. The right order is entry->bar->exit->foo.
667 ;
668667 ; CHECK: unanalyzable_branch_to_best_succ
669668 ; CHECK: %entry
669 ; CHECK: %foo
670670 ; CHECK: %bar
671671 ; CHECK: %exit
672 ; CHECK: %foo
673672
674673 entry:
675674 ; Bias this branch toward bar to ensure we form that chain.
44 ; CHECK-LABEL: fcmp_oeq
55 ; CHECK: ucomiss %xmm1, %xmm0
66 ; CHECK-NEXT: jne {{LBB.+_1}}
7 ; CHECK-NEXT: jp {{LBB.+_1}}
7 ; CHECK-NEXT: jnp {{LBB.+_2}}
88 %1 = fcmp oeq float %x, %y
99 br i1 %1, label %bb1, label %bb2
1010 bb2:
161161 ; CHECK-LABEL: fcmp_une
162162 ; CHECK: ucomiss %xmm1, %xmm0
163163 ; CHECK-NEXT: jne {{LBB.+_2}}
164 ; CHECK-NEXT: jnp {{LBB.+_1}}
164 ; CHECK-NEXT: jp {{LBB.+_2}}
165 ; CHECK-NEXT: jmp {{LBB.+_1}}
165166 %1 = fcmp une float %x, %y
166167 br i1 %1, label %bb1, label %bb2
167168 bb2:
1616 ; CHECK: xorps %xmm1, %xmm1
1717 ; CHECK-NEXT: ucomiss %xmm1, %xmm0
1818 ; CHECK-NEXT: jne {{LBB.+_1}}
19 ; CHECK-NEXT: jp {{LBB.+_1}}
19 ; CHECK-NEXT: jnp {{LBB.+_2}}
2020 %1 = fcmp oeq float %x, 0.000000e+00
2121 br i1 %1, label %bb1, label %bb2
2222 bb2:
337337 ; CHECK: xorps %xmm1, %xmm1
338338 ; CHECK-NEXT: ucomiss %xmm1, %xmm0
339339 ; CHECK-NEXT: jne {{LBB.+_2}}
340 ; CHECK-NEXT: jnp {{LBB.+_1}}
340 ; CHECK-NEXT: jp {{LBB.+_2}}
341 ; CHECK-NEXT: jmp {{LBB.+_1}}
341342 %1 = fcmp une float %x, 0.000000e+00
342343 br i1 %1, label %bb1, label %bb2
343344 bb2:
1818 ; addsd ...
1919 ; LBB0_2:
2020
21 define float @func1(float %x, float %y) nounwind readnone optsize ssp {
22 ; CHECK: func1
21 ; CHECK: func
2322 ; CHECK: jne [[LABEL:.*]]
2423 ; CHECK-NEXT: jp [[LABEL]]
2524 ; CHECK-NOT: jmp
26 ;
25
26 define float @func(float %x, float %y) nounwind readnone optsize ssp {
2727 entry:
2828 %0 = fpext float %x to double
2929 %1 = fpext float %y to double
4040 %.0 = fptrunc double %.0.in to float
4141 ret float %.0
4242 }
43
44 define float @func2(float %x, float %y) nounwind readnone optsize ssp {
45 ; CHECK: func2
46 ; CHECK: jne [[LABEL:.*]]
47 ; CHECK-NEXT: jp [[LABEL]]
48 ; CHECK: %bb2
49 ; CHECK: %bb1
50 ; CHECK: jmp
51 ;
52 entry:
53 %0 = fpext float %x to double
54 %1 = fpext float %y to double
55 %2 = fmul double %0, %1
56 %3 = fcmp une double %2, 0.000000e+00
57 br i1 %3, label %bb1, label %bb2, !prof !1
58
59 bb1:
60 %4 = fadd double %2, -1.000000e+00
61 br label %bb2
62
63 bb2:
64 %.0.in = phi double [ %4, %bb1 ], [ %2, %entry ]
65 %.0 = fptrunc double %.0.in to float
66 ret float %.0
67 }
68
69 !1 = !{!"branch_weights", i32 1, i32 1000}
+0
-21
test/CodeGen/X86/x86-analyze-branch-jne-jp.ll less more
None ; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s -check-prefix=CHECK
1
2 ; Test if the negation of the non-equality check between floating points is
3 ; translated to jnp followed by jne.
4
5 ; CHECK: jne
6 ; CHECK-NEXT: jnp
7 define void @foo(float %f) {
8 entry:
9 %cmp = fcmp une float %f, 0.000000e+00
10 br i1 %cmp, label %if.then, label %if.end
11
12 if.then:
13 tail call void @a()
14 br label %if.end
15
16 if.end:
17 ret void
18 }
19
20 declare void @a()