llvm.org GIT mirror — llvm @ fa7848b
[X86] condition branches folding for three-way conditional codes

This patch implements a pass that optimizes condition branches on x86 by
taking advantage of the three-way conditional code generated by compare
instructions. Currently, it tries to hoist EQ and NE conditional branches to
a dominant conditional branch condition where the same EQ/NE conditional code
is computed. An example:

  bb_0:
    cmp %0, 19
    jg bb_1
    jmp bb_2
  bb_1:
    cmp %0, 40
    jg bb_3
    jmp bb_4
  bb_4:
    cmp %0, 20
    je bb_5
    jmp bb_6

Here we could combine the two compares in bb_0 and bb_4 and have the
following code:

  bb_0:
    cmp %0, 20
    jg bb_1
    jl bb_2
    jmp bb_5
  bb_1:
    cmp %0, 40
    jg bb_3
    jmp bb_6

For the case of %0 == 20 (bb_5), we eliminate two jumps, and the control
height for bb_6 is also reduced. bb_4 is gone after the optimization.

This optimization is motivated by the branch pattern generated by switch
lowering: we always emit a pivot-1 compare for the inner nodes, and we
compare against the pivot again in the leaf (as in the pattern above).

This pass is currently enabled only on Intel's Sandybridge and later
microarchitectures. Some reviewers pointed out that on some other
microarchitectures (e.g., AMD Jaguar), this pass may increase branch density
to the point where it hurts the performance of the branch predictor.

Differential Revision: https://reviews.llvm.org/D46662

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@343993 91177308-0d34-0410-b5e6-96231b3b80d8

Rong Xu, 1 year, 11 months ago
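To make the motivating pattern concrete, here is a minimal source-level sketch (an illustration, not code from the patch) of the kind of switch whose binary-search lowering emits a "pivot-1" compare in an inner node and an equality compare against the pivot in a leaf, the adjacent pair this pass folds:

    // Hypothetical input. With sparse case values, the switch is lowered as a
    // binary search tree: an inner node tests e.g. "n > 19" (cmp $19; jg) and
    // the leaf below it tests "n == 20" (cmp $20; je).
    int dispatch(int n) {
      switch (n) {
      case 5:   return 1;
      case 20:  return 2;
      case 40:  return 3;
      case 100: return 4;
      default:  return 0;
      }
    }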
9 changed files with 947 additions and 0 deletions.
lib/Target/X86/CMakeLists.txt:

   X86CallingConv.cpp
   X86CallLowering.cpp
   X86CmovConversion.cpp
+  X86CondBrFolding.cpp
   X86DomainReassignment.cpp
   X86ExpandPseudo.cpp
   X86FastISel.cpp
lib/Target/X86/X86.h:

 /// Return a pass that transforms setcc + movzx pairs into xor + setcc.
 FunctionPass *createX86FixupSetCC();
 
+/// Return a pass that folds conditional branch jumps.
+FunctionPass *createX86CondBrFolding();
+
 /// Return a pass that avoids creating store forward block issues in the hardware.
 FunctionPass *createX86AvoidStoreForwardingBlocks();
 
403403 "Indicates that the BEXTR instruction is implemented as a single uop "
404404 "with good throughput.">;
405405
406 // Merge branches using three-way conditional code.
407 def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch",
408 "ThreewayBranchProfitable", "true",
409 "Merge branches to a three-way "
410 "conditional branch">;
411
406412 //===----------------------------------------------------------------------===//
407413 // Register File Description
408414 //===----------------------------------------------------------------------===//
731737 FeatureFastScalarFSQRT,
732738 FeatureFastSHLDRotate,
733739 FeatureSlowIncDec,
740 FeatureMergeToThreeWayBranch,
734741 FeatureMacroFusion
735742 ]>;
736743
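Because the feature is registered under the subtarget-feature string "merge-to-threeway-branch", it can in principle also be toggled independently of -mcpu; a hypothetical llc invocation (an illustration, not a test from the patch):

    llc -mtriple=x86_64-linux-gnu -mattr=+merge-to-threeway-branch test.ll -o -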
lib/Target/X86/X86CondBrFolding.cpp (new file):

//===---- X86CondBrFolding.cpp - optimize conditional branches ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// This file defines a pass that optimizes condition branches on x86 by taking
// advantage of the three-way conditional code generated by compare
// instructions.
// Currently, it tries to hoist EQ and NE conditional branches to a dominant
// conditional branch condition where the same EQ/NE conditional code is
// computed. An example:
//   bb_0:
//     cmp %0, 19
//     jg bb_1
//     jmp bb_2
//   bb_1:
//     cmp %0, 40
//     jg bb_3
//     jmp bb_4
//   bb_4:
//     cmp %0, 20
//     je bb_5
//     jmp bb_6
// Here we could combine the two compares in bb_0 and bb_4 and have the
// following code:
//   bb_0:
//     cmp %0, 20
//     jg bb_1
//     jl bb_2
//     jmp bb_5
//   bb_1:
//     cmp %0, 40
//     jg bb_3
//     jmp bb_6
// For the case of %0 == 20 (bb_5), we eliminate two jumps, and the control
// height for bb_6 is also reduced. bb_4 is gone after the optimization.
//
// There are plenty of such code patterns, especially from switch-case
// lowering, where we generate a compare against "pivot-1" for the inner nodes
// of the binary search tree.
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/BranchProbability.h"

using namespace llvm;

#define DEBUG_TYPE "x86-condbr-folding"

STATISTIC(NumFixedCondBrs, "Number of x86 condbr folded");

namespace {
class X86CondBrFoldingPass : public MachineFunctionPass {
public:
  X86CondBrFoldingPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "X86 CondBr Folding"; }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<MachineBranchProbabilityInfo>();
  }

private:
  static char ID;
};

char X86CondBrFoldingPass::ID = 0;
} // namespace

FunctionPass *llvm::createX86CondBrFolding() {
  return new X86CondBrFoldingPass();
}

// A structure that stores the auxiliary information for each MBB.
struct TargetMBBInfo {
  MachineBasicBlock *TBB;
  MachineBasicBlock *FBB;
  MachineInstr *BrInstr;
  MachineInstr *CmpInstr;
  X86::CondCode BranchCode;
  unsigned SrcReg;
  int CmpValue;
  bool Modified;  // True if the CondCode/CmpValue were canonicalized below.
  bool CmpBrOnly; // True if the block contains only the cmp and the branches.
};

// A class that optimizes conditional branches by hoisting and merging the
// CondCode.
class X86CondBrFolding {
public:
  X86CondBrFolding(const X86InstrInfo *TII,
                   const MachineBranchProbabilityInfo *MBPI,
                   MachineFunction &MF)
      : TII(TII), MBPI(MBPI), MF(MF) {}
  bool optimize();

private:
  const X86InstrInfo *TII;
  const MachineBranchProbabilityInfo *MBPI;
  MachineFunction &MF;
  std::vector<std::unique_ptr<TargetMBBInfo>> MBBInfos;
  SmallVector<MachineBasicBlock *, 2> RemoveList;

  void optimizeCondBr(MachineBasicBlock &MBB,
                      SmallVectorImpl<MachineBasicBlock *> &BranchPath);
  void fixBranchProb(MachineBasicBlock *NextMBB, MachineBasicBlock *RootMBB,
                     SmallVectorImpl<MachineBasicBlock *> &BranchPath);
  void replaceBrDest(MachineBasicBlock *MBB, MachineBasicBlock *OrigDest,
                     MachineBasicBlock *NewDest);
  void fixupModifiedCond(MachineBasicBlock *MBB);
  std::unique_ptr<TargetMBBInfo> analyzeMBB(MachineBasicBlock &MBB);
  static bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                             int &CmpValue);
  bool findPath(MachineBasicBlock *MBB,
                SmallVectorImpl<MachineBasicBlock *> &BranchPath);
  TargetMBBInfo *getMBBInfo(MachineBasicBlock *MBB) const {
    return MBBInfos[MBB->getNumber()].get();
  }
};

// Find a valid path on which we can reuse the CondCode.
// The resulting path (if the return value is true) is stored in BranchPath.
// Return value:
//  false: no valid path is found.
//  true: a valid path is found and the target BB can be reached.
bool X86CondBrFolding::findPath(
    MachineBasicBlock *MBB, SmallVectorImpl<MachineBasicBlock *> &BranchPath) {
  TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
  assert(MBBInfo && "Expecting a candidate MBB");
  int CmpValue = MBBInfo->CmpValue;

  MachineBasicBlock *PredMBB = *MBB->pred_begin();
  MachineBasicBlock *SaveMBB = MBB;
  while (PredMBB) {
    TargetMBBInfo *PredMBBInfo = getMBBInfo(PredMBB);
    if (!PredMBBInfo || PredMBBInfo->SrcReg != MBBInfo->SrcReg)
      return false;

    assert(SaveMBB == PredMBBInfo->TBB || SaveMBB == PredMBBInfo->FBB);
    bool IsFalseBranch = (SaveMBB == PredMBBInfo->FBB);

    X86::CondCode CC = PredMBBInfo->BranchCode;
    assert(CC == X86::COND_L || CC == X86::COND_G || CC == X86::COND_E);
    int PredCmpValue = PredMBBInfo->CmpValue;
    bool ValueCmpTrue = ((CmpValue < PredCmpValue && CC == X86::COND_L) ||
                         (CmpValue > PredCmpValue && CC == X86::COND_G) ||
                         (CmpValue == PredCmpValue && CC == X86::COND_E));
    // Check that the result of the value compare and the branch target match.
    if (!(ValueCmpTrue ^ IsFalseBranch)) {
      LLVM_DEBUG(dbgs() << "Dead BB detected!\n");
      return false;
    }

    BranchPath.push_back(PredMBB);
    // These are the conditions on which we could combine the compares.
    if ((CmpValue == PredCmpValue) ||
        (CmpValue == PredCmpValue - 1 && CC == X86::COND_L) ||
        (CmpValue == PredCmpValue + 1 && CC == X86::COND_G))
      return true;

    // If PredMBB has more than one predecessor, or is not a pure cmp-and-br
    // block, we bail out.
    if (PredMBB->pred_size() != 1 || !PredMBBInfo->CmpBrOnly)
      return false;

    SaveMBB = PredMBB;
    PredMBB = *PredMBB->pred_begin();
  }
  return false;
}
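As a self-contained sketch of the merge test above (an illustration with made-up names, not code from the patch): the equality compare in the candidate block folds into a dominating compare when the dominating block branches on the same value, or on an immediately adjacent value with COND_L/COND_G:

    #include <cassert>

    // Stand-in for X86::CondCode, reduced to the three codes findPath allows.
    enum CondCode { COND_L, COND_G, COND_E };

    // Mirrors the combining condition in findPath.
    bool canMergeCompares(int CmpValue, int PredCmpValue, CondCode CC) {
      return CmpValue == PredCmpValue ||
             (CmpValue == PredCmpValue - 1 && CC == COND_L) ||
             (CmpValue == PredCmpValue + 1 && CC == COND_G);
    }

    int main() {
      // "cmp %0, 19; jg" dominating "cmp %0, 20; je": 20 == 19 + 1 under
      // COND_G, so the pair from the file header merges into one compare.
      assert(canMergeCompares(/*CmpValue=*/20, /*PredCmpValue=*/19, COND_G));
      // Non-adjacent values (cmp 40 dominating cmp 20) do not merge.
      assert(!canMergeCompares(20, 40, COND_G));
      return 0;
    }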

// Fix up any PHI node in the successor of MBB.
static void fixPHIsInSucc(MachineBasicBlock *MBB, MachineBasicBlock *OldMBB,
                          MachineBasicBlock *NewMBB) {
  if (NewMBB == OldMBB)
    return;
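  // A machine PHI keeps its def in operand 0 followed by (Reg, MBB) pairs,
  // so the incoming-block operands sit at the even indices 2, 4, 6, ...
  // visited below.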
  for (auto MI = MBB->instr_begin(), ME = MBB->instr_end();
       MI != ME && MI->isPHI(); ++MI)
    for (unsigned i = 2, e = MI->getNumOperands() + 1; i != e; i += 2) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.getMBB() == OldMBB)
        MO.setMBB(NewMBB);
    }
}

// Utility function to set branch probability for edge MBB->SuccMBB.
static inline bool setBranchProb(MachineBasicBlock *MBB,
                                 MachineBasicBlock *SuccMBB,
                                 BranchProbability Prob) {
  auto MBBI = std::find(MBB->succ_begin(), MBB->succ_end(), SuccMBB);
  if (MBBI == MBB->succ_end())
    return false;
  MBB->setSuccProbability(MBBI, Prob);
  return true;
}

// Utility function to find the unconditional br instruction in MBB.
static inline MachineBasicBlock::iterator
findUncondBrI(MachineBasicBlock *MBB) {
  return std::find_if(MBB->begin(), MBB->end(), [](MachineInstr &MI) -> bool {
    return MI.getOpcode() == X86::JMP_1;
  });
}

// Replace MBB's original successor, OrigDest, with NewDest.
// Also update the MBBInfo for MBB.
void X86CondBrFolding::replaceBrDest(MachineBasicBlock *MBB,
                                     MachineBasicBlock *OrigDest,
                                     MachineBasicBlock *NewDest) {
  TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
  MachineInstr *BrMI;
  if (MBBInfo->TBB == OrigDest) {
    BrMI = MBBInfo->BrInstr;
    unsigned JNCC = GetCondBranchFromCond(MBBInfo->BranchCode);
    MachineInstrBuilder MIB =
        BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI), TII->get(JNCC))
            .addMBB(NewDest);
    MBBInfo->TBB = NewDest;
    MBBInfo->BrInstr = MIB.getInstr();
  } else { // Should be the unconditional jump stmt.
    MachineBasicBlock::iterator UncondBrI = findUncondBrI(MBB);
    BuildMI(*MBB, UncondBrI, MBB->findDebugLoc(UncondBrI), TII->get(X86::JMP_1))
        .addMBB(NewDest);
    MBBInfo->FBB = NewDest;
    BrMI = &*UncondBrI;
  }
  fixPHIsInSucc(NewDest, OrigDest, MBB);
  BrMI->eraseFromParent();
  MBB->addSuccessor(NewDest);
  setBranchProb(MBB, NewDest, MBPI->getEdgeProbability(MBB, OrigDest));
  MBB->removeSuccessor(OrigDest);
}

// Change the CondCode and BrInstr according to MBBInfo.
void X86CondBrFolding::fixupModifiedCond(MachineBasicBlock *MBB) {
  TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
  if (!MBBInfo->Modified)
    return;

  MachineInstr *BrMI = MBBInfo->BrInstr;
  X86::CondCode CC = MBBInfo->BranchCode;
  MachineInstrBuilder MIB = BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI),
                                    TII->get(GetCondBranchFromCond(CC)))
                                .addMBB(MBBInfo->TBB);
  BrMI->eraseFromParent();
  MBBInfo->BrInstr = MIB.getInstr();

  MachineBasicBlock::iterator UncondBrI = findUncondBrI(MBB);
  BuildMI(*MBB, UncondBrI, MBB->findDebugLoc(UncondBrI), TII->get(X86::JMP_1))
      .addMBB(MBBInfo->FBB);
  MBB->erase(UncondBrI);
  MBBInfo->Modified = false;
}

//
// Apply the transformation:
//   RootMBB -1-> ... PredMBB -3-> MBB -5-> TargetMBB
//     \-2->          \-4->        \-6-> FalseMBB
// ==>
//   RootMBB -1-> ... PredMBB -7-> FalseMBB
//   TargetMBB <-8-/  \-2->        \-4->
//
// Note that PredMBB and RootMBB could be the same.
// And in the case of a dead TargetMBB, we will not have TargetMBB and edge 8.
//
// There is some special handling where RootMBB's branch is COND_E, in which
// case we directly short-cycle the branch instruction.
//
void X86CondBrFolding::optimizeCondBr(
    MachineBasicBlock &MBB, SmallVectorImpl<MachineBasicBlock *> &BranchPath) {

  X86::CondCode CC;
  TargetMBBInfo *MBBInfo = getMBBInfo(&MBB);
  assert(MBBInfo && "Expecting a candidate MBB");
  MachineBasicBlock *TargetMBB = MBBInfo->TBB;
  BranchProbability TargetProb = MBPI->getEdgeProbability(&MBB, MBBInfo->TBB);

  // Forward the jump from MBB's predecessor to MBB's false target.
  MachineBasicBlock *PredMBB = BranchPath.front();
  TargetMBBInfo *PredMBBInfo = getMBBInfo(PredMBB);
  assert(PredMBBInfo && "Expecting a candidate MBB");
  if (PredMBBInfo->Modified)
    fixupModifiedCond(PredMBB);
  CC = PredMBBInfo->BranchCode;
  // Don't do this if the depth of BranchPath is 1 and PredMBB is of COND_E.
  // We will short-cycle directly for this case.
  if (!(CC == X86::COND_E && BranchPath.size() == 1))
    replaceBrDest(PredMBB, &MBB, MBBInfo->FBB);

  MachineBasicBlock *RootMBB = BranchPath.back();
  TargetMBBInfo *RootMBBInfo = getMBBInfo(RootMBB);
  assert(RootMBBInfo && "Expecting a candidate MBB");
  if (RootMBBInfo->Modified)
    fixupModifiedCond(RootMBB);
  CC = RootMBBInfo->BranchCode;

  if (CC != X86::COND_E) {
    MachineBasicBlock::iterator UncondBrI = findUncondBrI(RootMBB);
    // RootMBB: Cond jump to the original not-taken MBB.
    X86::CondCode NewCC;
    switch (CC) {
    case X86::COND_L:
      NewCC = X86::COND_G;
      break;
    case X86::COND_G:
      NewCC = X86::COND_L;
      break;
    default:
      llvm_unreachable("unexpected conditional code.");
    }
    BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI),
            TII->get(GetCondBranchFromCond(NewCC)))
        .addMBB(RootMBBInfo->FBB);

    // RootMBB: Jump to TargetMBB.
    BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI),
            TII->get(X86::JMP_1))
        .addMBB(TargetMBB);
    RootMBB->addSuccessor(TargetMBB);
    fixPHIsInSucc(TargetMBB, &MBB, RootMBB);
    RootMBB->erase(UncondBrI);
  } else {
    replaceBrDest(RootMBB, RootMBBInfo->TBB, TargetMBB);
  }

  // Fix RootMBB's CmpValue to MBB's CmpValue to TargetMBB. Don't set the Imm
  // directly; move MBB's compare instruction here instead, as the opcode
  // might be different.
  if (RootMBBInfo->CmpValue != MBBInfo->CmpValue) {
    MachineInstr *NewCmp = MBBInfo->CmpInstr;
    NewCmp->removeFromParent();
    RootMBB->insert(RootMBBInfo->CmpInstr, NewCmp);
    RootMBBInfo->CmpInstr->eraseFromParent();
  }

  // Invalidate MBBInfo just in case.
  MBBInfos[MBB.getNumber()] = nullptr;
  MBBInfos[RootMBB->getNumber()] = nullptr;

  // Fix branch probabilities.
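  // Illustrative numbers (not from the patch): if P(RootMBB->MBB) = 1/2 and
  // P(MBB->TargetMBB) = 1/4 before folding, the new RootMBB->TargetMBB edge
  // gets 1/2 * 1/4 = 1/8 and the rerouted edge keeps 1/2 - 1/8 = 3/8, which
  // is exactly the "Prob - TargetProb" update below.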
  auto fixBranchProb = [&](MachineBasicBlock *NextMBB) {
    BranchProbability Prob;
    for (auto &I : BranchPath) {
      MachineBasicBlock *ThisMBB = I;
      if (!ThisMBB->hasSuccessorProbabilities() ||
          !ThisMBB->isSuccessor(NextMBB))
        break;
      Prob = MBPI->getEdgeProbability(ThisMBB, NextMBB);
      if (Prob.isUnknown())
        break;
      TargetProb = Prob * TargetProb;
      Prob = Prob - TargetProb;
      setBranchProb(ThisMBB, NextMBB, Prob);
      if (ThisMBB == RootMBB) {
        setBranchProb(ThisMBB, TargetMBB, TargetProb);
      }
      ThisMBB->normalizeSuccProbs();
      if (ThisMBB == RootMBB)
        break;
      NextMBB = ThisMBB;
    }
    return true;
  };
  if (CC != X86::COND_E && !TargetProb.isUnknown())
    fixBranchProb(MBBInfo->FBB);

  if (CC != X86::COND_E)
    RemoveList.push_back(&MBB);

  LLVM_DEBUG(dbgs() << "After optimization:\nRootMBB is: " << *RootMBB << "\n");
  if (BranchPath.size() > 1)
    LLVM_DEBUG(dbgs() << "PredMBB is: " << *(BranchPath[0]) << "\n");
}

// Driver function for the optimization: find the valid candidates and apply
// the transformation.
bool X86CondBrFolding::optimize() {
  bool Changed = false;
  LLVM_DEBUG(dbgs() << "***** X86CondBr Folding on Function: " << MF.getName()
                    << " *****\n");
  // Set up data structures.
  MBBInfos.resize(MF.getNumBlockIDs());
  for (auto &MBB : MF)
    MBBInfos[MBB.getNumber()] = analyzeMBB(MBB);

  for (auto &MBB : MF) {
    TargetMBBInfo *MBBInfo = getMBBInfo(&MBB);
    if (!MBBInfo || !MBBInfo->CmpBrOnly)
      continue;
    if (MBB.pred_size() != 1)
      continue;
    LLVM_DEBUG(dbgs() << "Work on MBB." << MBB.getNumber()
                      << " CmpValue: " << MBBInfo->CmpValue << "\n");
    SmallVector<MachineBasicBlock *, 4> BranchPath;
    if (!findPath(&MBB, BranchPath))
      continue;

#ifndef NDEBUG
    LLVM_DEBUG(dbgs() << "Found one path (len=" << BranchPath.size() << "):\n");
    int Index = 1;
    LLVM_DEBUG(dbgs() << "Target MBB is: " << MBB << "\n");
    for (auto I = BranchPath.rbegin(); I != BranchPath.rend(); ++I, ++Index) {
      MachineBasicBlock *PMBB = *I;
      TargetMBBInfo *PMBBInfo = getMBBInfo(PMBB);
      LLVM_DEBUG(dbgs() << "Path MBB (" << Index << " of " << BranchPath.size()
                        << ") is " << *PMBB);
      LLVM_DEBUG(dbgs() << "CC=" << PMBBInfo->BranchCode
                        << "  Val=" << PMBBInfo->CmpValue
                        << "  CmpBrOnly=" << PMBBInfo->CmpBrOnly << "\n\n");
    }
#endif
    optimizeCondBr(MBB, BranchPath);
    Changed = true;
  }
  NumFixedCondBrs += RemoveList.size();
  for (auto MBBI : RemoveList) {
    // Drain the successor list; removing successors while range-iterating
    // over successors() would invalidate the iterators.
    while (!MBBI->succ_empty())
      MBBI->removeSuccessor(MBBI->succ_end() - 1);
    MBBI->eraseFromParent();
  }

  return Changed;
}

// Analyze instructions that generate CondCode and extract information.
bool X86CondBrFolding::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      int &CmpValue) {
  unsigned SrcRegIndex = 0;
  unsigned ValueIndex = 0;
  switch (MI.getOpcode()) {
  // TODO: handle test instructions.
  default:
    return false;
  case X86::CMP64ri32:
  case X86::CMP64ri8:
  case X86::CMP32ri:
  case X86::CMP32ri8:
  case X86::CMP16ri:
  case X86::CMP16ri8:
  case X86::CMP8ri:
    SrcRegIndex = 0;
    ValueIndex = 1;
    break;
  // Flag-setting subtracts against an immediate act like compares, but with
  // a different operand layout (the destination register comes first).
  case X86::SUB64ri32:
  case X86::SUB64ri8:
  case X86::SUB32ri:
  case X86::SUB32ri8:
  case X86::SUB16ri:
  case X86::SUB16ri8:
  case X86::SUB8ri:
    SrcRegIndex = 1;
    ValueIndex = 2;
    break;
  }
  SrcReg = MI.getOperand(SrcRegIndex).getReg();
  assert(MI.getOperand(ValueIndex).isImm() && "Expecting Imm operand");
  CmpValue = MI.getOperand(ValueIndex).getImm();
  return true;
}

// Analyze a candidate MBB and extract all the information needed.
// A valid candidate has two successors.
// It also should end with the sequence
//     CmpInstr,
//     CondBr,
//     UncondBr.
// Return a TargetMBBInfo if MBB is a valid candidate and nullptr otherwise.
std::unique_ptr<TargetMBBInfo>
X86CondBrFolding::analyzeMBB(MachineBasicBlock &MBB) {
  MachineBasicBlock *TBB;
  MachineBasicBlock *FBB;
  MachineInstr *BrInstr;
  MachineInstr *CmpInstr;
  X86::CondCode CC;
  unsigned SrcReg;
  int CmpValue;
  bool Modified;
  bool CmpBrOnly;

  if (MBB.succ_size() != 2)
    return nullptr;

  CmpBrOnly = true;
  FBB = TBB = nullptr;
  CmpInstr = nullptr;
  MachineBasicBlock::iterator I = MBB.end();
  while (I != MBB.begin()) {
    --I;
    if (I->isDebugValue())
      continue;
    if (I->getOpcode() == X86::JMP_1) {
      if (FBB)
        return nullptr;
      FBB = I->getOperand(0).getMBB();
      continue;
    }
    if (I->isBranch()) {
      if (TBB)
        return nullptr;
      CC = X86::getCondFromBranchOpc(I->getOpcode());
      switch (CC) {
      default:
        return nullptr;
      case X86::COND_E:
      case X86::COND_L:
      case X86::COND_G:
      case X86::COND_NE:
      case X86::COND_LE:
      case X86::COND_GE:
        break;
      }
      TBB = I->getOperand(0).getMBB();
      BrInstr = &*I;
      continue;
    }
    if (analyzeCompare(*I, SrcReg, CmpValue)) {
      if (CmpInstr)
        return nullptr;
      CmpInstr = &*I;
      continue;
    }
    CmpBrOnly = false;
    break;
  }

  if (!TBB || !FBB || !CmpInstr)
    return nullptr;

  // Simplify the CondCode. Note this is only to simplify the findPath logic
  // and will not change the instruction here.
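  // For example, "x <= 5" (COND_LE, CmpValue 5) is treated as "x < 6"
  // (COND_L, CmpValue 6), and NE is treated as EQ with the two branch
  // targets swapped.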
  switch (CC) {
  case X86::COND_NE:
    CC = X86::COND_E;
    std::swap(TBB, FBB);
    Modified = true;
    break;
  case X86::COND_LE:
    if (CmpValue == INT_MAX)
      return nullptr;
    CC = X86::COND_L;
    CmpValue += 1;
    Modified = true;
    break;
  case X86::COND_GE:
    if (CmpValue == INT_MIN)
      return nullptr;
    CC = X86::COND_G;
    CmpValue -= 1;
    Modified = true;
    break;
  default:
    Modified = false;
    break;
  }
  return llvm::make_unique<TargetMBBInfo>(TargetMBBInfo{
      TBB, FBB, BrInstr, CmpInstr, CC, SrcReg, CmpValue, Modified, CmpBrOnly});
}

bool X86CondBrFoldingPass::runOnMachineFunction(MachineFunction &MF) {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (!ST.threewayBranchProfitable())
    return false;
  const X86InstrInfo *TII = ST.getInstrInfo();
  const MachineBranchProbabilityInfo *MBPI =
      &getAnalysis<MachineBranchProbabilityInfo>();

  X86CondBrFolding CondBr(TII, MBPI, MF);
  return CondBr.optimize();
}
lib/Target/X86/X86Subtarget.h:

   /// Indicates target prefers 256 bit instructions.
   bool Prefer256Bit = false;
+
+  /// Threeway branch is profitable in this subtarget.
+  bool ThreewayBranchProfitable = false;
 
   /// What processor and OS we're targeting.
   Triple TargetTriple;
 ...
   bool hasWAITPKG() const { return HasWAITPKG; }
   bool hasPCONFIG() const { return HasPCONFIG; }
   bool hasSGX() const { return HasSGX; }
+  bool threewayBranchProfitable() const { return ThreewayBranchProfitable; }
   bool hasINVPCID() const { return HasINVPCID; }
   bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
   bool useRetpolineIndirectBranches() const {
lib/Target/X86/X86TargetMachine.cpp:

     cl::desc("Enable the machine combiner pass"),
     cl::init(true), cl::Hidden);
 
+static cl::opt<bool> EnableCondBrFoldingPass("x86-condbr-folding",
+                               cl::desc("Enable the conditional branch "
+                                        "folding pass"),
+                               cl::init(true), cl::Hidden);
+
 namespace llvm {
 
 void initializeWinEHStatePassPass(PassRegistry &);
 ...
 }
 
 bool X86PassConfig::addILPOpts() {
+  if (EnableCondBrFoldingPass)
+    addPass(createX86CondBrFolding());
   addPass(&EarlyIfConverterID);
   if (EnableMachineCombinerPass)
     addPass(&MachineCombinerID);
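Since the flag above is a hidden cl::opt defaulting to true, the pass can also be switched off from the llc command line even on subtargets that enable the feature; a hypothetical invocation (an illustration, mirroring the RUN lines in the tests below):

    llc -mtriple=x86_64-linux-gnu -mcpu=sandybridge -x86-condbr-folding=false test.ll -o -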
test/CodeGen/X86/O3-pipeline.ll:

 ; CHECK-NEXT:       Merge disjoint stack slots
 ; CHECK-NEXT:       Local Stack Slot Allocation
 ; CHECK-NEXT:       Remove dead machine instructions
+; CHECK-NEXT:       X86 CondBr Folding
 ; CHECK-NEXT:       MachineDominator Tree Construction
 ; CHECK-NEXT:       Machine Natural Loop Construction
 ; CHECK-NEXT:       Machine Trace Metrics
test/CodeGen/X86/condbr_if.ll (new file):

; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=sandybridge %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=ivybridge %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=haswell %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=broadwell %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skylake %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=NOTMERGE

define i32 @length2_1(i32) {
  %2 = icmp slt i32 %0, 3
  br i1 %2, label %3, label %5

; <label>:3:
  %4 = tail call i32 (...) @f1()
  br label %13

; <label>:5:
  %6 = icmp slt i32 %0, 40
  br i1 %6, label %7, label %13

; <label>:7:
  %8 = icmp eq i32 %0, 3
  br i1 %8, label %9, label %11

; <label>:9:
  %10 = tail call i32 (...) @f2()
  br label %11

; <label>:11:
  %12 = tail call i32 (...) @f3() #2
  br label %13

; <label>:13:
  ret i32 0
}
; MERGE-LABEL: length2_1
; MERGE: cmpl $3
; MERGE-NEXT: jg
; MERGE-NEXT: jge
; NOTMERGE-LABEL: length2_1
; NOTMERGE: cmpl $2
; NOTMERGE-NEXT: jg

define i32 @length2_2(i32) {
  %2 = icmp sle i32 %0, 2
  br i1 %2, label %3, label %5

; <label>:3:
  %4 = tail call i32 (...) @f1()
  br label %13

; <label>:5:
  %6 = icmp slt i32 %0, 40
  br i1 %6, label %7, label %13

; <label>:7:
  %8 = icmp eq i32 %0, 3
  br i1 %8, label %9, label %11

; <label>:9:
  %10 = tail call i32 (...) @f2()
  br label %11

; <label>:11:
  %12 = tail call i32 (...) @f3() #2
  br label %13

; <label>:13:
  ret i32 0
}
; MERGE-LABEL: length2_2
; MERGE: cmpl $3
; MERGE-NEXT: jg
; MERGE-NEXT: jge
; NOTMERGE-LABEL: length2_2
; NOTMERGE: cmpl $2
; NOTMERGE-NEXT: jg

define i32 @length2_3(i32) {
  %2 = icmp sgt i32 %0, 3
  br i1 %2, label %3, label %5

; <label>:3:
  %4 = tail call i32 (...) @f1()
  br label %13

; <label>:5:
  %6 = icmp sgt i32 %0, -40
  br i1 %6, label %7, label %13

; <label>:7:
  %8 = icmp eq i32 %0, 3
  br i1 %8, label %9, label %11

; <label>:9:
  %10 = tail call i32 (...) @f2()
  br label %11

; <label>:11:
  %12 = tail call i32 (...) @f3() #2
  br label %13

; <label>:13:
  ret i32 0
}
; MERGE-LABEL: length2_3
; MERGE: cmpl $3
; MERGE-NEXT: jl
; MERGE-NEXT: jle
; NOTMERGE-LABEL: length2_3
; NOTMERGE: cmpl $4
; NOTMERGE-NEXT: jl

define i32 @length2_4(i32) {
  %2 = icmp sge i32 %0, 4
  br i1 %2, label %3, label %5

; <label>:3:
  %4 = tail call i32 (...) @f1()
  br label %13

; <label>:5:
  %6 = icmp sgt i32 %0, -40
  br i1 %6, label %7, label %13

; <label>:7:
  %8 = icmp eq i32 %0, 3
  br i1 %8, label %9, label %11

; <label>:9:
  %10 = tail call i32 (...) @f2()
  br label %11

; <label>:11:
  %12 = tail call i32 (...) @f3() #2
  br label %13

; <label>:13:
  ret i32 0
}
; MERGE-LABEL: length2_4
; MERGE: cmpl $3
; MERGE-NEXT: jl
; MERGE-NEXT: jle
; NOTMERGE-LABEL: length2_4
; NOTMERGE: cmpl $4
; NOTMERGE-NEXT: jl

declare i32 @f1(...)
declare i32 @f2(...)
declare i32 @f3(...)

define i32 @length1_1(i32) {
  %2 = icmp sgt i32 %0, 5
  br i1 %2, label %3, label %5

; <label>:3:
  %4 = tail call i32 (...) @f1()
  br label %9

; <label>:5:
  %6 = icmp eq i32 %0, 5
  br i1 %6, label %7, label %9

; <label>:7:
  %8 = tail call i32 (...) @f2()
  br label %9

; <label>:9:
  ret i32 0
}
; MERGE-LABEL: length1_1
; MERGE: cmpl $5
; MERGE-NEXT: jl
; MERGE-NEXT: jle
; NOTMERGE-LABEL: length1_1
; NOTMERGE: cmpl $6
; NOTMERGE-NEXT: jl
test/CodeGen/X86/condbr_switch.ll (new file):

; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=sandybridge %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=ivybridge %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=haswell %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=broadwell %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skylake %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=NOTMERGE

@v1 = common dso_local local_unnamed_addr global i32 0, align 4
@v2 = common dso_local local_unnamed_addr global i32 0, align 4
@v3 = common dso_local local_unnamed_addr global i32 0, align 4
@v4 = common dso_local local_unnamed_addr global i32 0, align 4
@v5 = common dso_local local_unnamed_addr global i32 0, align 4
@v6 = common dso_local local_unnamed_addr global i32 0, align 4
@v7 = common dso_local local_unnamed_addr global i32 0, align 4
@v8 = common dso_local local_unnamed_addr global i32 0, align 4
@v9 = common dso_local local_unnamed_addr global i32 0, align 4
@v10 = common dso_local local_unnamed_addr global i32 0, align 4
@v11 = common dso_local local_unnamed_addr global i32 0, align 4
@v12 = common dso_local local_unnamed_addr global i32 0, align 4
@v13 = common dso_local local_unnamed_addr global i32 0, align 4
@v14 = common dso_local local_unnamed_addr global i32 0, align 4
@v15 = common dso_local local_unnamed_addr global i32 0, align 4

define dso_local i32 @fourcases(i32 %n) {
entry:
  switch i32 %n, label %return [
    i32 111, label %sw.bb
    i32 222, label %sw.bb1
    i32 3665, label %sw.bb2
    i32 4444, label %sw.bb4
  ]

sw.bb:
  %0 = load i32, i32* @v1, align 4
  br label %return

sw.bb1:
  %1 = load i32, i32* @v2, align 4
  %add = add nsw i32 %1, 12
  br label %return

sw.bb2:
  %2 = load i32, i32* @v3, align 4
  %add3 = add nsw i32 %2, 13
  br label %return

sw.bb4:
  %3 = load i32, i32* @v1, align 4
  %4 = load i32, i32* @v2, align 4
  %add5 = add nsw i32 %4, %3
  br label %return

return:
  %retval.0 = phi i32 [ %add5, %sw.bb4 ], [ %add3, %sw.bb2 ], [ %add, %sw.bb1 ], [ %0, %sw.bb ], [ 0, %entry ]
  ret i32 %retval.0
}
; MERGE-LABEL: fourcases
; MERGE: cmpl $3665
; MERGE-NEXT: jg
; MERGE-NEXT: jge
; NOTMERGE: cmpl $3664
; NOTMERGE-NEXT: jg

define dso_local i32 @fifteencases(i32) {
  switch i32 %0, label %32 [
    i32 -111, label %2
    i32 -13, label %4
    i32 25, label %6
    i32 37, label %8
    i32 89, label %10
    i32 111, label %12
    i32 213, label %14
    i32 271, label %16
    i32 283, label %18
    i32 325, label %20
    i32 327, label %22
    i32 429, label %24
    i32 500, label %26
    i32 603, label %28
    i32 605, label %30
  ]

; <label>:2:
  %3 = load i32, i32* @v1, align 4
  br label %32

; <label>:4:
  %5 = load i32, i32* @v2, align 4
  br label %32

; <label>:6:
  %7 = load i32, i32* @v3, align 4
  br label %32

; <label>:8:
  %9 = load i32, i32* @v4, align 4
  br label %32

; <label>:10:
  %11 = load i32, i32* @v5, align 4
  br label %32

; <label>:12:
  %13 = load i32, i32* @v6, align 4
  br label %32

; <label>:14:
  %15 = load i32, i32* @v7, align 4
  br label %32

; <label>:16:
  %17 = load i32, i32* @v8, align 4
  br label %32

; <label>:18:
  %19 = load i32, i32* @v9, align 4
  br label %32

; <label>:20:
  %21 = load i32, i32* @v10, align 4
  br label %32

; <label>:22:
  %23 = load i32, i32* @v11, align 4
  br label %32

; <label>:24:
  %25 = load i32, i32* @v12, align 4
  br label %32

; <label>:26:
  %27 = load i32, i32* @v13, align 4
  br label %32

; <label>:28:
  %29 = load i32, i32* @v14, align 4
  br label %32

; <label>:30:
  %31 = load i32, i32* @v15, align 4
  br label %32

; <label>:32:
  %33 = phi i32 [ %31, %30 ], [ %29, %28 ], [ %27, %26 ], [ %25, %24 ], [ %23, %22 ], [ %21, %20 ], [ %19, %18 ], [ %17, %16 ], [ %15, %14 ], [ %13, %12 ], [ %11, %10 ], [ %9, %8 ], [ %7, %6 ], [ %5, %4 ], [ %3, %2 ], [ 0, %1 ]
  ret i32 %33
}
; MERGE-LABEL: fifteencases
; MERGE: cmpl $271
; MERGE-NEXT: jg
; MERGE-NEXT: jge
; MERGE: cmpl $37
; MERGE-NEXT: jg
; MERGE-NEXT: jge
; MERGE: cmpl $429
; MERGE-NEXT: jg
; MERGE-NEXT: jge
; MERGE: cmpl $325
; MERGE-NEXT: jg
; MERGE-NEXT: jge
; MERGE: cmpl $603
; MERGE-NEXT: jg
; MERGE-NEXT: jge
; NOTMERGE-LABEL: fifteencases
; NOTMERGE: cmpl $270
; NOTMERGE-NEXT: jle