llvm.org GIT mirror: llvm @ c97898e

Recommit r343993: [X86] condition branches folding for three-way conditional codes

Fix the memory issue exposed by sanitizer.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@344085 91177308-0d34-0410-b5e6-96231b3b80d8

Rong Xu, 1 year, 11 months ago
9 changed files with 947 additions and 0 deletions.
  X86CallingConv.cpp
  X86CallLowering.cpp
  X86CmovConversion.cpp
  X86CondBrFolding.cpp
  X86DomainReassignment.cpp
  X86ExpandPseudo.cpp
  X86FastISel.cpp
/// Return a pass that transforms setcc + movzx pairs into xor + setcc.
FunctionPass *createX86FixupSetCC();

/// Return a pass that folds conditional branch jumps.
FunctionPass *createX86CondBrFolding();

/// Return a pass that avoids creating store forward block issues in the hardware.
FunctionPass *createX86AvoidStoreForwardingBlocks();

403403 "Indicates that the BEXTR instruction is implemented as a single uop "
404404 "with good throughput.">;
405405
406 // Merge branches using three-way conditional code.
407 def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch",
408 "ThreewayBranchProfitable", "true",
409 "Merge branches to a three-way "
410 "conditional branch">;
411
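// Note on usage (illustrative, not part of this patch): as with any
// SubtargetFeature, the folding can be toggled independently of the CPU
// default, e.g. "llc -mattr=+merge-to-threeway-branch" or
// "-mattr=-merge-to-threeway-branch".
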
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
  FeatureFastScalarFSQRT,
  FeatureFastSHLDRotate,
  FeatureSlowIncDec,
  FeatureMergeToThreeWayBranch,
  FeatureMacroFusion
]>;

//===---- X86CondBrFolding.cpp - optimize conditional branches ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// This file defines a pass that optimizes conditional branches on x86 by
// taking advantage of the three-way conditional code generated by compare
// instructions.
// Currently, it tries to hoist an EQ or NE conditional branch to a dominating
// conditional branch that computes the same EQ/NE condition. An example:
//  bb_0:
//    cmp %0, 19
//    jg bb_1
//    jmp bb_2
//  bb_1:
//    cmp %0, 40
//    jg bb_3
//    jmp bb_4
//  bb_4:
//    cmp %0, 20
//    je bb_5
//    jmp bb_6
// Here we could combine the two compares in bb_0 and bb_4 and have the
// following code:
//  bb_0:
//    cmp %0, 20
//    jg bb_1
//    jl bb_2
//    jmp bb_5
//  bb_1:
//    cmp %0, 40
//    jg bb_3
//    jmp bb_6
// For the case of %0 == 20 (bb_5), we eliminate two jumps, and the control
// height for bb_6 is also reduced. bb_4 is gone after the optimization.
//
// This code pattern is common, especially in switch lowering, where we
// generate a compare against "pivot-1" for the inner nodes of the binary
// search tree.
//===----------------------------------------------------------------------===//
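//
// Illustration (hypothetical source, not part of this patch): switch lowering
// builds a binary search tree of compares, so C code such as
//
//   int dispatch(int v) {
//     switch (v) {
//     case 20: return 1;
//     case 41: return 2;
//     case 63: return 3;
//     default: return 0;
//     }
//   }
//
// may lower to the "cmp 19; jg" / "cmp 20; je" shape shown above, with inner
// tree nodes comparing against pivot-1 and leaves testing equality against
// the pivot itself.
//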

#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/BranchProbability.h"

using namespace llvm;

#define DEBUG_TYPE "x86-condbr-folding"

STATISTIC(NumFixedCondBrs, "Number of x86 condbr folded");

namespace {
class X86CondBrFoldingPass : public MachineFunctionPass {
public:
  X86CondBrFoldingPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "X86 CondBr Folding"; }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<MachineBranchProbabilityInfo>();
  }

private:
  static char ID;
};

char X86CondBrFoldingPass::ID = 0;
} // namespace

FunctionPass *llvm::createX86CondBrFolding() {
  return new X86CondBrFoldingPass();
}

// A struct that stores the auxiliary information for each MBB.
struct TargetMBBInfo {
  MachineBasicBlock *TBB;
  MachineBasicBlock *FBB;
  MachineInstr *BrInstr;
  MachineInstr *CmpInstr;
  X86::CondCode BranchCode;
  unsigned SrcReg;
  int CmpValue;
  bool Modified;
  bool CmpBrOnly;
};

// A class that optimizes the conditional branch by hoisting and merging the
// CondCode.
class X86CondBrFolding {
public:
  X86CondBrFolding(const X86InstrInfo *TII,
                   const MachineBranchProbabilityInfo *MBPI,
                   MachineFunction &MF)
      : TII(TII), MBPI(MBPI), MF(MF) {}
  bool optimize();

private:
  const X86InstrInfo *TII;
  const MachineBranchProbabilityInfo *MBPI;
  MachineFunction &MF;
  std::vector<std::unique_ptr<TargetMBBInfo>> MBBInfos;
  SmallVector<MachineBasicBlock *, 4> RemoveList;

  void optimizeCondBr(MachineBasicBlock &MBB,
                      SmallVectorImpl<MachineBasicBlock *> &BranchPath);
  void fixBranchProb(MachineBasicBlock *NextMBB, MachineBasicBlock *RootMBB,
                     SmallVectorImpl<MachineBasicBlock *> &BranchPath);
  void replaceBrDest(MachineBasicBlock *MBB, MachineBasicBlock *OrigDest,
                     MachineBasicBlock *NewDest);
  void fixupModifiedCond(MachineBasicBlock *MBB);
  std::unique_ptr<TargetMBBInfo> analyzeMBB(MachineBasicBlock &MBB);
  static bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                             int &CmpValue);
  bool findPath(MachineBasicBlock *MBB,
                SmallVectorImpl<MachineBasicBlock *> &BranchPath);
  TargetMBBInfo *getMBBInfo(MachineBasicBlock *MBB) const {
    return MBBInfos[MBB->getNumber()].get();
  }
};

// Find a valid path on which we can reuse the CondCode.
// The resulting path (when the return value is true) is stored in BranchPath.
// Return value:
//  false: no valid path is found.
//  true: a valid path is found and the targetBB can be reached.
bool X86CondBrFolding::findPath(
    MachineBasicBlock *MBB, SmallVectorImpl<MachineBasicBlock *> &BranchPath) {
  TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
  assert(MBBInfo && "Expecting a candidate MBB");
  int CmpValue = MBBInfo->CmpValue;

  MachineBasicBlock *PredMBB = *MBB->pred_begin();
  MachineBasicBlock *SaveMBB = MBB;
  while (PredMBB) {
    TargetMBBInfo *PredMBBInfo = getMBBInfo(PredMBB);
    if (!PredMBBInfo || PredMBBInfo->SrcReg != MBBInfo->SrcReg)
      return false;

    assert(SaveMBB == PredMBBInfo->TBB || SaveMBB == PredMBBInfo->FBB);
    bool IsFalseBranch = (SaveMBB == PredMBBInfo->FBB);

    X86::CondCode CC = PredMBBInfo->BranchCode;
    assert(CC == X86::COND_L || CC == X86::COND_G || CC == X86::COND_E);
    int PredCmpValue = PredMBBInfo->CmpValue;
    bool ValueCmpTrue = ((CmpValue < PredCmpValue && CC == X86::COND_L) ||
                         (CmpValue > PredCmpValue && CC == X86::COND_G) ||
                         (CmpValue == PredCmpValue && CC == X86::COND_E));
    // Check that the result of the value compare and the branch target match.
    if (!(ValueCmpTrue ^ IsFalseBranch)) {
      LLVM_DEBUG(dbgs() << "Dead BB detected!\n");
      return false;
    }

    BranchPath.push_back(PredMBB);
    // These are the conditions on which we could combine the compares.
    if ((CmpValue == PredCmpValue) ||
        (CmpValue == PredCmpValue - 1 && CC == X86::COND_L) ||
        (CmpValue == PredCmpValue + 1 && CC == X86::COND_G))
      return true;

    // If PredMBB has more than one predecessor, or is not a pure cmp-and-br
    // block, we bail out.
    if (PredMBB->pred_size() != 1 || !PredMBBInfo->CmpBrOnly)
      return false;

    SaveMBB = PredMBB;
    PredMBB = *PredMBB->pred_begin();
  }
  return false;
}
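
// Worked example of the combine test in findPath (hypothetical values): if
// the candidate block does "cmp %0, 20; je" (CmpValue == 20) and a dominating
// block does "cmp %0, 19; jg" (PredCmpValue == 19, CC == COND_G), then
// CmpValue == PredCmpValue + 1 holds, so the dominating compare can be
// retargeted to 20 and split three ways: jg (%0 > 20), jl (%0 < 20), and a
// direct jump for %0 == 20, exactly as in the file header example.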

// Fix up any PHI node in the successor of MBB.
static void fixPHIsInSucc(MachineBasicBlock *MBB, MachineBasicBlock *OldMBB,
                          MachineBasicBlock *NewMBB) {
  if (NewMBB == OldMBB)
    return;
  for (auto MI = MBB->instr_begin(), ME = MBB->instr_end();
       MI != ME && MI->isPHI(); ++MI)
    for (unsigned i = 2, e = MI->getNumOperands() + 1; i != e; i += 2) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.getMBB() == OldMBB)
        MO.setMBB(NewMBB);
    }
}

// Utility function to set branch probability for edge MBB->SuccMBB.
static inline bool setBranchProb(MachineBasicBlock *MBB,
                                 MachineBasicBlock *SuccMBB,
                                 BranchProbability Prob) {
  auto MBBI = std::find(MBB->succ_begin(), MBB->succ_end(), SuccMBB);
  if (MBBI == MBB->succ_end())
    return false;
  MBB->setSuccProbability(MBBI, Prob);
  return true;
}

// Utility function to find the unconditional br instruction in MBB.
static inline MachineBasicBlock::iterator
findUncondBrI(MachineBasicBlock *MBB) {
  return std::find_if(MBB->begin(), MBB->end(), [](MachineInstr &MI) -> bool {
    return MI.getOpcode() == X86::JMP_1;
  });
}

// Replace MBB's original successor, OrigDest, with NewDest.
// Also update the MBBInfo for MBB.
void X86CondBrFolding::replaceBrDest(MachineBasicBlock *MBB,
                                     MachineBasicBlock *OrigDest,
                                     MachineBasicBlock *NewDest) {
  TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
  MachineInstr *BrMI;
  if (MBBInfo->TBB == OrigDest) {
    BrMI = MBBInfo->BrInstr;
    unsigned JNCC = GetCondBranchFromCond(MBBInfo->BranchCode);
    MachineInstrBuilder MIB =
        BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI), TII->get(JNCC))
            .addMBB(NewDest);
    MBBInfo->TBB = NewDest;
    MBBInfo->BrInstr = MIB.getInstr();
  } else { // Should be the unconditional jump stmt.
    MachineBasicBlock::iterator UncondBrI = findUncondBrI(MBB);
    BuildMI(*MBB, UncondBrI, MBB->findDebugLoc(UncondBrI), TII->get(X86::JMP_1))
        .addMBB(NewDest);
    MBBInfo->FBB = NewDest;
    BrMI = &*UncondBrI;
  }
  fixPHIsInSucc(NewDest, OrigDest, MBB);
  BrMI->eraseFromParent();
  MBB->addSuccessor(NewDest);
  setBranchProb(MBB, NewDest, MBPI->getEdgeProbability(MBB, OrigDest));
  MBB->removeSuccessor(OrigDest);
}

// Change the CondCode and BrInstr according to MBBInfo.
void X86CondBrFolding::fixupModifiedCond(MachineBasicBlock *MBB) {
  TargetMBBInfo *MBBInfo = getMBBInfo(MBB);
  if (!MBBInfo->Modified)
    return;

  MachineInstr *BrMI = MBBInfo->BrInstr;
  X86::CondCode CC = MBBInfo->BranchCode;
  MachineInstrBuilder MIB = BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI),
                                    TII->get(GetCondBranchFromCond(CC)))
                                .addMBB(MBBInfo->TBB);
  BrMI->eraseFromParent();
  MBBInfo->BrInstr = MIB.getInstr();

  MachineBasicBlock::iterator UncondBrI = findUncondBrI(MBB);
  BuildMI(*MBB, UncondBrI, MBB->findDebugLoc(UncondBrI), TII->get(X86::JMP_1))
      .addMBB(MBBInfo->FBB);
  MBB->erase(UncondBrI);
  MBBInfo->Modified = false;
}

//
// Apply the transformation:
//   RootMBB -1-> ... PredMBB -3-> MBB -5-> TargetMBB
//     \-2->            \-4->         \-6-> FalseMBB
// ==>
//   RootMBB -1-> ... PredMBB -7-> FalseMBB
// TargetMBB <-8-/      \-2->        \-4->
//
// Note that PredMBB and RootMBB could be the same.
// And in the case of a dead TargetMBB, we will not have TargetMBB and edge 8.
//
// There is some special handling when RootMBB's branch is COND_E, in which
// case we directly short-circuit the branch instruction.
//
void X86CondBrFolding::optimizeCondBr(
    MachineBasicBlock &MBB, SmallVectorImpl<MachineBasicBlock *> &BranchPath) {

  X86::CondCode CC;
  TargetMBBInfo *MBBInfo = getMBBInfo(&MBB);
  assert(MBBInfo && "Expecting a candidate MBB");
  MachineBasicBlock *TargetMBB = MBBInfo->TBB;
  BranchProbability TargetProb = MBPI->getEdgeProbability(&MBB, MBBInfo->TBB);

  // Forward the jump from MBB's predecessor to MBB's false target.
  MachineBasicBlock *PredMBB = BranchPath.front();
  TargetMBBInfo *PredMBBInfo = getMBBInfo(PredMBB);
  assert(PredMBBInfo && "Expecting a candidate MBB");
  if (PredMBBInfo->Modified)
    fixupModifiedCond(PredMBB);
  CC = PredMBBInfo->BranchCode;
  // Don't do this if the depth of BranchPath is 1 and PredMBB is of COND_E.
  // We will short-circuit directly for this case.
  if (!(CC == X86::COND_E && BranchPath.size() == 1))
    replaceBrDest(PredMBB, &MBB, MBBInfo->FBB);

  MachineBasicBlock *RootMBB = BranchPath.back();
  TargetMBBInfo *RootMBBInfo = getMBBInfo(RootMBB);
  assert(RootMBBInfo && "Expecting a candidate MBB");
  if (RootMBBInfo->Modified)
    fixupModifiedCond(RootMBB);
  CC = RootMBBInfo->BranchCode;

  if (CC != X86::COND_E) {
    MachineBasicBlock::iterator UncondBrI = findUncondBrI(RootMBB);
    // RootMBB: Cond jump to the original not-taken MBB.
    X86::CondCode NewCC;
    switch (CC) {
    case X86::COND_L:
      NewCC = X86::COND_G;
      break;
    case X86::COND_G:
      NewCC = X86::COND_L;
      break;
    default:
      llvm_unreachable("unexpected conditional code.");
    }
    BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI),
            TII->get(GetCondBranchFromCond(NewCC)))
        .addMBB(RootMBBInfo->FBB);

    // RootMBB: Jump to TargetMBB.
    BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI),
            TII->get(X86::JMP_1))
        .addMBB(TargetMBB);
    RootMBB->addSuccessor(TargetMBB);
    fixPHIsInSucc(TargetMBB, &MBB, RootMBB);
    RootMBB->erase(UncondBrI);
  } else {
    replaceBrDest(RootMBB, RootMBBInfo->TBB, TargetMBB);
  }

  // Fix RootMBB's CmpValue to MBB's CmpValue to TargetMBB. Don't set the Imm
  // directly; move MBB's cmp instruction here, as the opcode might differ.
  if (RootMBBInfo->CmpValue != MBBInfo->CmpValue) {
    MachineInstr *NewCmp = MBBInfo->CmpInstr;
    NewCmp->removeFromParent();
    RootMBB->insert(RootMBBInfo->CmpInstr, NewCmp);
    RootMBBInfo->CmpInstr->eraseFromParent();
  }

  // Fix branch probabilities.
  auto fixBranchProb = [&](MachineBasicBlock *NextMBB) {
    BranchProbability Prob;
    for (auto &I : BranchPath) {
      MachineBasicBlock *ThisMBB = I;
      if (!ThisMBB->hasSuccessorProbabilities() ||
          !ThisMBB->isSuccessor(NextMBB))
        break;
      Prob = MBPI->getEdgeProbability(ThisMBB, NextMBB);
      if (Prob.isUnknown())
        break;
      TargetProb = Prob * TargetProb;
      Prob = Prob - TargetProb;
      setBranchProb(ThisMBB, NextMBB, Prob);
      if (ThisMBB == RootMBB) {
        setBranchProb(ThisMBB, TargetMBB, TargetProb);
      }
      ThisMBB->normalizeSuccProbs();
      if (ThisMBB == RootMBB)
        break;
      NextMBB = ThisMBB;
    }
    return true;
  };
  if (CC != X86::COND_E && !TargetProb.isUnknown())
    fixBranchProb(MBBInfo->FBB);

  if (CC != X86::COND_E)
    RemoveList.push_back(&MBB);

  // Invalidate MBBInfo just in case.
  MBBInfos[MBB.getNumber()] = nullptr;
  MBBInfos[RootMBB->getNumber()] = nullptr;

  LLVM_DEBUG(dbgs() << "After optimization:\nRootMBB is: " << *RootMBB << "\n");
  if (BranchPath.size() > 1)
    LLVM_DEBUG(dbgs() << "PredMBB is: " << *(BranchPath[0]) << "\n");
}
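
// Probability sketch for fixBranchProb above (hypothetical numbers): with a
// two-block path whose on-path edges each have probability 0.5 and whose
// final je edge has probability 0.3, the new RootMBB->TargetMBB edge receives
// the accumulated product 0.5 * 0.5 * 0.3, each on-path edge gives up the
// mass that now bypasses it, and normalizeSuccProbs() rescales the rest.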

// Driver function for the optimization: find the valid candidate and apply
// the transformation.
bool X86CondBrFolding::optimize() {
  bool Changed = false;
  LLVM_DEBUG(dbgs() << "***** X86CondBr Folding on Function: " << MF.getName()
                    << " *****\n");
  // Setup data structures.
  MBBInfos.resize(MF.getNumBlockIDs());
  for (auto &MBB : MF)
    MBBInfos[MBB.getNumber()] = analyzeMBB(MBB);

  for (auto &MBB : MF) {
    TargetMBBInfo *MBBInfo = getMBBInfo(&MBB);
    if (!MBBInfo || !MBBInfo->CmpBrOnly)
      continue;
    if (MBB.pred_size() != 1)
      continue;
    LLVM_DEBUG(dbgs() << "Work on MBB." << MBB.getNumber()
                      << " CmpValue: " << MBBInfo->CmpValue << "\n");
    SmallVector<MachineBasicBlock *, 4> BranchPath;
    if (!findPath(&MBB, BranchPath))
      continue;

#ifndef NDEBUG
    LLVM_DEBUG(dbgs() << "Found one path (len=" << BranchPath.size() << "):\n");
    int Index = 1;
    LLVM_DEBUG(dbgs() << "Target MBB is: " << MBB << "\n");
    for (auto I = BranchPath.rbegin(); I != BranchPath.rend(); ++I, ++Index) {
      MachineBasicBlock *PMBB = *I;
      TargetMBBInfo *PMBBInfo = getMBBInfo(PMBB);
      LLVM_DEBUG(dbgs() << "Path MBB (" << Index << " of " << BranchPath.size()
                        << ") is " << *PMBB);
      LLVM_DEBUG(dbgs() << "CC=" << PMBBInfo->BranchCode
                        << " Val=" << PMBBInfo->CmpValue
                        << " CmpBrOnly=" << PMBBInfo->CmpBrOnly << "\n\n");
    }
#endif
    optimizeCondBr(MBB, BranchPath);
    Changed = true;
  }
  NumFixedCondBrs += RemoveList.size();
  for (auto MBBI : RemoveList) {
    // Drop all successor edges before erasing the block. Removing successors
    // while range-iterating over successors() would invalidate the iterator.
    while (!MBBI->succ_empty())
      MBBI->removeSuccessor(MBBI->succ_end() - 1);
    MBBI->eraseFromParent();
  }

  return Changed;
}

// Analyze instructions that generate the CondCode and extract information.
bool X86CondBrFolding::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                                      int &CmpValue) {
  unsigned SrcRegIndex = 0;
  unsigned ValueIndex = 0;
  switch (MI.getOpcode()) {
  // TODO: handle test instructions.
  default:
    return false;
  case X86::CMP64ri32:
  case X86::CMP64ri8:
  case X86::CMP32ri:
  case X86::CMP32ri8:
  case X86::CMP16ri:
  case X86::CMP16ri8:
  case X86::CMP8ri:
    SrcRegIndex = 0;
    ValueIndex = 1;
    break;
  case X86::SUB64ri32:
  case X86::SUB64ri8:
  case X86::SUB32ri:
  case X86::SUB32ri8:
  case X86::SUB16ri:
  case X86::SUB16ri8:
  case X86::SUB8ri:
    SrcRegIndex = 1;
    ValueIndex = 2;
    break;
  }
  SrcReg = MI.getOperand(SrcRegIndex).getReg();
  assert(MI.getOperand(ValueIndex).isImm() && "Expecting Imm operand");
  CmpValue = MI.getOperand(ValueIndex).getImm();
  return true;
}
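
// Illustrative operand mapping for analyzeCompare (not exercised here): for
// "CMP32ri8 %reg, 19" the compared register is operand 0 and the immediate is
// operand 1, while the SUB forms define their destination as operand 0, so
// the compared register and immediate shift to operands 1 and 2.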

// Analyze a candidate MBB and extract all the information needed.
// A valid candidate has two successors and ends with the sequence:
//   Cmp_instr,
//   CondBr,
//   UncondBr.
// Return a TargetMBBInfo if MBB is a valid candidate and nullptr otherwise.
std::unique_ptr<TargetMBBInfo>
X86CondBrFolding::analyzeMBB(MachineBasicBlock &MBB) {
  MachineBasicBlock *TBB;
  MachineBasicBlock *FBB;
  MachineInstr *BrInstr;
  MachineInstr *CmpInstr;
  X86::CondCode CC;
  unsigned SrcReg;
  int CmpValue;
  bool Modified;
  bool CmpBrOnly;

  if (MBB.succ_size() != 2)
    return nullptr;

  CmpBrOnly = true;
  FBB = TBB = nullptr;
  CmpInstr = nullptr;
  MachineBasicBlock::iterator I = MBB.end();
  while (I != MBB.begin()) {
    --I;
    if (I->isDebugValue())
      continue;
    if (I->getOpcode() == X86::JMP_1) {
      if (FBB)
        return nullptr;
      FBB = I->getOperand(0).getMBB();
      continue;
    }
    if (I->isBranch()) {
      if (TBB)
        return nullptr;
      CC = X86::getCondFromBranchOpc(I->getOpcode());
      switch (CC) {
      default:
        return nullptr;
      case X86::COND_E:
      case X86::COND_L:
      case X86::COND_G:
      case X86::COND_NE:
      case X86::COND_LE:
      case X86::COND_GE:
        break;
      }
      TBB = I->getOperand(0).getMBB();
      BrInstr = &*I;
      continue;
    }
    if (analyzeCompare(*I, SrcReg, CmpValue)) {
      if (CmpInstr)
        return nullptr;
      CmpInstr = &*I;
      continue;
    }
    CmpBrOnly = false;
    break;
  }

  if (!TBB || !FBB || !CmpInstr)
    return nullptr;

  // Simplify the CondCode. Note this is only to simplify the findPath logic;
  // it does not change the instruction here.
  switch (CC) {
  case X86::COND_NE:
    CC = X86::COND_E;
    std::swap(TBB, FBB);
    Modified = true;
    break;
  case X86::COND_LE:
    if (CmpValue == INT_MAX)
      return nullptr;
    CC = X86::COND_L;
    CmpValue += 1;
    Modified = true;
    break;
  case X86::COND_GE:
    if (CmpValue == INT_MIN)
      return nullptr;
    CC = X86::COND_G;
    CmpValue -= 1;
    Modified = true;
    break;
  default:
    Modified = false;
    break;
  }
  return llvm::make_unique<TargetMBBInfo>(TargetMBBInfo{
      TBB, FBB, BrInstr, CmpInstr, CC, SrcReg, CmpValue, Modified, CmpBrOnly});
}
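
// Canonicalization example for the switch above (illustrative): a block
// ending in "cmp %0, 2; jle" is recorded as COND_L with CmpValue == 3
// (x <= 2 is x < 3), and "jne" is recorded as COND_E with TBB/FBB swapped,
// so findPath only reasons about strict <, >, and ==. Modified marks blocks
// whose real branch must be rewritten by fixupModifiedCond() before the
// transformation touches them.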

bool X86CondBrFoldingPass::runOnMachineFunction(MachineFunction &MF) {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (!ST.threewayBranchProfitable())
    return false;
  const X86InstrInfo *TII = ST.getInstrInfo();
  const MachineBranchProbabilityInfo *MBPI =
      &getAnalysis<MachineBranchProbabilityInfo>();

  X86CondBrFolding CondBr(TII, MBPI, MF);
  return CondBr.optimize();
}

  /// Indicates target prefers 256 bit instructions.
  bool Prefer256Bit = false;

  /// True if a three-way conditional branch is profitable on this subtarget.
  bool ThreewayBranchProfitable = false;

  /// What processor and OS we're targeting.
  Triple TargetTriple;
  bool hasWAITPKG() const { return HasWAITPKG; }
  bool hasPCONFIG() const { return HasPCONFIG; }
  bool hasSGX() const { return HasSGX; }
  bool threewayBranchProfitable() const { return ThreewayBranchProfitable; }
  bool hasINVPCID() const { return HasINVPCID; }
  bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
  bool useRetpolineIndirectBranches() const {
    cl::desc("Enable the machine combiner pass"),
    cl::init(true), cl::Hidden);

static cl::opt<bool> EnableCondBrFoldingPass("x86-condbr-folding",
    cl::desc("Enable the conditional branch folding pass"),
    cl::init(true), cl::Hidden);
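
// Triage note (illustrative invocation, not part of the patch): the option
// defaults to true, so the new pass can be disabled from the command line
// with "llc -x86-condbr-folding=false".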

namespace llvm {

void initializeWinEHStatePassPass(PassRegistry &);
}

bool X86PassConfig::addILPOpts() {
  if (EnableCondBrFoldingPass)
    addPass(createX86CondBrFolding());
  addPass(&EarlyIfConverterID);
  if (EnableMachineCombinerPass)
    addPass(&MachineCombinerID);
; CHECK-NEXT:       Merge disjoint stack slots
; CHECK-NEXT:       Local Stack Slot Allocation
; CHECK-NEXT:       Remove dead machine instructions
; CHECK-NEXT:       X86 CondBr Folding
; CHECK-NEXT:       MachineDominator Tree Construction
; CHECK-NEXT:       Machine Natural Loop Construction
; CHECK-NEXT:       Machine Trace Metrics
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=sandybridge %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=ivybridge %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=haswell %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=broadwell %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skylake %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=NOTMERGE

define i32 @length2_1(i32) {
  %2 = icmp slt i32 %0, 3
  br i1 %2, label %3, label %5

; <label>:3:
  %4 = tail call i32 (...) @f1()
  br label %13

; <label>:5:
  %6 = icmp slt i32 %0, 40
  br i1 %6, label %7, label %13

; <label>:7:
  %8 = icmp eq i32 %0, 3
  br i1 %8, label %9, label %11

; <label>:9:
  %10 = tail call i32 (...) @f2()
  br label %11

; <label>:11:
  %12 = tail call i32 (...) @f3() #2
  br label %13

; <label>:13:
  ret i32 0
}
; MERGE-LABEL: length2_1
; MERGE: cmpl $3
; MERGE-NEXT: jg
; MERGE-NEXT: jge
; NOTMERGE-LABEL: length2_1
; NOTMERGE: cmpl $2
; NOTMERGE-NEXT: jg
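
; Explanatory note (not a CHECK line): after folding, %0 is tested against 3
; once. jg takes the %0 > 3 side on to the cmpl $40 block, and the following
; jge, reachable only when %0 <= 3, isolates the %0 == 3 case that previously
; needed its own cmpl/je pair.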

define i32 @length2_2(i32) {
  %2 = icmp sle i32 %0, 2
  br i1 %2, label %3, label %5

; <label>:3:
  %4 = tail call i32 (...) @f1()
  br label %13

; <label>:5:
  %6 = icmp slt i32 %0, 40
  br i1 %6, label %7, label %13

; <label>:7:
  %8 = icmp eq i32 %0, 3
  br i1 %8, label %9, label %11

; <label>:9:
  %10 = tail call i32 (...) @f2()
  br label %11

; <label>:11:
  %12 = tail call i32 (...) @f3() #2
  br label %13

; <label>:13:
  ret i32 0
}
; MERGE-LABEL: length2_2
; MERGE: cmpl $3
; MERGE-NEXT: jg
; MERGE-NEXT: jge
; NOTMERGE-LABEL: length2_2
; NOTMERGE: cmpl $2
; NOTMERGE-NEXT: jg

define i32 @length2_3(i32) {
  %2 = icmp sgt i32 %0, 3
  br i1 %2, label %3, label %5

; <label>:3:
  %4 = tail call i32 (...) @f1()
  br label %13

; <label>:5:
  %6 = icmp sgt i32 %0, -40
  br i1 %6, label %7, label %13

; <label>:7:
  %8 = icmp eq i32 %0, 3
  br i1 %8, label %9, label %11

; <label>:9:
  %10 = tail call i32 (...) @f2()
  br label %11

; <label>:11:
  %12 = tail call i32 (...) @f3() #2
  br label %13

; <label>:13:
  ret i32 0
}
; MERGE-LABEL: length2_3
; MERGE: cmpl $3
; MERGE-NEXT: jl
; MERGE-NEXT: jle
; NOTMERGE-LABEL: length2_3
; NOTMERGE: cmpl $4
; NOTMERGE-NEXT: jl

define i32 @length2_4(i32) {
  %2 = icmp sge i32 %0, 4
  br i1 %2, label %3, label %5

; <label>:3:
  %4 = tail call i32 (...) @f1()
  br label %13

; <label>:5:
  %6 = icmp sgt i32 %0, -40
  br i1 %6, label %7, label %13

; <label>:7:
  %8 = icmp eq i32 %0, 3
  br i1 %8, label %9, label %11

; <label>:9:
  %10 = tail call i32 (...) @f2()
  br label %11

; <label>:11:
  %12 = tail call i32 (...) @f3() #2
  br label %13

; <label>:13:
  ret i32 0
}
; MERGE-LABEL: length2_4
; MERGE: cmpl $3
; MERGE-NEXT: jl
; MERGE-NEXT: jle
; NOTMERGE-LABEL: length2_4
; NOTMERGE: cmpl $4
; NOTMERGE-NEXT: jl

declare i32 @f1(...)
declare i32 @f2(...)
declare i32 @f3(...)

define i32 @length1_1(i32) {
  %2 = icmp sgt i32 %0, 5
  br i1 %2, label %3, label %5

; <label>:3:
  %4 = tail call i32 (...) @f1()
  br label %9

; <label>:5:
  %6 = icmp eq i32 %0, 5
  br i1 %6, label %7, label %9

; <label>:7:
  %8 = tail call i32 (...) @f2()
  br label %9

; <label>:9:
  ret i32 0
}
; MERGE-LABEL: length1_1
; MERGE: cmpl $5
; MERGE-NEXT: jl
; MERGE-NEXT: jle
; NOTMERGE-LABEL: length1_1
; NOTMERGE: cmpl $6
; NOTMERGE-NEXT: jl

; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=sandybridge %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=ivybridge %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=haswell %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=broadwell %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skylake %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skx %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=MERGE
; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -verify-machineinstrs | FileCheck %s --check-prefix=NOTMERGE

@v1 = common dso_local local_unnamed_addr global i32 0, align 4
@v2 = common dso_local local_unnamed_addr global i32 0, align 4
@v3 = common dso_local local_unnamed_addr global i32 0, align 4
@v4 = common dso_local local_unnamed_addr global i32 0, align 4
@v5 = common dso_local local_unnamed_addr global i32 0, align 4
@v6 = common dso_local local_unnamed_addr global i32 0, align 4
@v7 = common dso_local local_unnamed_addr global i32 0, align 4
@v8 = common dso_local local_unnamed_addr global i32 0, align 4
@v9 = common dso_local local_unnamed_addr global i32 0, align 4
@v10 = common dso_local local_unnamed_addr global i32 0, align 4
@v11 = common dso_local local_unnamed_addr global i32 0, align 4
@v12 = common dso_local local_unnamed_addr global i32 0, align 4
@v13 = common dso_local local_unnamed_addr global i32 0, align 4
@v14 = common dso_local local_unnamed_addr global i32 0, align 4
@v15 = common dso_local local_unnamed_addr global i32 0, align 4

define dso_local i32 @fourcases(i32 %n) {
entry:
  switch i32 %n, label %return [
    i32 111, label %sw.bb
    i32 222, label %sw.bb1
    i32 3665, label %sw.bb2
    i32 4444, label %sw.bb4
  ]

sw.bb:
  %0 = load i32, i32* @v1, align 4
  br label %return

sw.bb1:
  %1 = load i32, i32* @v2, align 4
  %add = add nsw i32 %1, 12
  br label %return

sw.bb2:
  %2 = load i32, i32* @v3, align 4
  %add3 = add nsw i32 %2, 13
  br label %return

sw.bb4:
  %3 = load i32, i32* @v1, align 4
  %4 = load i32, i32* @v2, align 4
  %add5 = add nsw i32 %4, %3
  br label %return

return:
  %retval.0 = phi i32 [ %add5, %sw.bb4 ], [ %add3, %sw.bb2 ], [ %add, %sw.bb1 ], [ %0, %sw.bb ], [ 0, %entry ]
  ret i32 %retval.0
}
; MERGE-LABEL: fourcases
; MERGE: cmpl $3665
; MERGE-NEXT: jg
; MERGE-NEXT: jge
; NOTMERGE: cmpl $3664
; NOTMERGE-NEXT: jg
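
; Explanatory note (not a CHECK line): without folding, the root of the
; binary search tree compares against pivot-1 (cmpl $3664; jg); with folding
; it compares against the pivot itself (cmpl $3665) and splits three ways:
; jg to the upper subtree, jge for the exact match (3665 -> sw.bb2), and
; fall-through to the lower subtree.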

define dso_local i32 @fifteencases(i32) {
  switch i32 %0, label %32 [
    i32 -111, label %2
    i32 -13, label %4
    i32 25, label %6
    i32 37, label %8
    i32 89, label %10
    i32 111, label %12
    i32 213, label %14
    i32 271, label %16
    i32 283, label %18
    i32 325, label %20
    i32 327, label %22
    i32 429, label %24
    i32 500, label %26
    i32 603, label %28
    i32 605, label %30
  ]

; <label>:2:
  %3 = load i32, i32* @v1, align 4
  br label %32

; <label>:4:
  %5 = load i32, i32* @v2, align 4
  br label %32

; <label>:6:
  %7 = load i32, i32* @v3, align 4
  br label %32

; <label>:8:
  %9 = load i32, i32* @v4, align 4
  br label %32

; <label>:10:
  %11 = load i32, i32* @v5, align 4
  br label %32

; <label>:12:
  %13 = load i32, i32* @v6, align 4
  br label %32

; <label>:14:
  %15 = load i32, i32* @v7, align 4
  br label %32

; <label>:16:
  %17 = load i32, i32* @v8, align 4
  br label %32

; <label>:18:
  %19 = load i32, i32* @v9, align 4
  br label %32

; <label>:20:
  %21 = load i32, i32* @v10, align 4
  br label %32

; <label>:22:
  %23 = load i32, i32* @v11, align 4
  br label %32

; <label>:24:
  %25 = load i32, i32* @v12, align 4
  br label %32

; <label>:26:
  %27 = load i32, i32* @v13, align 4
  br label %32

; <label>:28:
  %29 = load i32, i32* @v14, align 4
  br label %32

; <label>:30:
  %31 = load i32, i32* @v15, align 4
  br label %32

; <label>:32:
  %33 = phi i32 [ %31, %30 ], [ %29, %28 ], [ %27, %26 ], [ %25, %24 ], [ %23, %22 ], [ %21, %20 ], [ %19, %18 ], [ %17, %16 ], [ %15, %14 ], [ %13, %12 ], [ %11, %10 ], [ %9, %8 ], [ %7, %6 ], [ %5, %4 ], [ %3, %2 ], [ 0, %1 ]
  ret i32 %33
}
; MERGE-LABEL: fifteencases
; MERGE: cmpl $271
; MERGE-NEXT: jg
; MERGE-NEXT: jge
; MERGE: cmpl $37
; MERGE-NEXT: jg
; MERGE-NEXT: jge
; MERGE: cmpl $429
; MERGE-NEXT: jg
; MERGE-NEXT: jge
; MERGE: cmpl $325
; MERGE-NEXT: jg
; MERGE-NEXT: jge
; MERGE: cmpl $603
; MERGE-NEXT: jg
; MERGE-NEXT: jge
; NOTMERGE-LABEL: fifteencases
; NOTMERGE: cmpl $270
; NOTMERGE-NEXT: jle
