llvm.org GIT mirror llvm / 1872f69
[CGP] Split some critical edges coming out of indirect branches Splitting critical edges when one of the source edges is an indirectbr is hard in general (because it requires changing the memory the indirectbr reads). But if a block only has a single indirectbr predecessor (which is the common case), we can simulate splitting that edge by splitting the destination block, and retargeting the *direct* branches. This is motivated by the use of computed gotos in python 2.7: PyEval_EvalFrame() ends up using an indirect branch with ~100 successors, and passing a constant to each of those. Since MachineSink can't break indirect critical edges on demand (and doing this in MIR doesn't look feasible), this causes us to emit ~100 defs of registers containing constants, all of which are placed in the predecessor block, even though only one of those constants is used in each successor. So, at each computed goto, we needlessly spill about 100 constants to the stack. The end result is that a clang-compiled python interpreter can be ~2.5x slower on a simple python reduction loop than a gcc-compiled interpreter. Differential Revision: https://reviews.llvm.org/D29916 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296416 91177308-0d34-0410-b5e6-96231b3b80d8 Michael Kuperstein 3 years ago
5 changed file(s) with 477 addition(s) and 13 deletion(s). Raw diff Collapse all Expand all
1414
1515 #include "llvm/CodeGen/Passes.h"
1616 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/SetVector.h"
1718 #include "llvm/ADT/SmallSet.h"
1819 #include "llvm/ADT/Statistic.h"
1920 #include "llvm/Analysis/BlockFrequencyInfo.h"
2021 #include "llvm/Analysis/BranchProbabilityInfo.h"
22 #include "llvm/Analysis/CFG.h"
2123 #include "llvm/Analysis/InstructionSimplify.h"
2224 #include "llvm/Analysis/LoopInfo.h"
2325 #include "llvm/Analysis/ProfileSummaryInfo.h"
5254 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
5355 #include "llvm/Transforms/Utils/BuildLibCalls.h"
5456 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
57 #include "llvm/Transforms/Utils/Cloning.h"
5558 #include "llvm/Transforms/Utils/Local.h"
5659 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
60 #include "llvm/Transforms/Utils/ValueMapper.h"
5761 using namespace llvm;
5862 using namespace llvm::PatternMatch;
5963
221225 unsigned CreatedInstCost);
222226 bool splitBranchCondition(Function &F);
223227 bool simplifyOffsetableRelocate(Instruction &I);
228 bool splitIndirectCriticalEdges(Function &F);
224229 };
225230 }
226231
295300 if (!DisableBranchOpts)
296301 EverMadeChange |= splitBranchCondition(F);
297302
303 // Split some critical edges where one of the sources is an indirect branch,
304 // to help generate sane code for PHIs involving such edges.
305 EverMadeChange |= splitIndirectCriticalEdges(F);
306
298307 bool MadeChange = true;
299308 while (MadeChange) {
300309 MadeChange = false;
426435 DestBB = nullptr;
427436
428437 return DestBB;
438 }
439
440 // Return the unique indirectbr predecessor of BB. This returns null when BB has
441 // no PHIs, when no incoming edge (or more than one) comes from an indirectbr, or
442 // when some predecessor ends in anything other than a br, switch or indirectbr.
443 // Every br/switch predecessor visited is appended to OtherPreds, so OtherPreds may hold partial results even when null is returned.
444 static BasicBlock *
445 findIBRPredecessor(BasicBlock *BB, SmallVectorImpl &OtherPreds) {
446 // If the block doesn't have any PHIs, we don't care about it, since there's
447 // no point in splitting it.
448 PHINode *PN = dyn_cast(BB->begin());
449 if (!PN)
450 return nullptr;
451
452 // Verify we have exactly one IBR predecessor. Predecessors are enumerated
453 // through the incoming blocks of BB's first PHI. Conservatively bail out if one
454 // of the other predecessors is not a "regular" terminator (a switch or a br).
455 BasicBlock *IBB = nullptr;
456 for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
457 BasicBlock *PredBB = PN->getIncomingBlock(Pred);
458 TerminatorInst *PredTerm = PredBB->getTerminator();
459 switch (PredTerm->getOpcode()) {
460 case Instruction::IndirectBr:
461 if (IBB) // A second incoming indirectbr edge disqualifies BB.
462 return nullptr;
463 IBB = PredBB;
464 break;
465 case Instruction::Br:
466 case Instruction::Switch:
467 OtherPreds.push_back(PredBB); // One entry per PHI incoming slot, so a block may be recorded more than once.
468 continue;
469 default: // Any other kind of terminator: give up on this block.
470 return nullptr;
471 }
472 }
473
474 return IBB;
475 }
476
477 // Split critical edges where the source of the edge is an indirectbr instruction,
478 // returning true if F was changed. This isn't always possible, but we can handle some easy cases.
479 // This is useful because MI is unable to split such critical edges,
480 // which means it will not be able to sink instructions along those edges.
481 // This is especially painful for indirect branches with many successors, where
482 // we end up having to prepare all outgoing values in the origin block.
483 //
484 // Our normal algorithm for splitting critical edges requires us to update
485 // the outgoing edges of the edge origin block, but for an indirectbr this
486 // is hard, since it would require finding and updating the block addresses
487 // the indirect branch uses. But if a block only has a single indirectbr
488 // predecessor, with the others being regular branches, we can do it in a
489 // different way.
490 // Say we have A -> D, B -> D, I -> D where only I -> D is an indirectbr.
491 // We can split D into D0 and D1, where D0 contains only the PHIs from D,
492 // and D1 is the D block body. We can then duplicate D0 as D0A and D0B, and
493 // create the following structure:
494 // A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1
495 bool CodeGenPrepare::splitIndirectCriticalEdges(Function &F) {
496 // Check whether the function has any indirectbrs, and collect which blocks
497 // they may jump to. Since most functions don't have indirect branches,
498 // this lowers the common case's overhead to O(Blocks) instead of O(Edges).
499 SmallSetVector Targets;
500 for (auto &BB : F) {
501 auto *IBI = dyn_cast(BB.getTerminator());
502 if (!IBI)
503 continue;
504
505 for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ)
506 Targets.insert(IBI->getSuccessor(Succ));
507 }
508
509 if (Targets.empty())
510 return false;
511
512 bool Changed = false;
513 for (BasicBlock *Target : Targets) {
514 SmallVector OtherPreds;
515 BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds);
516 // If we did not find an indirectbr, or the indirectbr is the only
517 // incoming edge, this isn't the kind of edge we're looking for.
518 if (!IBRPred || OtherPreds.empty())
519 continue;
520
521 // Don't even think about ehpads/landingpads.
522 Instruction *FirstNonPHI = Target->getFirstNonPHI();
523 if (FirstNonPHI->isEHPad() || Target->isLandingPad())
524 continue;
525
526 BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
527 // It's possible Target was its own successor through an indirectbr. In this case, the indirectbr now comes from BodyBlock.
528 // NOTE(review): only IBRPred is remapped here. If Target can also be one of its own br/switch predecessors, its OtherPreds entry still names Target, and the retargeting loop below would read Target's post-split terminator - confirm this case is handled.
529 if (IBRPred == Target)
530 IBRPred = BodyBlock;
531
532 // At this point Target only has PHIs, and BodyBlock has the rest of the
533 // block's body. Create a copy of Target that will be used by the "direct"
534 // preds.
535 ValueToValueMapTy VMap; // Passed to CloneBasicBlock; not read again in this function.
536 BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
537
538 for (BasicBlock *Pred : OtherPreds)
539 Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
540
541 // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
542 // they are clones, so the number of PHIs are the same.
543 // (a) Remove the edge coming from IBRPred from the "Direct" PHI
544 // (b) Leave that as the only edge in the "Indirect" PHI.
545 // (c) Merge the two in the body block.
546 BasicBlock::iterator Indirect = Target->begin(),
547 End = Target->getFirstNonPHI()->getIterator();
548 BasicBlock::iterator Direct = DirectSucc->begin();
549 BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt();
550
551 assert(&*End == Target->getTerminator() &&
552 "Block was expected to only contain PHIs");
553
554 while (Indirect != End) {
555 PHINode *DirPHI = cast(Direct);
556 PHINode *IndPHI = cast(Indirect);
557
558 // Now, clean up - the direct block shouldn't get the indirect value,
559 // and vice versa.
560 DirPHI->removeIncomingValue(IBRPred);
561 Direct++;
562
563 // Advance the pointer here, to avoid invalidation issues when the old
564 // PHI is erased.
565 Indirect++;
566
567 PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI);
568 NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred),
569 IBRPred);
570
571 // Create a PHI in the body block, to merge the direct and indirect
572 // predecessors.
573 PHINode *MergePHI =
574 PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
575 MergePHI->addIncoming(NewIndPHI, Target);
576 MergePHI->addIncoming(DirPHI, DirectSucc);
577
578 IndPHI->replaceAllUsesWith(MergePHI);
579 IndPHI->eraseFromParent();
580 }
581
582 Changed = true;
583 }
584
585 return Changed;
429586 }
430587
431588 /// Eliminate blocks that contain only PHI nodes, debug info directives, and an
4646 br label %L2
4747
4848 L2: ; preds = %L3, %bb2
49 ; THUMB-LABEL: %L1.clone
4950 ; THUMB: muls
5051 %res.2 = phi i32 [ %res.1, %L3 ], [ 1, %bb2 ] ; [#uses=1]
5152 %phitmp = mul i32 %res.2, 6 ; [#uses=1]
44 entry:
55 %tmp1 = getelementptr inbounds [5 x i8*], [5 x i8*]* @C.0.2070, i16 0, i16 %i ; [#uses=1]
66 %gotovar.4.0 = load i8*, i8** %tmp1, align 4 ; [#uses=1]
7 ; CHECK: br .LC.0.2070(r12)
7 ; CHECK: br .LC.0.2070(r15)
88 indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
99
1010 L5: ; preds = %bb2
1616 bb2: ; preds = %entry, %bb3
1717 %gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; [#uses=1]
1818 ; PIC: mtctr
19 ; PIC-NEXT: li
20 ; PIC-NEXT: li
21 ; PIC-NEXT: li
22 ; PIC-NEXT: li
2319 ; PIC-NEXT: bctr
20 ; PIC: li
21 ; PIC: b LBB
22 ; PIC: li
23 ; PIC: b LBB
24 ; PIC: li
25 ; PIC: b LBB
26 ; PIC: li
27 ; PIC: b LBB
2428 ; STATIC: mtctr
25 ; STATIC-NEXT: li
26 ; STATIC-NEXT: li
27 ; STATIC-NEXT: li
28 ; STATIC-NEXT: li
2929 ; STATIC-NEXT: bctr
30 ; STATIC: li
31 ; STATIC: b LBB
32 ; STATIC: li
33 ; STATIC: b LBB
34 ; STATIC: li
35 ; STATIC: b LBB
36 ; STATIC: li
37 ; STATIC: b LBB
3038 ; PPC64: mtctr
31 ; PPC64-NEXT: li
32 ; PPC64-NEXT: li
33 ; PPC64-NEXT: li
34 ; PPC64-NEXT: li
3539 ; PPC64-NEXT: bctr
40 ; PPC64: li
41 ; PPC64: b LBB
42 ; PPC64: li
43 ; PPC64: b LBB
44 ; PPC64: li
45 ; PPC64: b LBB
46 ; PPC64: li
47 ; PPC64: b LBB
3648 indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
3749
3850 bb3: ; preds = %entry
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt -codegenprepare -S < %s | FileCheck %s
2 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
3 target triple = "x86_64-unknown-linux-gnu"
4
5 declare void @use(i32) local_unnamed_addr
6 declare void @useptr([2 x i8*]*) local_unnamed_addr
7
8 ; CHECK: @simple.targets = constant [2 x i8*] [i8* blockaddress(@simple, %bb0), i8* blockaddress(@simple, %bb1)], align 16
9 @simple.targets = constant [2 x i8*] [i8* blockaddress(@simple, %bb0), i8* blockaddress(@simple, %bb1)], align 16
10
11 ; CHECK: @multi.targets = constant [2 x i8*] [i8* blockaddress(@multi, %bb0), i8* blockaddress(@multi, %bb1)], align 16
12 @multi.targets = constant [2 x i8*] [i8* blockaddress(@multi, %bb0), i8* blockaddress(@multi, %bb1)], align 16
13
14 ; CHECK: @loop.targets = constant [2 x i8*] [i8* blockaddress(@loop, %bb0), i8* blockaddress(@loop, %bb1)], align 16
15 @loop.targets = constant [2 x i8*] [i8* blockaddress(@loop, %bb0), i8* blockaddress(@loop, %bb1)], align 16
16
17 ; CHECK: @nophi.targets = constant [2 x i8*] [i8* blockaddress(@nophi, %bb0), i8* blockaddress(@nophi, %bb1)], align 16
18 @nophi.targets = constant [2 x i8*] [i8* blockaddress(@nophi, %bb0), i8* blockaddress(@nophi, %bb1)], align 16
19
20 ; CHECK: @noncritical.targets = constant [2 x i8*] [i8* blockaddress(@noncritical, %bb0), i8* blockaddress(@noncritical, %bb1)], align 16
21 @noncritical.targets = constant [2 x i8*] [i8* blockaddress(@noncritical, %bb0), i8* blockaddress(@noncritical, %bb1)], align 16
22
23 ; Check that we break the critical edge when a jump table has only one use.
24 define void @simple(i32* nocapture readonly %p) {
25 ; CHECK-LABEL: @simple(
26 ; CHECK-NEXT: entry:
27 ; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
28 ; CHECK-NEXT: [[INITVAL:%.*]] = load i32, i32* [[P]], align 4
29 ; CHECK-NEXT: [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
30 ; CHECK-NEXT: switch i32 [[INITOP]], label [[EXIT:%.*]] [
31 ; CHECK-NEXT: i32 0, label [[BB0_CLONE:%.*]]
32 ; CHECK-NEXT: i32 1, label [[BB1_CLONE:%.*]]
33 ; CHECK-NEXT: ]
34 ; CHECK: bb0:
35 ; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
36 ; CHECK: .split:
37 ; CHECK-NEXT: [[MERGE:%.*]] = phi i32* [ [[PTR:%.*]], [[BB0:%.*]] ], [ [[INCDEC_PTR]], [[BB0_CLONE]] ]
38 ; CHECK-NEXT: [[MERGE2:%.*]] = phi i32 [ 0, [[BB0]] ], [ [[INITVAL]], [[BB0_CLONE]] ]
39 ; CHECK-NEXT: tail call void @use(i32 [[MERGE2]])
40 ; CHECK-NEXT: br label [[INDIRECTGOTO:%.*]]
41 ; CHECK: bb1:
42 ; CHECK-NEXT: br label [[DOTSPLIT3:%.*]]
43 ; CHECK: .split3:
44 ; CHECK-NEXT: [[MERGE5:%.*]] = phi i32* [ [[PTR]], [[BB1:%.*]] ], [ [[INCDEC_PTR]], [[BB1_CLONE]] ]
45 ; CHECK-NEXT: [[MERGE7:%.*]] = phi i32 [ 1, [[BB1]] ], [ [[INITVAL]], [[BB1_CLONE]] ]
46 ; CHECK-NEXT: tail call void @use(i32 [[MERGE7]])
47 ; CHECK-NEXT: br label [[INDIRECTGOTO]]
48 ; CHECK: indirectgoto:
49 ; CHECK-NEXT: [[P_ADDR_SINK:%.*]] = phi i32* [ [[MERGE5]], [[DOTSPLIT3]] ], [ [[MERGE]], [[DOTSPLIT]] ]
50 ; CHECK-NEXT: [[PTR]] = getelementptr inbounds i32, i32* [[P_ADDR_SINK]], i64 1
51 ; CHECK-NEXT: [[NEWP:%.*]] = load i32, i32* [[P_ADDR_SINK]], align 4
52 ; CHECK-NEXT: [[IDX:%.*]] = sext i32 [[NEWP]] to i64
53 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @simple.targets, i64 0, i64 [[IDX]]
54 ; CHECK-NEXT: [[NEWOP:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
55 ; CHECK-NEXT: indirectbr i8* [[NEWOP]], [label [[BB0]], label %bb1]
56 ; CHECK: exit:
57 ; CHECK-NEXT: ret void
58 ; CHECK: bb0.clone:
59 ; CHECK-NEXT: br label [[DOTSPLIT]]
60 ; CHECK: bb1.clone:
61 ; CHECK-NEXT: br label [[DOTSPLIT3]]
62 ;
63 entry:
64 %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
65 %initval = load i32, i32* %p, align 4
66 %initop = load i32, i32* %incdec.ptr, align 4
67 switch i32 %initop, label %exit [
68 i32 0, label %bb0
69 i32 1, label %bb1
70 ]
71
72 bb0:
73 %p.addr.0 = phi i32* [ %incdec.ptr, %entry ], [ %ptr, %indirectgoto ]
74 %opcode.0 = phi i32 [ %initval, %entry ], [ 0, %indirectgoto ]
75 tail call void @use(i32 %opcode.0)
76 br label %indirectgoto
77
78 bb1:
79 %p.addr.1 = phi i32* [ %incdec.ptr, %entry ], [ %ptr, %indirectgoto ]
80 %opcode.1 = phi i32 [ %initval, %entry ], [ 1, %indirectgoto ]
81 tail call void @use(i32 %opcode.1)
82 br label %indirectgoto
83
84 indirectgoto:
85 %p.addr.sink = phi i32* [ %p.addr.1, %bb1 ], [ %p.addr.0, %bb0 ]
86 %ptr = getelementptr inbounds i32, i32* %p.addr.sink, i64 1
87 %newp = load i32, i32* %p.addr.sink, align 4
88 %idx = sext i32 %newp to i64
89 %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @simple.targets, i64 0, i64 %idx
90 %newop = load i8*, i8** %arrayidx, align 8
91 indirectbr i8* %newop, [label %bb0, label %bb1]
92
93 exit:
94 ret void
95 }
96
97 ; Don't try to break critical edges when several indirectbrs point to a single block
98 define void @multi(i32* nocapture readonly %p) {
99 ; CHECK-LABEL: @multi(
100 ; CHECK-NEXT: entry:
101 ; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
102 ; CHECK-NEXT: [[INITVAL:%.*]] = load i32, i32* [[P]], align 4
103 ; CHECK-NEXT: [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
104 ; CHECK-NEXT: switch i32 [[INITOP]], label [[EXIT:%.*]] [
105 ; CHECK-NEXT: i32 0, label [[BB0:%.*]]
106 ; CHECK-NEXT: i32 1, label [[BB1:%.*]]
107 ; CHECK-NEXT: ]
108 ; CHECK: bb0:
109 ; CHECK-NEXT: [[P_ADDR_0:%.*]] = phi i32* [ [[INCDEC_PTR]], [[ENTRY:%.*]] ], [ [[NEXT0:%.*]], [[BB0]] ], [ [[NEXT1:%.*]], [[BB1]] ]
110 ; CHECK-NEXT: [[OPCODE_0:%.*]] = phi i32 [ [[INITVAL]], [[ENTRY]] ], [ 0, [[BB0]] ], [ 1, [[BB1]] ]
111 ; CHECK-NEXT: tail call void @use(i32 [[OPCODE_0]])
112 ; CHECK-NEXT: [[NEXT0]] = getelementptr inbounds i32, i32* [[P_ADDR_0]], i64 1
113 ; CHECK-NEXT: [[NEWP0:%.*]] = load i32, i32* [[P_ADDR_0]], align 4
114 ; CHECK-NEXT: [[IDX0:%.*]] = sext i32 [[NEWP0]] to i64
115 ; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 [[IDX0]]
116 ; CHECK-NEXT: [[NEWOP0:%.*]] = load i8*, i8** [[ARRAYIDX0]], align 8
117 ; CHECK-NEXT: indirectbr i8* [[NEWOP0]], [label [[BB0]], label %bb1]
118 ; CHECK: bb1:
119 ; CHECK-NEXT: [[P_ADDR_1:%.*]] = phi i32* [ [[INCDEC_PTR]], [[ENTRY]] ], [ [[NEXT0]], [[BB0]] ], [ [[NEXT1]], [[BB1]] ]
120 ; CHECK-NEXT: [[OPCODE_1:%.*]] = phi i32 [ [[INITVAL]], [[ENTRY]] ], [ 0, [[BB0]] ], [ 1, [[BB1]] ]
121 ; CHECK-NEXT: tail call void @use(i32 [[OPCODE_1]])
122 ; CHECK-NEXT: [[NEXT1]] = getelementptr inbounds i32, i32* [[P_ADDR_1]], i64 1
123 ; CHECK-NEXT: [[NEWP1:%.*]] = load i32, i32* [[P_ADDR_1]], align 4
124 ; CHECK-NEXT: [[IDX1:%.*]] = sext i32 [[NEWP1]] to i64
125 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 [[IDX1]]
126 ; CHECK-NEXT: [[NEWOP1:%.*]] = load i8*, i8** [[ARRAYIDX1]], align 8
127 ; CHECK-NEXT: indirectbr i8* [[NEWOP1]], [label [[BB0]], label %bb1]
128 ; CHECK: exit:
129 ; CHECK-NEXT: ret void
130 ;
131 entry:
132 %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
133 %initval = load i32, i32* %p, align 4
134 %initop = load i32, i32* %incdec.ptr, align 4
135 switch i32 %initop, label %exit [
136 i32 0, label %bb0
137 i32 1, label %bb1
138 ]
139
140 bb0:
141 %p.addr.0 = phi i32* [ %incdec.ptr, %entry ], [ %next0, %bb0 ], [ %next1, %bb1 ]
142 %opcode.0 = phi i32 [ %initval, %entry ], [ 0, %bb0 ], [ 1, %bb1 ]
143 tail call void @use(i32 %opcode.0)
144 %next0 = getelementptr inbounds i32, i32* %p.addr.0, i64 1
145 %newp0 = load i32, i32* %p.addr.0, align 4
146 %idx0 = sext i32 %newp0 to i64
147 %arrayidx0 = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 %idx0
148 %newop0 = load i8*, i8** %arrayidx0, align 8
149 indirectbr i8* %newop0, [label %bb0, label %bb1]
150
151 bb1:
152 %p.addr.1 = phi i32* [ %incdec.ptr, %entry ], [ %next0, %bb0 ], [ %next1, %bb1 ]
153 %opcode.1 = phi i32 [ %initval, %entry ], [ 0, %bb0 ], [ 1, %bb1 ]
154 tail call void @use(i32 %opcode.1)
155 %next1 = getelementptr inbounds i32, i32* %p.addr.1, i64 1
156 %newp1 = load i32, i32* %p.addr.1, align 4
157 %idx1 = sext i32 %newp1 to i64
158 %arrayidx1 = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 %idx1
159 %newop1 = load i8*, i8** %arrayidx1, align 8
160 indirectbr i8* %newop1, [label %bb0, label %bb1]
161
162 exit:
163 ret void
164 }
165
166 ; Make sure we do the right thing for cases where the indirectbr branches to
167 ; the block it terminates.
168 define void @loop(i64* nocapture readonly %p) {
169 ; CHECK-LABEL: @loop(
170 ; CHECK-NEXT: bb0.clone:
171 ; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
172 ; CHECK: bb0:
173 ; CHECK-NEXT: br label [[DOTSPLIT]]
174 ; CHECK: .split:
175 ; CHECK-NEXT: [[MERGE:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[BB0:%.*]] ], [ 0, [[BB0_CLONE:%.*]] ]
176 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 [[MERGE]]
177 ; CHECK-NEXT: store i64 [[MERGE]], i64* [[TMP0]], align 4
178 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[MERGE]], 1
179 ; CHECK-NEXT: [[IDX:%.*]] = srem i64 [[MERGE]], 2
180 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @loop.targets, i64 0, i64 [[IDX]]
181 ; CHECK-NEXT: [[TARGET:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
182 ; CHECK-NEXT: indirectbr i8* [[TARGET]], [label [[BB0]], label %bb1]
183 ; CHECK: bb1:
184 ; CHECK-NEXT: ret void
185 ;
186 entry:
187 br label %bb0
188
189 bb0:
190 %i = phi i64 [ %i.next, %bb0 ], [ 0, %entry ]
191 %tmp0 = getelementptr inbounds i64, i64* %p, i64 %i
192 store i64 %i, i64* %tmp0, align 4
193 %i.next = add nuw nsw i64 %i, 1
194 %idx = srem i64 %i, 2
195 %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @loop.targets, i64 0, i64 %idx
196 %target = load i8*, i8** %arrayidx, align 8
197 indirectbr i8* %target, [label %bb0, label %bb1]
198
199 bb1:
200 ret void
201 }
202
203 ; Don't do anything for cases that contain no phis.
204 define void @nophi(i32* %p) {
205 ; CHECK-LABEL: @nophi(
206 ; CHECK-NEXT: entry:
207 ; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
208 ; CHECK-NEXT: [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
209 ; CHECK-NEXT: switch i32 [[INITOP]], label [[EXIT:%.*]] [
210 ; CHECK-NEXT: i32 0, label [[BB0:%.*]]
211 ; CHECK-NEXT: i32 1, label [[BB1:%.*]]
212 ; CHECK-NEXT: ]
213 ; CHECK: bb0:
214 ; CHECK-NEXT: tail call void @use(i32 0)
215 ; CHECK-NEXT: br label [[INDIRECTGOTO:%.*]]
216 ; CHECK: bb1:
217 ; CHECK-NEXT: tail call void @use(i32 1)
218 ; CHECK-NEXT: br label [[INDIRECTGOTO]]
219 ; CHECK: indirectgoto:
220 ; CHECK-NEXT: [[SUNKADDR:%.*]] = ptrtoint i32* [[P]] to i64
221 ; CHECK-NEXT: [[SUNKADDR1:%.*]] = add i64 [[SUNKADDR]], 4
222 ; CHECK-NEXT: [[SUNKADDR2:%.*]] = inttoptr i64 [[SUNKADDR1]] to i32*
223 ; CHECK-NEXT: [[NEWP:%.*]] = load i32, i32* [[SUNKADDR2]], align 4
224 ; CHECK-NEXT: [[IDX:%.*]] = sext i32 [[NEWP]] to i64
225 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @nophi.targets, i64 0, i64 [[IDX]]
226 ; CHECK-NEXT: [[NEWOP:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
227 ; CHECK-NEXT: indirectbr i8* [[NEWOP]], [label [[BB0]], label %bb1]
228 ; CHECK: exit:
229 ; CHECK-NEXT: ret void
230 ;
231 entry:
232 %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
233 %initop = load i32, i32* %incdec.ptr, align 4
234 switch i32 %initop, label %exit [
235 i32 0, label %bb0
236 i32 1, label %bb1
237 ]
238
239 bb0:
240 tail call void @use(i32 0) br label %indirectgoto
241
242 bb1:
243 tail call void @use(i32 1)
244 br label %indirectgoto
245
246 indirectgoto:
247 %newp = load i32, i32* %incdec.ptr, align 4
248 %idx = sext i32 %newp to i64
249 %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @nophi.targets, i64 0, i64 %idx
250 %newop = load i8*, i8** %arrayidx, align 8
251 indirectbr i8* %newop, [label %bb0, label %bb1]
252
253 exit:
254 ret void
255 }
256
257 ; Don't do anything if the edge isn't critical.
258 define i32 @noncritical(i32 %k, i8* %p)
259 ; CHECK-LABEL: @noncritical(
260 ; CHECK-NEXT: entry:
261 ; CHECK-NEXT: [[D:%.*]] = add i32 [[K:%.*]], 1
262 ; CHECK-NEXT: indirectbr i8* [[P:%.*]], [label [[BB0:%.*]], label %bb1]
263 ; CHECK: bb0:
264 ; CHECK-NEXT: [[R0:%.*]] = sub i32 [[K]], [[D]]
265 ; CHECK-NEXT: br label [[EXIT:%.*]]
266 ; CHECK: bb1:
267 ; CHECK-NEXT: [[R1:%.*]] = sub i32 [[D]], [[K]]
268 ; CHECK-NEXT: br label [[EXIT]]
269 ; CHECK: exit:
270 ; CHECK-NEXT: [[V:%.*]] = phi i32 [ [[R0]], [[BB0]] ], [ [[R1]], [[BB1:%.*]] ]
271 ; CHECK-NEXT: ret i32 0
272 ;
273 {
274 entry:
275 %d = add i32 %k, 1
276 indirectbr i8* %p, [label %bb0, label %bb1]
277
278 bb0:
279 %v00 = phi i32 [%k, %entry]
280 %v01 = phi i32 [%d, %entry]
281 %r0 = sub i32 %v00, %v01
282 br label %exit
283
284 bb1:
285 %v10 = phi i32 [%d, %entry]
286 %v11 = phi i32 [%k, %entry]
287 %r1 = sub i32 %v10, %v11
288 br label %exit
289
290 exit:
291 %v = phi i32 [%r0, %bb0], [%r1, %bb1]
292 ret i32 0
293 }