llvm.org GIT mirror llvm / 98ee128
[CGP] Split some critical edges coming out of indirect branches Splitting critical edges when one of the source edges is an indirectbr is hard in general (because it requires changing the memory the indirectbr reads). But if a block only has a single indirectbr predecessor (which is the common case), we can simulate splitting that edge by splitting the destination block, and retargeting the *direct* branches. This is motivated by the use of computed gotos in python 2.7: PyEval_EvalFrame() ends up using an indirect branch with ~100 successors, and passing a constant to each of those. Since MachineSink can't break indirect critical edges on demand (and doing this in MIR doesn't look feasible), this causes us to emit ~100 defs of registers containing constants, which we emit in the predecessor block, where only one of those constants is used in each successor. So, at each computed goto, we needlessly spill about 100 constants to the stack. The end result is that a clang-compiled python interpreter can be ~2.5x slower on a simple python reduction loop than a gcc-compiled interpreter. Differential Revision: https://reviews.llvm.org/D29916 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296149 91177308-0d34-0410-b5e6-96231b3b80d8 Michael Kuperstein 3 years ago
5 changed file(s) with 435 addition(s) and 13 deletion(s). Raw diff Collapse all Expand all
1414
1515 #include "llvm/CodeGen/Passes.h"
1616 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/SetVector.h"
1718 #include "llvm/ADT/SmallSet.h"
1819 #include "llvm/ADT/Statistic.h"
1920 #include "llvm/Analysis/BlockFrequencyInfo.h"
2021 #include "llvm/Analysis/BranchProbabilityInfo.h"
22 #include "llvm/Analysis/CFG.h"
2123 #include "llvm/Analysis/InstructionSimplify.h"
2224 #include "llvm/Analysis/LoopInfo.h"
2325 #include "llvm/Analysis/ProfileSummaryInfo.h"
5254 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
5355 #include "llvm/Transforms/Utils/BuildLibCalls.h"
5456 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
57 #include "llvm/Transforms/Utils/Cloning.h"
5558 #include "llvm/Transforms/Utils/Local.h"
5659 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
60 #include "llvm/Transforms/Utils/ValueMapper.h"
5761 using namespace llvm;
5862 using namespace llvm::PatternMatch;
5963
221225 unsigned CreatedInstCost);
222226 bool splitBranchCondition(Function &F);
223227 bool simplifyOffsetableRelocate(Instruction &I);
228 bool splitIndirectCriticalEdges(Function &F);
224229 };
225230 }
226231
295300 if (!DisableBranchOpts)
296301 EverMadeChange |= splitBranchCondition(F);
297302
303 // Split some critical edges where one of the sources is an indirect branch,
304 // to help generate sane code for PHIs involving such edges.
305 EverMadeChange |= splitIndirectCriticalEdges(F);
306
298307 bool MadeChange = true;
299308 while (MadeChange) {
300309 MadeChange = false;
426435 DestBB = nullptr;
427436
428437 return DestBB;
438 }
439
440 // Return the unique indirectbr predecessor of a block. This may return null
441 // even if such a predecessor exists, if it's not useful for splitting.
442 // If a predecessor is found, OtherPreds will contain all other (non-indirectbr)
443 // predecessors of BB.
// Predecessors are enumerated through the incoming blocks of BB's first PHI,
// so OtherPreds receives one entry per incoming PHI edge whose source ends in
// a br or switch. Null is also returned when no incoming block ends in an
// indirectbr. OtherPreds may already have been appended to by the time null
// is returned, so callers should only rely on it after a non-null result.
444 static BasicBlock *
445 findIBRPredecessor(BasicBlock *BB, SmallVectorImpl &OtherPreds) {
446 // If the block doesn't have any PHIs, we don't care about it, since there's
447 // no point in splitting it.
448 PHINode *PN = dyn_cast(BB->begin());
449 if (!PN)
450 return nullptr;
451
452 // Verify we have exactly one IBR predecessor.
453 // Conservatively bail out if one of the other predecessors is not a "regular"
454 // terminator (that is, not a switch or a br).
455 BasicBlock *IBB = nullptr;
456 for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
457 BasicBlock *PredBB = PN->getIncomingBlock(Pred);
458 TerminatorInst *PredTerm = PredBB->getTerminator();
459 switch (PredTerm->getOpcode()) {
460 case Instruction::IndirectBr:
// A second indirectbr predecessor makes the block ineligible.
461 if (IBB)
462 return nullptr;
463 IBB = PredBB;
464 break;
465 case Instruction::Br:
466 case Instruction::Switch:
467 OtherPreds.push_back(PredBB);
// Nothing follows the switch inside the loop, so `continue` here and `break`
// in the case above both simply advance to the next incoming block.
468 continue;
469 default:
470 return nullptr;
471 }
472 }
473
474 return IBB;
475 }
476
477 // Split critical edges where the source of the edge is an indirectbr
478 // instruction. This isn't always possible, but we can handle some easy cases.
479 // This is useful because MI is unable to split such critical edges,
480 // which means it will not be able to sink instructions along those edges.
481 // This is especially painful for indirect branches with many successors, where
482 // we end up having to prepare all outgoing values in the origin block.
483 //
484 // Our normal algorithm for splitting critical edges requires us to update
485 // the outgoing edges of the edge origin block, but for an indirectbr this
486 // is hard, since it would require finding and updating the block addresses
487 // the indirect branch uses. But if a block only has a single indirectbr
488 // predecessor, with the others being regular branches, we can do it in a
489 // different way.
490 // Say we have A -> D, B -> D, I -> D where only I -> D is an indirectbr.
491 // We can split D into D0 and D1, where D0 contains only the PHIs from D,
492 // and D1 is the D block body. We can then duplicate D0 as D0A and D0B, and
493 // create the following structure:
494 // A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1
//
// In the code below, D0B is the original block (Target), which keeps only the
// indirectbr edge; D0A is its clone (DirectSucc), which receives the direct
// predecessors; and D1 is BodyBlock. Returns true if at least one target block
// was split, false otherwise.
495 bool CodeGenPrepare::splitIndirectCriticalEdges(Function &F) {
496 // Check whether the function has any indirectbrs, and collect which blocks
497 // they may jump to. Since most functions don't have indirect branches,
498 // this lowers the common case's overhead to O(Blocks) instead of O(Edges).
499 SmallSetVector Targets;
500 for (auto &BB : F) {
501 auto *IBI = dyn_cast(BB.getTerminator());
502 if (!IBI)
503 continue;
504
505 for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ)
506 Targets.insert(IBI->getSuccessor(Succ));
507 }
508
509 if (Targets.empty())
510 return false;
511
512 bool Changed = false;
513 for (BasicBlock *Target : Targets) {
// OtherPreds is filled by findIBRPredecessor with the br/switch predecessors
// of Target; a null result means Target is not a candidate for splitting.
514 SmallVector OtherPreds;
515 BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds);
516 if (!IBRPred)
517 continue;
// NOTE(review): an empty OtherPreds is not filtered out here. In that case the
// clone created below has no predecessors, and each cloned PHI loses its only
// incoming value in removeIncomingValue(). Confirm that DirPHI is still usable
// afterwards, or skip such targets.
518
519 // Don't even think about ehpads/landingpads.
520 Instruction *FirstNonPHI = Target->getFirstNonPHI();
521 if (FirstNonPHI->isEHPad() || Target->isLandingPad())
522 continue;
523
524 BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
525 // It's possible Target was its own successor through an indirectbr.
526 // In this case, the indirectbr now comes from BodyBlock.
527 if (IBRPred == Target)
528 IBRPred = BodyBlock;
529
530 // At this point Target only has PHIs, and BodyBlock has the rest of the
531 // block's body. Create a copy of Target that will be used by the "direct"
532 // preds.
533 ValueToValueMapTy VMap;
534 BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
535
// Retarget every direct predecessor from Target to the clone.
// NOTE(review): only IBRPred was remapped to BodyBlock above. If Target also
// reaches itself through a direct br/switch, that terminator presumably lives
// in BodyBlock after the split as well, yet it is looked up through Target
// here. Verify whether the self-loop edge gets retargeted.
536 for (BasicBlock *Pred : OtherPreds)
537 Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
538
539 // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
540 // they are clones, so the number of PHIs are the same.
541 // (a) Remove the edge coming from IBRPred from the "Direct" PHI
542 // (b) Leave that as the only edge in the "Indirect" PHI.
543 // (c) Merge the two in the body block.
544 BasicBlock::iterator Indirect = Target->begin(),
545 End = Target->getFirstNonPHI()->getIterator();
546 BasicBlock::iterator Direct = DirectSucc->begin();
547 BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt();
548
549 assert(&*End == Target->getTerminator() &&
550 "Block was expected to only contain PHIs");
551
// Walk the PHIs of Target and of its clone in lockstep.
552 while (Indirect != End) {
553 PHINode *DirPHI = cast(Direct);
554 PHINode *IndPHI = cast(Indirect);
555
556 // Now, clean up - the direct block shouldn't get the indirect value,
557 // and vice versa.
558 DirPHI->removeIncomingValue(IBRPred);
559 Direct++;
560
561 // Advance the pointer here, to avoid invalidation issues when the old
562 // PHI is erased.
563 Indirect++;
564
// Build a single-entry replacement PHI that carries only the value arriving
// from IBRPred; the original IndPHI is erased at the end of this iteration.
565 PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI);
566 NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred),
567 IBRPred);
568
569 // Create a PHI in the body block, to merge the direct and indirect
570 // predecessors.
571 PHINode *MergePHI =
572 PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
573 MergePHI->addIncoming(NewIndPHI, Target);
574 MergePHI->addIncoming(DirPHI, DirectSucc);
575
// All former users of the original PHI now read the merged value.
576 IndPHI->replaceAllUsesWith(MergePHI);
577 IndPHI->eraseFromParent();
578 }
579
580 Changed = true;
581 }
582
583 return Changed;
429584 }
430585
431586 /// Eliminate blocks that contain only PHI nodes, debug info directives, and an
4646 br label %L2
4747
4848 L2: ; preds = %L3, %bb2
49 ; THUMB-LABEL: %L1.clone
4950 ; THUMB: muls
5051 %res.2 = phi i32 [ %res.1, %L3 ], [ 1, %bb2 ] ; [#uses=1]
5152 %phitmp = mul i32 %res.2, 6 ; [#uses=1]
44 entry:
55 %tmp1 = getelementptr inbounds [5 x i8*], [5 x i8*]* @C.0.2070, i16 0, i16 %i ; [#uses=1]
66 %gotovar.4.0 = load i8*, i8** %tmp1, align 4 ; [#uses=1]
7 ; CHECK: br .LC.0.2070(r12)
7 ; CHECK: br .LC.0.2070(r15)
88 indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
99
1010 L5: ; preds = %bb2
1616 bb2: ; preds = %entry, %bb3
1717 %gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; [#uses=1]
1818 ; PIC: mtctr
19 ; PIC-NEXT: li
20 ; PIC-NEXT: li
21 ; PIC-NEXT: li
22 ; PIC-NEXT: li
2319 ; PIC-NEXT: bctr
20 ; PIC: li
21 ; PIC: b LBB
22 ; PIC: li
23 ; PIC: b LBB
24 ; PIC: li
25 ; PIC: b LBB
26 ; PIC: li
27 ; PIC: b LBB
2428 ; STATIC: mtctr
25 ; STATIC-NEXT: li
26 ; STATIC-NEXT: li
27 ; STATIC-NEXT: li
28 ; STATIC-NEXT: li
2929 ; STATIC-NEXT: bctr
30 ; STATIC: li
31 ; STATIC: b LBB
32 ; STATIC: li
33 ; STATIC: b LBB
34 ; STATIC: li
35 ; STATIC: b LBB
36 ; STATIC: li
37 ; STATIC: b LBB
3038 ; PPC64: mtctr
31 ; PPC64-NEXT: li
32 ; PPC64-NEXT: li
33 ; PPC64-NEXT: li
34 ; PPC64-NEXT: li
3539 ; PPC64-NEXT: bctr
40 ; PPC64: li
41 ; PPC64: b LBB
42 ; PPC64: li
43 ; PPC64: b LBB
44 ; PPC64: li
45 ; PPC64: b LBB
46 ; PPC64: li
47 ; PPC64: b LBB
3648 indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
3749
3850 bb3: ; preds = %entry
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt -codegenprepare -S < %s | FileCheck %s
2 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
3 target triple = "x86_64-unknown-linux-gnu"
4
5 declare void @use(i32) local_unnamed_addr
6 declare void @useptr([2 x i8*]*) local_unnamed_addr
7
8 ; CHECK: @simple.targets = constant [2 x i8*] [i8* blockaddress(@simple, %bb0), i8* blockaddress(@simple, %bb1)], align 16
9 @simple.targets = constant [2 x i8*] [i8* blockaddress(@simple, %bb0), i8* blockaddress(@simple, %bb1)], align 16
10
11 ; CHECK: @multi.targets = constant [2 x i8*] [i8* blockaddress(@multi, %bb0), i8* blockaddress(@multi, %bb1)], align 16
12 @multi.targets = constant [2 x i8*] [i8* blockaddress(@multi, %bb0), i8* blockaddress(@multi, %bb1)], align 16
13
14 ; CHECK: @loop.targets = constant [2 x i8*] [i8* blockaddress(@loop, %bb0), i8* blockaddress(@loop, %bb1)], align 16
15 @loop.targets = constant [2 x i8*] [i8* blockaddress(@loop, %bb0), i8* blockaddress(@loop, %bb1)], align 16
16
17 ; CHECK: @nophi.targets = constant [2 x i8*] [i8* blockaddress(@nophi, %bb0), i8* blockaddress(@nophi, %bb1)], align 16
18 @nophi.targets = constant [2 x i8*] [i8* blockaddress(@nophi, %bb0), i8* blockaddress(@nophi, %bb1)], align 16
19
20 ; Check that we break the critical edge when a jump table has only one use.
21 define void @simple(i32* nocapture readonly %p) {
22 ; CHECK-LABEL: @simple(
23 ; CHECK-NEXT: entry:
24 ; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
25 ; CHECK-NEXT: [[INITVAL:%.*]] = load i32, i32* [[P]], align 4
26 ; CHECK-NEXT: [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
27 ; CHECK-NEXT: switch i32 [[INITOP]], label [[EXIT:%.*]] [
28 ; CHECK-NEXT: i32 0, label [[BB0_CLONE:%.*]]
29 ; CHECK-NEXT: i32 1, label [[BB1_CLONE:%.*]]
30 ; CHECK-NEXT: ]
31 ; CHECK: bb0:
32 ; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
33 ; CHECK: .split:
34 ; CHECK-NEXT: [[MERGE:%.*]] = phi i32* [ [[PTR:%.*]], [[BB0:%.*]] ], [ [[INCDEC_PTR]], [[BB0_CLONE]] ]
35 ; CHECK-NEXT: [[MERGE2:%.*]] = phi i32 [ 0, [[BB0]] ], [ [[INITVAL]], [[BB0_CLONE]] ]
36 ; CHECK-NEXT: tail call void @use(i32 [[MERGE2]])
37 ; CHECK-NEXT: br label [[INDIRECTGOTO:%.*]]
38 ; CHECK: bb1:
39 ; CHECK-NEXT: br label [[DOTSPLIT3:%.*]]
40 ; CHECK: .split3:
41 ; CHECK-NEXT: [[MERGE5:%.*]] = phi i32* [ [[PTR]], [[BB1:%.*]] ], [ [[INCDEC_PTR]], [[BB1_CLONE]] ]
42 ; CHECK-NEXT: [[MERGE7:%.*]] = phi i32 [ 1, [[BB1]] ], [ [[INITVAL]], [[BB1_CLONE]] ]
43 ; CHECK-NEXT: tail call void @use(i32 [[MERGE7]])
44 ; CHECK-NEXT: br label [[INDIRECTGOTO]]
45 ; CHECK: indirectgoto:
46 ; CHECK-NEXT: [[P_ADDR_SINK:%.*]] = phi i32* [ [[MERGE5]], [[DOTSPLIT3]] ], [ [[MERGE]], [[DOTSPLIT]] ]
47 ; CHECK-NEXT: [[PTR]] = getelementptr inbounds i32, i32* [[P_ADDR_SINK]], i64 1
48 ; CHECK-NEXT: [[NEWP:%.*]] = load i32, i32* [[P_ADDR_SINK]], align 4
49 ; CHECK-NEXT: [[IDX:%.*]] = sext i32 [[NEWP]] to i64
50 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @simple.targets, i64 0, i64 [[IDX]]
51 ; CHECK-NEXT: [[NEWOP:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
52 ; CHECK-NEXT: indirectbr i8* [[NEWOP]], [label [[BB0]], label %bb1]
53 ; CHECK: exit:
54 ; CHECK-NEXT: ret void
55 ; CHECK: bb0.clone:
56 ; CHECK-NEXT: br label [[DOTSPLIT]]
57 ; CHECK: bb1.clone:
58 ; CHECK-NEXT: br label [[DOTSPLIT3]]
59 ;
60 entry:
61 %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
62 %initval = load i32, i32* %p, align 4
63 %initop = load i32, i32* %incdec.ptr, align 4
64 switch i32 %initop, label %exit [
65 i32 0, label %bb0
66 i32 1, label %bb1
67 ]
68
69 bb0:
70 %p.addr.0 = phi i32* [ %incdec.ptr, %entry ], [ %ptr, %indirectgoto ]
71 %opcode.0 = phi i32 [ %initval, %entry ], [ 0, %indirectgoto ]
72 tail call void @use(i32 %opcode.0)
73 br label %indirectgoto
74
75 bb1:
76 %p.addr.1 = phi i32* [ %incdec.ptr, %entry ], [ %ptr, %indirectgoto ]
77 %opcode.1 = phi i32 [ %initval, %entry ], [ 1, %indirectgoto ]
78 tail call void @use(i32 %opcode.1)
79 br label %indirectgoto
80
81 indirectgoto:
82 %p.addr.sink = phi i32* [ %p.addr.1, %bb1 ], [ %p.addr.0, %bb0 ]
83 %ptr = getelementptr inbounds i32, i32* %p.addr.sink, i64 1
84 %newp = load i32, i32* %p.addr.sink, align 4
85 %idx = sext i32 %newp to i64
86 %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @simple.targets, i64 0, i64 %idx
87 %newop = load i8*, i8** %arrayidx, align 8
88 indirectbr i8* %newop, [label %bb0, label %bb1]
89
90 exit:
91 ret void
92 }
93
94 ; Don't try to break critical edges when several indirectbrs point to a single block.
95 define void @multi(i32* nocapture readonly %p) {
96 ; CHECK-LABEL: @multi(
97 ; CHECK-NEXT: entry:
98 ; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
99 ; CHECK-NEXT: [[INITVAL:%.*]] = load i32, i32* [[P]], align 4
100 ; CHECK-NEXT: [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
101 ; CHECK-NEXT: switch i32 [[INITOP]], label [[EXIT:%.*]] [
102 ; CHECK-NEXT: i32 0, label [[BB0:%.*]]
103 ; CHECK-NEXT: i32 1, label [[BB1:%.*]]
104 ; CHECK-NEXT: ]
105 ; CHECK: bb0:
106 ; CHECK-NEXT: [[P_ADDR_0:%.*]] = phi i32* [ [[INCDEC_PTR]], [[ENTRY:%.*]] ], [ [[NEXT0:%.*]], [[BB0]] ], [ [[NEXT1:%.*]], [[BB1]] ]
107 ; CHECK-NEXT: [[OPCODE_0:%.*]] = phi i32 [ [[INITVAL]], [[ENTRY]] ], [ 0, [[BB0]] ], [ 1, [[BB1]] ]
108 ; CHECK-NEXT: tail call void @use(i32 [[OPCODE_0]])
109 ; CHECK-NEXT: [[NEXT0]] = getelementptr inbounds i32, i32* [[P_ADDR_0]], i64 1
110 ; CHECK-NEXT: [[NEWP0:%.*]] = load i32, i32* [[P_ADDR_0]], align 4
111 ; CHECK-NEXT: [[IDX0:%.*]] = sext i32 [[NEWP0]] to i64
112 ; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 [[IDX0]]
113 ; CHECK-NEXT: [[NEWOP0:%.*]] = load i8*, i8** [[ARRAYIDX0]], align 8
114 ; CHECK-NEXT: indirectbr i8* [[NEWOP0]], [label [[BB0]], label %bb1]
115 ; CHECK: bb1:
116 ; CHECK-NEXT: [[P_ADDR_1:%.*]] = phi i32* [ [[INCDEC_PTR]], [[ENTRY]] ], [ [[NEXT0]], [[BB0]] ], [ [[NEXT1]], [[BB1]] ]
117 ; CHECK-NEXT: [[OPCODE_1:%.*]] = phi i32 [ [[INITVAL]], [[ENTRY]] ], [ 0, [[BB0]] ], [ 1, [[BB1]] ]
118 ; CHECK-NEXT: tail call void @use(i32 [[OPCODE_1]])
119 ; CHECK-NEXT: [[NEXT1]] = getelementptr inbounds i32, i32* [[P_ADDR_1]], i64 1
120 ; CHECK-NEXT: [[NEWP1:%.*]] = load i32, i32* [[P_ADDR_1]], align 4
121 ; CHECK-NEXT: [[IDX1:%.*]] = sext i32 [[NEWP1]] to i64
122 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 [[IDX1]]
123 ; CHECK-NEXT: [[NEWOP1:%.*]] = load i8*, i8** [[ARRAYIDX1]], align 8
124 ; CHECK-NEXT: indirectbr i8* [[NEWOP1]], [label [[BB0]], label %bb1]
125 ; CHECK: exit:
126 ; CHECK-NEXT: ret void
127 ;
128 entry:
129 %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
130 %initval = load i32, i32* %p, align 4
131 %initop = load i32, i32* %incdec.ptr, align 4
132 switch i32 %initop, label %exit [
133 i32 0, label %bb0
134 i32 1, label %bb1
135 ]
136
137 bb0:
138 %p.addr.0 = phi i32* [ %incdec.ptr, %entry ], [ %next0, %bb0 ], [ %next1, %bb1 ]
139 %opcode.0 = phi i32 [ %initval, %entry ], [ 0, %bb0 ], [ 1, %bb1 ]
140 tail call void @use(i32 %opcode.0)
141 %next0 = getelementptr inbounds i32, i32* %p.addr.0, i64 1
142 %newp0 = load i32, i32* %p.addr.0, align 4
143 %idx0 = sext i32 %newp0 to i64
144 %arrayidx0 = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 %idx0
145 %newop0 = load i8*, i8** %arrayidx0, align 8
146 indirectbr i8* %newop0, [label %bb0, label %bb1]
147
148 bb1:
149 %p.addr.1 = phi i32* [ %incdec.ptr, %entry ], [ %next0, %bb0 ], [ %next1, %bb1 ]
150 %opcode.1 = phi i32 [ %initval, %entry ], [ 0, %bb0 ], [ 1, %bb1 ]
151 tail call void @use(i32 %opcode.1)
152 %next1 = getelementptr inbounds i32, i32* %p.addr.1, i64 1
153 %newp1 = load i32, i32* %p.addr.1, align 4
154 %idx1 = sext i32 %newp1 to i64
155 %arrayidx1 = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 %idx1
156 %newop1 = load i8*, i8** %arrayidx1, align 8
157 indirectbr i8* %newop1, [label %bb0, label %bb1]
158
159 exit:
160 ret void
161 }
162
163 ; Make sure we do the right thing for cases where the indirectbr branches to
164 ; the block it terminates.
165 define void @loop(i64* nocapture readonly %p) {
166 ; CHECK-LABEL: @loop(
167 ; CHECK-NEXT: bb0.clone:
168 ; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
169 ; CHECK: bb0:
170 ; CHECK-NEXT: br label [[DOTSPLIT]]
171 ; CHECK: .split:
172 ; CHECK-NEXT: [[MERGE:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[BB0:%.*]] ], [ 0, [[BB0_CLONE:%.*]] ]
173 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 [[MERGE]]
174 ; CHECK-NEXT: store i64 [[MERGE]], i64* [[TMP0]], align 4
175 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[MERGE]], 1
176 ; CHECK-NEXT: [[IDX:%.*]] = srem i64 [[MERGE]], 2
177 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @loop.targets, i64 0, i64 [[IDX]]
178 ; CHECK-NEXT: [[TARGET:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
179 ; CHECK-NEXT: indirectbr i8* [[TARGET]], [label [[BB0]], label %bb1]
180 ; CHECK: bb1:
181 ; CHECK-NEXT: ret void
182 ;
183 entry:
184 br label %bb0
185
186 bb0:
187 %i = phi i64 [ %i.next, %bb0 ], [ 0, %entry ]
188 %tmp0 = getelementptr inbounds i64, i64* %p, i64 %i
189 store i64 %i, i64* %tmp0, align 4
190 %i.next = add nuw nsw i64 %i, 1
191 %idx = srem i64 %i, 2
192 %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @loop.targets, i64 0, i64 %idx
193 %target = load i8*, i8** %arrayidx, align 8
194 indirectbr i8* %target, [label %bb0, label %bb1]
195
196 bb1:
197 ret void
198 }
199
200 ; Don't do anything for cases that contain no phis.
201 define void @nophi(i32* %p) {
202 ; CHECK-LABEL: @nophi(
203 ; CHECK-NEXT: entry:
204 ; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
205 ; CHECK-NEXT: [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
206 ; CHECK-NEXT: switch i32 [[INITOP]], label [[EXIT:%.*]] [
207 ; CHECK-NEXT: i32 0, label [[BB0:%.*]]
208 ; CHECK-NEXT: i32 1, label [[BB1:%.*]]
209 ; CHECK-NEXT: ]
210 ; CHECK: bb0:
211 ; CHECK-NEXT: tail call void @use(i32 0)
212 ; CHECK-NEXT: br label [[INDIRECTGOTO:%.*]]
213 ; CHECK: bb1:
214 ; CHECK-NEXT: tail call void @use(i32 1)
215 ; CHECK-NEXT: br label [[INDIRECTGOTO]]
216 ; CHECK: indirectgoto:
217 ; CHECK-NEXT: [[SUNKADDR:%.*]] = ptrtoint i32* [[P]] to i64
218 ; CHECK-NEXT: [[SUNKADDR1:%.*]] = add i64 [[SUNKADDR]], 4
219 ; CHECK-NEXT: [[SUNKADDR2:%.*]] = inttoptr i64 [[SUNKADDR1]] to i32*
220 ; CHECK-NEXT: [[NEWP:%.*]] = load i32, i32* [[SUNKADDR2]], align 4
221 ; CHECK-NEXT: [[IDX:%.*]] = sext i32 [[NEWP]] to i64
222 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @nophi.targets, i64 0, i64 [[IDX]]
223 ; CHECK-NEXT: [[NEWOP:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
224 ; CHECK-NEXT: indirectbr i8* [[NEWOP]], [label [[BB0]], label %bb1]
225 ; CHECK: exit:
226 ; CHECK-NEXT: ret void
227 ;
228 entry:
229 %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
230 %initop = load i32, i32* %incdec.ptr, align 4
231 switch i32 %initop, label %exit [
232 i32 0, label %bb0
233 i32 1, label %bb1
234 ]
235
236 bb0:
237 tail call void @use(i32 0)
238 br label %indirectgoto
239
240 bb1:
241 tail call void @use(i32 1)
242 br label %indirectgoto
243
244 indirectgoto:
245 %newp = load i32, i32* %incdec.ptr, align 4
246 %idx = sext i32 %newp to i64
247 %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @nophi.targets, i64 0, i64 %idx
248 %newop = load i8*, i8** %arrayidx, align 8
249 indirectbr i8* %newop, [label %bb0, label %bb1]
250
251 exit:
252 ret void
253 }