llvm.org GIT mirror llvm / 85e632d
Add a speculative execution pass Summary: This is a pass for speculative execution of instructions for simple if-then (triangle) control flow. It's aimed at GPUs, but could perhaps be used in other contexts. Enabling this pass gives us a 1.0% geomean improvement on Google benchmark suites, with one benchmark improving 33%. Credit goes to Jingyue Wu for writing an earlier version of this pass. Patched by Bjarke Roune. Test Plan: This patch adds a set of tests in test/Transforms/SpeculativeExecution/spec.ll The pass is controlled by a flag which defaults to having the pass not run. Reviewers: eliben, dberlin, meheff, jingyue, hfinkel Reviewed By: jingyue, hfinkel Subscribers: majnemer, jholewinski, llvm-commits Differential Revision: http://reviews.llvm.org/D9360 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237459 91177308-0d34-0410-b5e6-96231b3b80d8 Jingyue Wu 4 years ago
10 changed file(s) with 460 addition(s) and 3 deletion(s). Raw diff Collapse all Expand all
205205 return const_cast(this)->getUniquePredecessor();
206206 }
207207
208 /// Return the successor of this block if it has a unique successor.
209 /// Otherwise return a null pointer. This method is analogous to
210 /// getUniquePredeccessor above.
208 /// \brief Return the successor of this block if it has a single successor.
209 /// Otherwise return a null pointer.
210 ///
211 /// This method is analogous to getSinglePredecessor above.
212 BasicBlock *getSingleSuccessor();
213 const BasicBlock *getSingleSuccessor() const {
214 return const_cast(this)->getSingleSuccessor();
215 }
216
217 /// \brief Return the successor of this block if it has a unique successor.
218 /// Otherwise return a null pointer.
219 ///
220 /// This method is analogous to getUniquePredecessor above.
211221 BasicBlock *getUniqueSuccessor();
212222 const BasicBlock *getUniqueSuccessor() const {
213223 return const_cast(this)->getUniqueSuccessor();
255255 void initializeSeparateConstOffsetFromGEPPass(PassRegistry &);
256256 void initializeSlotIndexesPass(PassRegistry&);
257257 void initializeSpillPlacementPass(PassRegistry&);
258 void initializeSpeculativeExecutionPass(PassRegistry&);
258259 void initializeStackProtectorPass(PassRegistry&);
259260 void initializeStackColoringPass(PassRegistry&);
260261 void initializeStackSlotColoringPass(PassRegistry&);
169169 (void) llvm::createPartiallyInlineLibCallsPass();
170170 (void) llvm::createScalarizerPass();
171171 (void) llvm::createSeparateConstOffsetFromGEPPass();
172 (void) llvm::createSpeculativeExecutionPass();
172173 (void) llvm::createRewriteSymbolsPass();
173174 (void) llvm::createStraightLineStrengthReducePass();
174175 (void) llvm::createMemDerefPrinter();
422422
423423 //===----------------------------------------------------------------------===//
424424 //
425 // SpeculativeExecution - Aggressively hoist instructions to enable
426 // speculative execution on targets where branches are expensive.
427 //
428 FunctionPass *createSpeculativeExecutionPass();
429
430 //===----------------------------------------------------------------------===//
431 //
425432 // LoadCombine - Combine loads into bigger loads.
426433 //
427434 BasicBlockPass *createLoadCombinePass();
235235 // This is OK.
236236 }
237237 return PredBB;
238 }
239
240 BasicBlock *BasicBlock::getSingleSuccessor() {
241 succ_iterator SI = succ_begin(this), E = succ_end(this);
242 if (SI == E) return nullptr; // no successors
243 BasicBlock *TheSucc = *SI;
244 ++SI;
245 return (SI == E) ? TheSucc : nullptr /* multiple successors */;
238246 }
239247
240248 BasicBlock *BasicBlock::getUniqueSuccessor() {
230230 MPM.add(createSROAPass(/*RequiresDomTree*/ false));
231231 else
232232 MPM.add(createScalarReplAggregatesPass(-1, false));
233
233234 MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
234235 MPM.add(createJumpThreadingPass()); // Thread jumps.
235236 MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
4444 SeparateConstOffsetFromGEP.cpp
4545 SimplifyCFGPass.cpp
4646 Sink.cpp
47 SpeculativeExecution.cpp
4748 StraightLineStrengthReduce.cpp
4849 StructurizeCFG.cpp
4950 TailRecursionElimination.cpp
7373 initializeSinkingPass(Registry);
7474 initializeTailCallElimPass(Registry);
7575 initializeSeparateConstOffsetFromGEPPass(Registry);
76 initializeSpeculativeExecutionPass(Registry);
7677 initializeStraightLineStrengthReducePass(Registry);
7778 initializeLoadCombinePass(Registry);
7879 initializePlaceBackedgeSafepointsImplPass(Registry);
0 //===- SpeculativeExecution.cpp ---------------------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass hoists instructions to enable speculative execution on
10 // targets where branches are expensive. This is aimed at GPUs. It
11 // currently works on simple if-then and if-then-else
12 // patterns.
13 //
14 // Removing branches is not the only motivation for this
15 // pass. E.g. consider this code and assume that there is no
16 // addressing mode for multiplying by sizeof(*a):
17 //
18 // if (b > 0)
19 // c = a[i + 1]
20 // if (d > 0)
21 // e = a[i + 2]
22 //
23 // turns into
24 //
25 // p = &a[i + 1];
26 // if (b > 0)
27 // c = *p;
28 // q = &a[i + 2];
29 // if (d > 0)
30 // e = *q;
31 //
32 // which could later be optimized to
33 //
34 // r = &a[i];
35 // if (b > 0)
36 // c = r[1];
37 // if (d > 0)
38 // e = r[2];
39 //
40 // Later passes sink back much of the speculated code that did not enable
41 // further optimization.
42 //
43 //===----------------------------------------------------------------------===//
44
45 #include "llvm/ADT/SmallSet.h"
46 #include "llvm/Analysis/TargetTransformInfo.h"
47 #include "llvm/Analysis/ValueTracking.h"
48 #include "llvm/IR/Instructions.h"
49 #include "llvm/IR/Module.h"
50 #include "llvm/IR/Operator.h"
51 #include "llvm/Support/CommandLine.h"
52 #include "llvm/Support/Debug.h"
53
54 using namespace llvm;
55
56 #define DEBUG_TYPE "speculative-execution"
57
58 // The risk that speculation will not pay off increases with the
59 // number of instructions speculated, so we put a limit on that.
60 static cl::opt SpecExecMaxSpeculationCost(
61 "spec-exec-max-speculation-cost", cl::init(7), cl::Hidden,
62 cl::desc("Speculative execution is not applied to basic blocks where "
63 "the cost of the instructions to speculatively execute "
64 "exceeds this limit."));
65
66 // Speculating just a few instructions from a larger block tends not
67 // to be profitable and this limit prevents that. A reason for that is
68 // that small basic blocks are more likely to be candidates for
69 // further optimization.
70 static cl::opt SpecExecMaxNotHoisted(
71 "spec-exec-max-not-hoisted", cl::init(5), cl::Hidden,
72 cl::desc("Speculative execution is not applied to basic blocks where the "
73 "number of instructions that would not be speculatively executed "
74 "exceeds this limit."));
75
76 class SpeculativeExecution : public FunctionPass {
77 public:
78 static char ID;
79 SpeculativeExecution(): FunctionPass(ID) {}
80
81 void getAnalysisUsage(AnalysisUsage &AU) const override;
82 bool runOnFunction(Function &F) override;
83
84 private:
85 bool runOnBasicBlock(BasicBlock &B);
86 bool considerHoistingFromTo(BasicBlock &FromBlock, BasicBlock &ToBlock);
87
88 const TargetTransformInfo *TTI = nullptr;
89 };
90
91 char SpeculativeExecution::ID = 0;
92 INITIALIZE_PASS_BEGIN(SpeculativeExecution, "speculative-execution",
93 "Speculatively execute instructions", false, false)
94 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
95 INITIALIZE_PASS_END(SpeculativeExecution, "speculative-execution",
96 "Speculatively execute instructions", false, false)
97
98 void SpeculativeExecution::getAnalysisUsage(AnalysisUsage &AU) const {
99 AU.addRequired();
100 }
101
102 bool SpeculativeExecution::runOnFunction(Function &F) {
103 if (skipOptnoneFunction(F))
104 return false;
105
106 TTI = &getAnalysis().getTTI(F);
107
108 bool Changed = false;
109 for (auto& B : F) {
110 Changed |= runOnBasicBlock(B);
111 }
112 return Changed;
113 }
114
115 bool SpeculativeExecution::runOnBasicBlock(BasicBlock &B) {
116 BranchInst *BI = dyn_cast(B.getTerminator());
117 if (BI == nullptr)
118 return false;
119
120 if (BI->getNumSuccessors() != 2)
121 return false;
122 BasicBlock &Succ0 = *BI->getSuccessor(0);
123 BasicBlock &Succ1 = *BI->getSuccessor(1);
124
125 if (&B == &Succ0 || &B == &Succ1 || &Succ0 == &Succ1) {
126 return false;
127 }
128
129 // Hoist from if-then (triangle).
130 if (Succ0.getSinglePredecessor() != nullptr &&
131 Succ0.getSingleSuccessor() == &Succ1) {
132 return considerHoistingFromTo(Succ0, B);
133 }
134
135 // Hoist from if-else (triangle).
136 if (Succ1.getSinglePredecessor() != nullptr &&
137 Succ1.getSingleSuccessor() == &Succ0) {
138 return considerHoistingFromTo(Succ1, B);
139 }
140
141 // Hoist from if-then-else (diamond), but only if it is equivalent to
142 // an if-else or if-then due to one of the branches doing nothing.
143 if (Succ0.getSinglePredecessor() != nullptr &&
144 Succ1.getSinglePredecessor() != nullptr &&
145 Succ1.getSingleSuccessor() != nullptr &&
146 Succ1.getSingleSuccessor() != &B &&
147 Succ1.getSingleSuccessor() == Succ0.getSingleSuccessor()) {
148 // If a block has only one instruction, then that is a terminator
149 // instruction so that the block does nothing. This does happen.
150 if (Succ1.size() == 1) // equivalent to if-then
151 return considerHoistingFromTo(Succ0, B);
152 if (Succ0.size() == 1) // equivalent to if-else
153 return considerHoistingFromTo(Succ1, B);
154 }
155
156 return false;
157 }
158
159 static unsigned ComputeSpeculationCost(const Instruction *I,
160 const TargetTransformInfo &TTI) {
161 switch (Operator::getOpcode(I)) {
162 case Instruction::GetElementPtr:
163 case Instruction::Add:
164 case Instruction::Mul:
165 case Instruction::And:
166 case Instruction::Or:
167 case Instruction::Select:
168 case Instruction::Shl:
169 case Instruction::Sub:
170 case Instruction::LShr:
171 case Instruction::AShr:
172 case Instruction::Xor:
173 case Instruction::ZExt:
174 case Instruction::SExt:
175 return TTI.getUserCost(I);
176
177 default:
178 return UINT_MAX; // Disallow anything not whitelisted.
179 }
180 }
181
182 bool SpeculativeExecution::considerHoistingFromTo(BasicBlock &FromBlock,
183 BasicBlock &ToBlock) {
184 SmallSet NotHoisted;
185 const auto AllPrecedingUsesFromBlockHoisted = [&NotHoisted](User *U) {
186 for (Value* V : U->operand_values()) {
187 if (Instruction *I = dyn_cast(V)) {
188 if (NotHoisted.count(I) > 0)
189 return false;
190 }
191 }
192 return true;
193 };
194
195 unsigned TotalSpeculationCost = 0;
196 for (auto& I : FromBlock) {
197 const unsigned Cost = ComputeSpeculationCost(&I, *TTI);
198 if (Cost != UINT_MAX && isSafeToSpeculativelyExecute(&I) &&
199 AllPrecedingUsesFromBlockHoisted(&I)) {
200 TotalSpeculationCost += Cost;
201 if (TotalSpeculationCost > SpecExecMaxSpeculationCost)
202 return false; // too much to hoist
203 } else {
204 NotHoisted.insert(&I);
205 if (NotHoisted.size() > SpecExecMaxNotHoisted)
206 return false; // too much left behind
207 }
208 }
209
210 if (TotalSpeculationCost == 0)
211 return false; // nothing to hoist
212
213 for (auto I = FromBlock.begin(); I != FromBlock.end();) {
214 // We have to increment I before moving Current as moving Current
215 // changes the list that I is iterating through.
216 auto Current = I;
217 ++I;
218 if (!NotHoisted.count(Current)) {
219 Current->moveBefore(ToBlock.getTerminator());
220 }
221 }
222 return true;
223 }
224
225 namespace llvm {
226
227 FunctionPass *createSpeculativeExecutionPass() {
228 return new SpeculativeExecution();
229 }
230
231 } // namespace llvm
0 ; RUN: opt < %s -S -speculative-execution \
1 ; RUN: -spec-exec-max-speculation-cost 4 -spec-exec-max-not-hoisted 3 \
2 ; RUN: | FileCheck %s
3
4 target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
5
6 ; Hoist in if-then pattern.
7 define void @ifThen() {
8 ; CHECK-LABEL: @ifThen(
9 ; CHECK: %x = add i32 2, 3
10 ; CHECK: br i1 true
11 br i1 true, label %a, label %b
12 ; CHECK: a:
13 a:
14 %x = add i32 2, 3
15 ; CHECK: br label
16 br label %b
17 ; CHECK: b:
18 b:
19 ; CHECK: ret void
20 ret void
21 }
22
23 ; Hoist in if-else pattern.
24 define void @ifElse() {
25 ; CHECK-LABEL: @ifElse(
26 ; CHECK: %x = add i32 2, 3
27 ; CHECK: br i1 true
28 br i1 true, label %b, label %a
29 ; CHECK: a:
30 a:
31 %x = add i32 2, 3
32 ; CHECK: br label
33 br label %b
34 ; CHECK: b:
35 b:
36 ; CHECK: ret void
37 ret void
38 }
39
40 ; Hoist in if-then-else pattern if it is equivalent to if-then.
41 define void @ifElseThenAsIfThen() {
42 ; CHECK-LABEL: @ifElseThenAsIfThen(
43 ; CHECK: %x = add i32 2, 3
44 ; CHECK: br
45 br i1 true, label %a, label %b
46 ; CHECK: a:
47 a:
48 %x = add i32 2, 3
49 ; CHECK: br label
50 br label %c
51 ; CHECK: b:
52 b:
53 br label %c
54 ; CHECK: c
55 c:
56 ret void
57 }
58
59 ; Hoist in if-then-else pattern if it is equivalent to if-else.
60 define void @ifElseThenAsIfElse() {
61 ; CHECK-LABEL: @ifElseThenAsIfElse(
62 ; CHECK: %x = add i32 2, 3
63 ; CHECK: br
64 br i1 true, label %b, label %a
65 ; CHECK: a:
66 a:
67 %x = add i32 2, 3
68 ; CHECK: br label
69 br label %c
70 ; CHECK: b:
71 b:
72 br label %c
73 ; CHECK: c
74 c:
75 ret void
76 }
77
78 ; Do not hoist if-then-else pattern if it is not equivalent to if-then
79 ; or if-else.
80 define void @ifElseThen() {
81 ; CHECK-LABEL: @ifElseThen(
82 ; CHECK: br
83 br i1 true, label %a, label %b
84 ; CHECK: a:
85 a:
86 ; CHECK: %x = add
87 %x = add i32 2, 3
88 ; CHECK: br label
89 br label %c
90 ; CHECK: b:
91 b:
92 ; CHECK: %y = add
93 %y = add i32 2, 3
94 br label %c
95 ; CHECK: c
96 c:
97 ret void
98 }
99
100 ; Do not hoist loads and do not hoist an instruction past a definition of
101 ; an operand.
102 define void @doNotHoistPastDef() {
103 ; CHECK-LABEL: @doNotHoistPastDef(
104 br i1 true, label %b, label %a
105 ; CHECK-NOT: load
106 ; CHECK-NOT: add
107 ; CHECK: a:
108 a:
109 ; CHECK: %def = load
110 %def = load i32, i32* null
111 ; CHECK: %use = add
112 %use = add i32 %def, 0
113 br label %b
114 ; CHECK: b:
115 b:
116 ret void
117 }
118
119 ; Case with nothing to speculate.
120 define void @nothingToSpeculate() {
121 ; CHECK-LABEL: @nothingToSpeculate(
122 br i1 true, label %b, label %a
123 ; CHECK: a:
124 a:
125 ; CHECK: %def = load
126 %def = load i32, i32* null
127 br label %b
128 ; CHECK: b:
129 b:
130 ret void
131 }
132
133 ; Still hoist if an operand is defined before the block or is itself hoisted.
134 define void @hoistIfNotPastDef() {
135 ; CHECK-LABEL: @hoistIfNotPastDef(
136 ; CHECK: %x = load
137 %x = load i32, i32* null
138 ; CHECK: %y = add i32 %x, 1
139 ; CHECK: %z = add i32 %y, 1
140 ; CHECK: br
141 br i1 true, label %b, label %a
142 ; CHECK: a:
143 a:
144 %y = add i32 %x, 1
145 %z = add i32 %y, 1
146 br label %b
147 ; CHECK: b:
148 b:
149 ret void
150 }
151
152 ; Do not hoist if the speculation cost is too high.
153 define void @costTooHigh() {
154 ; CHECK-LABEL: @costTooHigh(
155 ; CHECK: br
156 br i1 true, label %b, label %a
157 ; CHECK: a:
158 a:
159 ; CHECK: %r1 = add
160 %r1 = add i32 1, 1
161 ; CHECK: %r2 = add
162 %r2 = add i32 1, 1
163 ; CHECK: %r3 = add
164 %r3 = add i32 1, 1
165 ; CHECK: %r4 = add
166 %r4 = add i32 1, 1
167 ; CHECK: %r5 = add
168 %r5 = add i32 1, 1
169 br label %b
170 ; CHECK: b:
171 b:
172 ret void
173 }
174
175 ; Do not hoist if too many instructions are left behind.
176 define void @tooMuchLeftBehind() {
177 ; CHECK-LABEL: @tooMuchLeftBehind(
178 ; CHECK: br
179 br i1 true, label %b, label %a
180 ; CHECK: a:
181 a:
182 ; CHECK: %x = load
183 %x = load i32, i32* null
184 ; CHECK: %r1 = add
185 %r1 = add i32 %x, 1
186 ; CHECK: %r2 = add
187 %r2 = add i32 %x, 1
188 ; CHECK: %r3 = add
189 %r3 = add i32 %x, 1
190 br label %b
191 ; CHECK: b:
192 b:
193 ret void
194 }