llvm.org GIT mirror llvm / d2c4dd2
[ORCv2] - New Speculate Query Implementation Summary: This patch introduces SequenceBBQuery, a new heuristic to find likely next callable functions; it tries to find the blocks with calls in the order of the execution sequence of Blocks. It still uses BlockFrequencyAnalysis to find high frequency blocks. For a handful of the hottest blocks (plan to customize), the algorithm traverses the CFG and discovers the caller blocks along the way to the Entry Basic Block and the Exit Basic Block. It uses a Block Hint to stop traversing already visited blocks in both directions. It implicitly assumes that once a block is visited while discovering entry or exit nodes, revisiting it again does not add much. It also uses branch probability info (cached result) to traverse only hot edges (planned to customize) from hot blocks. Without BPI, the algorithm mostly returns all the blocks in the CFG with calls. It also changes the heuristic queries so they don't maintain state. Hence they are safe to call from multiple threads. It also implements new instrumentation to avoid jumping into the JIT on every call to the function, with the help of __orc_speculate.decision.block and __orc_speculate.block. "Speculator Registration Mechanism is also changed" - kudos to @lhames Open to review, mostly looking to change the implementation of the SequenceBBQuery heuristics with good data structure choices. Reviewers: lhames, dblaikie Reviewed By: lhames Subscribers: mgorny, hiraditya, mgrang, llvm-commits, lhames Tags: #speculative_compilation_in_orc, #llvm Differential Revision: https://reviews.llvm.org/D66399 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@370092 91177308-0d34-0410-b5e6-96231b3b80d8 Praveen Velliengiri a month ago
5 changed file(s) with 384 addition(s) and 92 deletion(s). Raw diff Collapse all Expand all
118118 auto Work = [SharedMU, &JD]() { SharedMU->doMaterialize(JD); };
119119 CompileThreads.async(std::move(Work));
120120 });
121 JITEvaluatedSymbol SpeculatorSymbol(JITTargetAddress(&S),
122 JITSymbolFlags::Exported);
123 ExitOnErr(this->ES->getMainJITDylib().define(
124 absoluteSymbols({{Mangle("__orc_speculator"), SpeculatorSymbol}})));
121 ExitOnErr(S.addSpeculationRuntime(this->ES->getMainJITDylib(), Mangle));
125122 LocalCXXRuntimeOverrides CXXRuntimeoverrides;
126123 ExitOnErr(CXXRuntimeoverrides.enable(this->ES->getMainJITDylib(), Mangle));
127124 }
66 //===----------------------------------------------------------------------===//
77 // \file
88 /// Contains the Analyses and Result Interpretation to select likely functions
9 /// to Speculatively compile before they are called. [Experimentation]
9 /// to Speculatively compile before they are called. [Purely Experimentation]
1010 //===----------------------------------------------------------------------===//
1111
1212 #ifndef LLVM_EXECUTIONENGINE_ORC_SPECULATEANALYSES_H
1313 #define LLVM_EXECUTIONENGINE_ORC_SPECULATEANALYSES_H
1414
15 #include "llvm/Analysis/BranchProbabilityInfo.h"
1516 #include "llvm/ExecutionEngine/Orc/Core.h"
1617 #include "llvm/ExecutionEngine/Orc/Speculation.h"
1718
2122
2223 namespace orc {
2324
25 // Provides common code.
// Base class for the speculation queries in this header: BlockFreqQuery and
// SequenceBBQuery both derive from it and share the helpers and the result
// type declared here.
26 class SpeculateQuery {
27 protected:
// Collects callee names found in the given basic block into the output set
// (its out-of-line definition is commented "Collect direct calls only").
28 void findCalles(const BasicBlock *, DenseSet &);
// True when every basic block of F has a single successor.
29 bool isStraightLine(const Function &F);
30
31 public:
// Result type returned by each query's operator().
// NOTE(review): the template arguments of this alias are missing in this
// rendering; the definitions return either None or a map keyed by the
// function name with a set of callee names as value - confirm against the
// real header.
32 using ResultTy = Optional>>;
33 };
34
2435 // Direct calls in high frequency basic blocks are extracted.
25 class BlockFreqQuery {
26 private:
27 void findCalles(const BasicBlock *, DenseSet &);
36 class BlockFreqQuery : public SpeculateQuery {
// Maps the number of basic blocks in a CFG to how many of them to consider
// (see the out-of-line definition for the exact thresholds).
2837 size_t numBBToGet(size_t);
2938
3039 public:
31 using ResultTy = Optional>>;
40 // Find likely next executables based on IR Block Frequency
// The single-argument form builds its own FunctionAnalysisManager inside the
// definition instead of receiving one from the caller.
41 ResultTy operator()(Function &F);
42 };
3243
33 // Find likely next executables based on IR Block Frequency
34 ResultTy operator()(Function &F, FunctionAnalysisManager &FAM);
44 // This Query generates a sequence of basic blocks which follows the order of
45 // execution.
46 // A handful of BB with higher block frequencies are taken, then path to entry
47 // and end BB are discovered by traversing up & down the CFG.
48 class SequenceBBQuery : public SpeculateQuery {
// Per-block traversal hint stored in the visited-blocks map. A direction flag
// is cleared once the block has been walked in that direction, so a second
// visit in the same direction returns immediately.
49 struct WalkDirection {
50 bool Upward = true, Downward = true;
51 // the associated block contains a call
52 bool CallerBlock = false;
53 };
54
55 public:
// NOTE(review): the template arguments of the aliases below are missing in
// this rendering. From their uses in the definitions: VisitedBlocksInfoTy
// maps a basic block to its WalkDirection, BlockListTy is a list of basic
// blocks, BackEdgesInfoTy holds (source, destination) back-edge pairs and
// BlockFreqInfoTy holds (block, frequency) pairs - confirm against the real
// header.
56 using VisitedBlocksInfoTy = DenseMap;
57 using BlockListTy = SmallVector;
58 using BackEdgesInfoTy =
59 SmallVector, 8>;
60 using BlockFreqInfoTy =
61 SmallVector, 8>;
62
63 private:
// Number of hottest caller blocks to start traversals from.
64 std::size_t getHottestBlocks(std::size_t TotalBlocks);
// Returns the given blocks reordered to follow the function's block list.
65 BlockListTy rearrangeBB(const Function &, const BlockListTy &);
// Selects caller blocks reachable over hot edges from the hottest blocks.
66 BlockListTy queryCFG(Function &, const BlockListTy &);
// Recursive walk over predecessors, towards the entry block.
67 void traverseToEntryBlock(const BasicBlock *, const BlockListTy &,
68 const BackEdgesInfoTy &,
69 const BranchProbabilityInfo *,
70 VisitedBlocksInfoTy &);
// Recursive walk over successors, towards the exit block(s).
71 void traverseToExitBlock(const BasicBlock *, const BlockListTy &,
72 const BackEdgesInfoTy &,
73 const BranchProbabilityInfo *,
74 VisitedBlocksInfoTy &);
75
76 public:
// Runs the query on F; yields None when F has no basic block with a call.
77 ResultTy operator()(Function &F);
3578 };
3679
3780 } // namespace orc
1919 #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
2020 #include "llvm/IR/PassManager.h"
2121 #include "llvm/Passes/PassBuilder.h"
22 #include "llvm/Support/Debug.h"
2223
2324 #include
2425 #include
8081 {
8182 std::lock_guard Lockit(ConcurrentAccess);
8283 auto It = GlobalSpecMap.find(FAddr);
83 // Kill this when jump on first call instrumentation is in place;
84 auto Iv = AlreadyExecuted.insert(FAddr);
85 if (It == GlobalSpecMap.end() || Iv.second == false)
84 if (It == GlobalSpecMap.end())
8685 return;
87 else
88 CandidateSet = It->getSecond();
89 }
90
91 // Try to distinguish pre-compiled symbols!
86 CandidateSet = It->getSecond();
87 }
88
89 SymbolDependenceMap SpeculativeLookUpImpls;
90
9291 for (auto &Callee : CandidateSet) {
9392 auto ImplSymbol = AliaseeImplTable.getImplFor(Callee);
93 // try to distinguish already compiled & library symbols
9494 if (!ImplSymbol.hasValue())
9595 continue;
9696 const auto &ImplSymbolName = ImplSymbol.getPointer()->first;
97 auto *ImplJD = ImplSymbol.getPointer()->second;
98 ES.lookup(JITDylibSearchList({{ImplJD, true}}),
99 SymbolNameSet({ImplSymbolName}), SymbolState::Ready,
97 JITDylib *ImplJD = ImplSymbol.getPointer()->second;
98 auto &SymbolsInJD = SpeculativeLookUpImpls[ImplJD];
99 SymbolsInJD.insert(ImplSymbolName);
100 }
101
102 DEBUG_WITH_TYPE("orc", for (auto &I
103 : SpeculativeLookUpImpls) {
104 llvm::dbgs() << "\n In " << I.first->getName() << " JITDylib ";
105 for (auto &N : I.second)
106 llvm::dbgs() << "\n Likely Symbol : " << N;
107 });
108
109 // for a given symbol, there may be no symbol qualified for speculatively
110 // compile try to fix this before jumping to this code if possible.
111 for (auto &LookupPair : SpeculativeLookUpImpls)
112 ES.lookup(JITDylibSearchList({{LookupPair.first, true}}),
113 LookupPair.second, SymbolState::Ready,
100114 [this](Expected Result) {
101115 if (auto Err = Result.takeError())
102116 ES.reportError(std::move(Err));
103117 },
104118 NoDependenciesToRegister);
105 }
106119 }
107120
108121 public:
112125 Speculator(Speculator &&) = delete;
113126 Speculator &operator=(const Speculator &) = delete;
114127 Speculator &operator=(Speculator &&) = delete;
115 ~Speculator() {}
128
129 /// Define symbols for this Speculator object (__orc_speculator) and the
130 /// speculation runtime entry point symbol (__orc_speculate_for) in the
131 /// given JITDylib.
132 Error addSpeculationRuntime(JITDylib &JD, MangleAndInterner &Mangle);
116133
117134 // Speculatively compile likely functions for the given Stub Address.
118135 // destination of __orc_speculate_for jump
141158 ExecutionSession &getES() { return ES; }
142159
143160 private:
161 static void speculateForEntryPoint(Speculator *Ptr, uint64_t StubId);
144162 std::mutex ConcurrentAccess;
145163 ImplSymbolMap &AliaseeImplTable;
146164 ExecutionSession &ES;
147 DenseSet AlreadyExecuted;
148165 StubAddrLikelies GlobalSpecMap;
149166 };
150 // replace DenseMap with Pair
167
151168 class IRSpeculationLayer : public IRLayer {
152169 public:
153170 using IRlikiesStrRef = Optional>>;
154 using ResultEval =
155 std::function &)>;
171 using ResultEval = std::function &)>;
156172 using TargetAndLikelies = DenseMap;
157173
158174 IRSpeculationLayer(ExecutionSession &ES, IRCompileLayer &BaseLayer,
159175 Speculator &Spec, MangleAndInterner &Mangle,
160176 ResultEval Interpreter)
161177 : IRLayer(ES), NextLayer(BaseLayer), S(Spec), Mangle(Mangle),
162 QueryAnalysis(Interpreter) {
163 PB.registerFunctionAnalyses(FAM);
164 }
178 QueryAnalysis(Interpreter) {}
165179
166180 void emit(MaterializationResponsibility R, ThreadSafeModule TSM);
167181
183197 IRCompileLayer &NextLayer;
184198 Speculator &S;
185199 MangleAndInterner &Mangle;
186 PassBuilder PB;
187 FunctionAnalysisManager FAM;
188200 ResultEval QueryAnalysis;
189201 };
190202
191 // Runtime Function Interface
192 extern "C" {
193 void __orc_speculate_for(Speculator *, uint64_t stub_id);
194 }
195
196203 } // namespace orc
197204 } // namespace llvm
198205
66 //===----------------------------------------------------------------------===//
77
88 #include "llvm/ExecutionEngine/Orc/SpeculateAnalyses.h"
9 #include "llvm/ADT/ArrayRef.h"
910 #include "llvm/ADT/DenseMap.h"
1011 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/SmallPtrSet.h"
1113 #include "llvm/ADT/SmallVector.h"
1214 #include "llvm/Analysis/BlockFrequencyInfo.h"
15 #include "llvm/Analysis/BranchProbabilityInfo.h"
16 #include "llvm/Analysis/CFG.h"
17 #include "llvm/IR/PassManager.h"
18 #include "llvm/Passes/PassBuilder.h"
19 #include "llvm/Support/ErrorHandling.h"
20
21 #include
1322
1423 namespace {
1524 using namespace llvm;
16 std::vector findBBwithCalls(const Function &F,
17 bool IndirectCall = false) {
18 std::vector BBs;
25 SmallVector findBBwithCalls(const Function &F,
26 bool IndirectCall = false) {
27 SmallVector BBs;
1928
2029 auto findCallInst = [&IndirectCall](const Instruction &I) {
21 if (auto Call = dyn_cast(&I)) {
22 if (Call->isIndirectCall())
23 return IndirectCall;
24 else
25 return true;
26 } else
30 if (auto Call = dyn_cast(&I))
31 return Call->isIndirectCall() ? IndirectCall : true;
32 else
2733 return false;
2834 };
2935 for (auto &BB : F)
3743
3844 // Implementations of Queries shouldn't need to lock the resources
3945 // such as LLVMContext, each argument (function) has a non-shared LLVMContext
46 // Plus, if a Query holds state, the necessary locking scheme should be provided.
4047 namespace llvm {
4148 namespace orc {
4249
4350 // Collect direct calls only
44 void BlockFreqQuery::findCalles(const BasicBlock *BB,
51 void SpeculateQuery::findCalles(const BasicBlock *BB,
4552 DenseSet &CallesNames) {
4653 assert(BB != nullptr && "Traversing Null BB to find calls?");
4754
5865 getCalledFunction(II);
5966 }
6067
61 // blind calculation
// Returns true only if every basic block in F reports a single successor.
// NOTE(review): a block with no successors (e.g. one ending in a return)
// presumably makes getSingleSuccessor() return null, in which case this would
// be false for any function that has an exit block - confirm the intent.
68 bool SpeculateQuery::isStraightLine(const Function &F) {
69 return llvm::all_of(F.getBasicBlockList(), [](const BasicBlock &BB) {
70 return BB.getSingleSuccessor() != nullptr;
71 });
72 }
73
74 // BlockFreqQuery Implementations
75
6276 size_t BlockFreqQuery::numBBToGet(size_t numBB) {
6377 // small CFG
6478 if (numBB < 4)
7084 return (numBB / 2) + (numBB / 4);
7185 }
7286
73 BlockFreqQuery::ResultTy BlockFreqQuery::
74 operator()(Function &F, FunctionAnalysisManager &FAM) {
87 BlockFreqQuery::ResultTy BlockFreqQuery::operator()(Function &F) {
7588 DenseMap> CallerAndCalles;
7689 DenseSet Calles;
7790 SmallVector, 8> BBFreqs;
91
92 PassBuilder PB;
93 FunctionAnalysisManager FAM;
94 PB.registerFunctionAnalyses(FAM);
7895
7996 auto IBBs = findBBwithCalls(F);
8097
106123
107124 return CallerAndCalles;
108125 }
126
127 // SequenceBBQuery Implementation
// Decides how many of the caller blocks are treated as "hottest": a single
// block is kept as is, otherwise half of them (integer division, so 0 for an
// input of 0).
128 std::size_t SequenceBBQuery::getHottestBlocks(std::size_t TotalBlocks) {
129 if (TotalBlocks == 1)
130 return TotalBlocks;
131 return TotalBlocks / 2;
132 }
133
134 // FIXME : find good implementation.
// Returns the blocks of BBList in the order in which they appear in F's basic
// block list. Each block of F is tested for membership in BBList with
// llvm::is_contained. The assert checks that every entry of BBList was found
// in F (it would also trip if BBList held duplicates).
135 SequenceBBQuery::BlockListTy
136 SequenceBBQuery::rearrangeBB(const Function &F, const BlockListTy &BBList) {
137 BlockListTy RearrangedBBSet;
138
139 for (auto &Block : F.getBasicBlockList())
140 if (llvm::is_contained(BBList, &Block))
141 RearrangedBBSet.push_back(&Block);
142
143 assert(RearrangedBBSet.size() == BBList.size() &&
144 "BasicBlock missing while rearranging?");
145 return RearrangedBBSet;
146 }
147
// Recursively walks from AtBB over its predecessors, recording each block it
// reaches in VisitedBlocks.
//   AtBB          - block currently being visited.
//   CallerBlocks  - blocks known to contain calls; used to set the
//                   CallerBlock flag on newly discovered blocks.
//   BackEdgesInfo - (source, destination) back-edge pairs; predecessors that
//                   are the source of a back edge into AtBB are not followed.
//   BPI           - branch probabilities; only hot edges are followed.
//   VisitedBlocks - in/out map of per-block hints.
// NOTE(review): BPI is dereferenced below without a null check, while the
// caller obtains it through getCachedResult - confirm it can never be null.
148 void SequenceBBQuery::traverseToEntryBlock(const BasicBlock *AtBB,
149 const BlockListTy &CallerBlocks,
150 const BackEdgesInfoTy &BackEdgesInfo,
151 const BranchProbabilityInfo *BPI,
152 VisitedBlocksInfoTy &VisitedBlocks) {
153 auto Itr = VisitedBlocks.find(AtBB);
154 if (Itr != VisitedBlocks.end()) { // already visited.
// Already walked upward from this block: nothing more to discover.
155 if (!Itr->second.Upward)
156 return;
// Seen before (by a downward walk); mark the upward walk as done.
157 Itr->second.Upward = false;
158 } else {
159 // Create hint for newly discovered blocks.
160 WalkDirection BlockHint;
161 BlockHint.Upward = false;
162 // FIXME: Expensive Check
163 if (llvm::is_contained(CallerBlocks, AtBB))
164 BlockHint.CallerBlock = true;
165 VisitedBlocks.insert(std::make_pair(AtBB, BlockHint));
166 }
167
168 const_pred_iterator PIt = pred_begin(AtBB), EIt = pred_end(AtBB);
169 // Move this check to top, when we have code setup to launch speculative
170 // compiles for function in entry BB, this triggers the speculative compiles
171 // before running the program.
172 if (PIt == EIt) // No Preds.
173 return;
174
175 DenseSet PredSkipNodes;
176
177 // Since we are checking for predecessor's backedges, this Block
178 // occurs in second position.
179 for (auto &I : BackEdgesInfo)
180 if (I.second == AtBB)
181 PredSkipNodes.insert(I.first);
182
183 // Skip predecessors which are the source of back-edges.
184 for (; PIt != EIt; ++PIt)
185 // checking EdgeHotness is cheaper
186 if (BPI->isEdgeHot(*PIt, AtBB) && !PredSkipNodes.count(*PIt))
187 traverseToEntryBlock(*PIt, CallerBlocks, BackEdgesInfo, BPI,
188 VisitedBlocks);
189 }
190
// Recursively walks from AtBB over its successors, recording each block it
// reaches in VisitedBlocks. Mirror image of traverseToEntryBlock: it uses the
// Downward flag, and skips successors that are the destination of a back edge
// leaving AtBB.
//   AtBB          - block currently being visited.
//   CallerBlocks  - blocks known to contain calls; used to set the
//                   CallerBlock flag on newly discovered blocks.
//   BackEdgesInfo - (source, destination) back-edge pairs.
//   BPI           - branch probabilities; only hot edges are followed.
//   VisitedBlocks - in/out map of per-block hints.
// NOTE(review): BPI is dereferenced below without a null check, while the
// caller obtains it through getCachedResult - confirm it can never be null.
191 void SequenceBBQuery::traverseToExitBlock(const BasicBlock *AtBB,
192 const BlockListTy &CallerBlocks,
193 const BackEdgesInfoTy &BackEdgesInfo,
194 const BranchProbabilityInfo *BPI,
195 VisitedBlocksInfoTy &VisitedBlocks) {
196 auto Itr = VisitedBlocks.find(AtBB);
197 if (Itr != VisitedBlocks.end()) { // already visited.
// Already walked downward from this block: nothing more to discover.
198 if (!Itr->second.Downward)
199 return;
// Seen before (by an upward walk); mark the downward walk as done.
200 Itr->second.Downward = false;
201 } else {
202 // Create hint for newly discovered blocks.
203 WalkDirection BlockHint;
204 BlockHint.Downward = false;
205 // FIXME: Expensive Check
206 if (llvm::is_contained(CallerBlocks, AtBB))
207 BlockHint.CallerBlock = true;
208 VisitedBlocks.insert(std::make_pair(AtBB, BlockHint));
209 }
210
// PIt/EIt iterate over successors here, despite the predecessor-style names.
211 succ_const_iterator PIt = succ_begin(AtBB), EIt = succ_end(AtBB);
212 if (PIt == EIt) // No succs.
213 return;
214
215 // If there are hot edges, then compute SuccSkipNodes.
216 DenseSet SuccSkipNodes;
217
218 // Since we are checking for successor's backedges, this Block
219 // occurs in first position.
220 for (auto &I : BackEdgesInfo)
221 if (I.first == AtBB)
222 SuccSkipNodes.insert(I.second);
223
// Follow only hot edges, and never the back edges collected above.
224 for (; PIt != EIt; ++PIt)
225 if (BPI->isEdgeHot(AtBB, *PIt) && !SuccSkipNodes.count(*PIt))
226 traverseToExitBlock(*PIt, CallerBlocks, BackEdgesInfo, BPI,
227 VisitedBlocks);
228 }
229
230 // Get Block frequencies for blocks and take most frequently executed block,
231 // walk towards the entry block from those blocks and discover the basic blocks
232 // with call.
// Returns the discovered caller blocks, ordered as they appear in F.
//   F            - function whose CFG is queried.
//   CallerBlocks - blocks of F that contain calls.
233 SequenceBBQuery::BlockListTy
234 SequenceBBQuery::queryCFG(Function &F, const BlockListTy &CallerBlocks) {
235
236 BlockFreqInfoTy BBFreqs;
237 VisitedBlocksInfoTy VisitedBlocks;
238 BackEdgesInfoTy BackEdgesInfo;
239
// The analysis manager is local to this call, so the query itself keeps no
// state between invocations.
240 PassBuilder PB;
241 FunctionAnalysisManager FAM;
242 PB.registerFunctionAnalyses(FAM);
243
244 auto &BFI = FAM.getResult(F);
245
246 llvm::FindFunctionBackedges(F, BackEdgesInfo);
247
248 for (const auto I : CallerBlocks)
249 BBFreqs.push_back({I, BFI.getBlockFreq(I).getFrequency()});
250
// Sort by frequency, highest first.
251 llvm::sort(BBFreqs, [](decltype(BBFreqs)::const_reference Bbf,
252 decltype(BBFreqs)::const_reference Bbs) {
253 return Bbf.second > Bbs.second;
254 });
255
// Keep only the first getHottestBlocks(size) entries of the sorted list.
256 ArrayRef> HotBlocksRef(BBFreqs);
257 HotBlocksRef =
258 HotBlocksRef.drop_back(BBFreqs.size() - getHottestBlocks(BBFreqs.size()));
259
// NOTE(review): getCachedResult yields a pointer and the traversal helpers
// dereference it unconditionally; presumably the result is cached as a side
// effect of the getResult call above - confirm.
260 BranchProbabilityInfo *BPI =
261 FAM.getCachedResult(F);
262
263 // visit NHotBlocks,
264 // traverse upwards to entry
265 // traverse downwards to end.
266
267 for (auto I : HotBlocksRef) {
268 traverseToEntryBlock(I.first, CallerBlocks, BackEdgesInfo, BPI,
269 VisitedBlocks);
270 traverseToExitBlock(I.first, CallerBlocks, BackEdgesInfo, BPI,
271 VisitedBlocks);
272 }
273
// Of everything visited, keep only the blocks flagged as containing a call.
274 BlockListTy MinCallerBlocks;
275 for (auto &I : VisitedBlocks)
276 if (I.second.CallerBlock)
277 MinCallerBlocks.push_back(std::move(I.first));
278
// Map iteration order is not the layout order, so restore function order.
279 return rearrangeBB(F, MinCallerBlocks);
280 }
281
// Entry point of the query. Returns None when F has no basic block with a
// call; otherwise returns a map with one entry, keyed by F's name, holding
// the callee names collected from the selected blocks.
282 SpeculateQuery::ResultTy SequenceBBQuery::operator()(Function &F) {
283 // reduce the number of lists!
284 DenseMap> CallerAndCalles;
285 DenseSet Calles;
286 BlockListTy SequencedBlocks;
287 BlockListTy CallerBlocks;
288
289 CallerBlocks = findBBwithCalls(F);
290 if (CallerBlocks.empty())
291 return None;
292
// If isStraightLine(F) holds, all caller blocks are used in function order;
// otherwise the CFG is queried for the blocks reachable over hot edges.
293 if (isStraightLine(F))
294 SequencedBlocks = rearrangeBB(F, CallerBlocks);
295 else
296 SequencedBlocks = queryCFG(F, CallerBlocks);
297
298 for (auto BB : SequencedBlocks)
299 findCalles(BB, Calles);
300
301 CallerAndCalles.insert({F.getName(), std::move(Calles)});
302 return CallerAndCalles;
303 }
304
109305 } // namespace orc
110306 } // namespace llvm
66 //===----------------------------------------------------------------------===//
77
88 #include "llvm/ExecutionEngine/Orc/Speculation.h"
9
109 #include "llvm/IR/BasicBlock.h"
1110 #include "llvm/IR/Function.h"
1211 #include "llvm/IR/IRBuilder.h"
1615 #include "llvm/IR/Module.h"
1716 #include "llvm/IR/Type.h"
1817 #include "llvm/IR/Verifier.h"
18 #include "llvm/Support/Debug.h"
1919
2020 #include
2121
3535 }
3636 }
3737
38 // Trigger Speculative Compiles.
// Static trampoline whose address addSpeculationRuntime registers under the
// name __orc_speculate_for. It asserts that Ptr is non-null and forwards
// StubId to Ptr->speculateFor.
39 void Speculator::speculateForEntryPoint(Speculator *Ptr, uint64_t StubId) {
40 assert(Ptr && " Null Address Received in orc_speculate_for ");
41 Ptr->speculateFor(StubId);
42 }
43
// Defines two absolute symbols in JD, both flagged Exported:
//   __orc_speculator    - the address of this Speculator object.
//   __orc_speculate_for - the address of speculateForEntryPoint.
// Names are produced with Mangle. Returns whatever Error JD.define reports.
44 Error Speculator::addSpeculationRuntime(JITDylib &JD,
45 MangleAndInterner &Mangle) {
46 JITEvaluatedSymbol ThisPtr(pointerToJITTargetAddress(this),
47 JITSymbolFlags::Exported);
48 JITEvaluatedSymbol SpeculateForEntryPtr(
49 pointerToJITTargetAddress(&speculateForEntryPoint),
50 JITSymbolFlags::Exported);
51 return JD.define(absoluteSymbols({
52 {Mangle("__orc_speculator"), ThisPtr}, // Data Symbol
53 {Mangle("__orc_speculate_for"), SpeculateForEntryPtr} // Callable Symbol
54 }));
55 }
56
3857 // If two modules, share the same LLVMContext, different threads must
39 // not access those modules concurrently, doing so leave the
40 // LLVMContext in in-consistent state.
41 // But here since each TSM has a unique Context associated with it,
42 // on locking is necessary!
58 // not access them concurrently without locking the associated LLVMContext
59 // this implementation follows this contract.
4360 void IRSpeculationLayer::emit(MaterializationResponsibility R,
4461 ThreadSafeModule TSM) {
4562
4764 assert(TSM.getContext().getContext() != nullptr &&
4865 "Module with null LLVMContext?");
4966
50 // Instrumentation of runtime calls
51 auto &InContext = *TSM.getContext().getContext();
52 auto SpeculatorVTy = StructType::create(InContext, "Class.Speculator");
53 auto RuntimeCallTy = FunctionType::get(
54 Type::getVoidTy(InContext),
55 {SpeculatorVTy->getPointerTo(), Type::getInt64Ty(InContext)}, false);
56 auto RuntimeCall =
57 Function::Create(RuntimeCallTy, Function::LinkageTypes::ExternalLinkage,
58 "__orc_speculate_for", TSM.getModuleUnlocked());
59 auto SpeclAddr = new GlobalVariable(
60 *TSM.getModuleUnlocked(), SpeculatorVTy, false,
61 GlobalValue::LinkageTypes::ExternalLinkage, nullptr, "__orc_speculator");
67 // Instrumentation of runtime calls, lock the Module
68 TSM.withModuleDo([this, &R](Module &M) {
69 auto &MContext = M.getContext();
70 auto SpeculatorVTy = StructType::create(MContext, "Class.Speculator");
71 auto RuntimeCallTy = FunctionType::get(
72 Type::getVoidTy(MContext),
73 {SpeculatorVTy->getPointerTo(), Type::getInt64Ty(MContext)}, false);
74 auto RuntimeCall =
75 Function::Create(RuntimeCallTy, Function::LinkageTypes::ExternalLinkage,
76 "__orc_speculate_for", &M);
77 auto SpeclAddr = new GlobalVariable(
78 M, SpeculatorVTy, false, GlobalValue::LinkageTypes::ExternalLinkage,
79 nullptr, "__orc_speculator");
6280
63 IRBuilder<> Mutator(InContext);
81 IRBuilder<> Mutator(MContext);
6482
65 // QueryAnalysis allowed to transform the IR source, one such example is
66 // Simplify CFG helps the static branch prediction heuristics!
67 for (auto &Fn : TSM.getModuleUnlocked()->getFunctionList()) {
68 if (!Fn.isDeclaration()) {
69 auto IRNames = QueryAnalysis(Fn, FAM);
70 // Instrument and register if Query has result
71 if (IRNames.hasValue()) {
72 Mutator.SetInsertPoint(&(Fn.getEntryBlock().front()));
73 auto ImplAddrToUint =
74 Mutator.CreatePtrToInt(&Fn, Type::getInt64Ty(InContext));
75 Mutator.CreateCall(RuntimeCallTy, RuntimeCall,
76 {SpeclAddr, ImplAddrToUint});
77 S.registerSymbols(internToJITSymbols(IRNames.getValue()),
78 &R.getTargetJITDylib());
83 // QueryAnalysis allowed to transform the IR source, one such example is
84 // Simplify CFG helps the static branch prediction heuristics!
85 for (auto &Fn : M.getFunctionList()) {
86 if (!Fn.isDeclaration()) {
87
88 auto IRNames = QueryAnalysis(Fn);
89 // Instrument and register if Query has result
90 if (IRNames.hasValue()) {
91
92 // Emit globals for each function.
93 auto LoadValueTy = Type::getInt8Ty(MContext);
94 auto SpeculatorGuard = new GlobalVariable(
95 M, LoadValueTy, false, GlobalValue::LinkageTypes::InternalLinkage,
96 ConstantInt::get(LoadValueTy, 0),
97 "__orc_speculate.guard.for." + Fn.getName());
98 SpeculatorGuard->setAlignment(1);
99 SpeculatorGuard->setUnnamedAddr(GlobalValue::UnnamedAddr::Local);
100
101 BasicBlock &ProgramEntry = Fn.getEntryBlock();
102 // Create BasicBlocks before the program's entry basicblock
103 BasicBlock *SpeculateBlock = BasicBlock::Create(
104 MContext, "__orc_speculate.block", &Fn, &ProgramEntry);
105 BasicBlock *SpeculateDecisionBlock = BasicBlock::Create(
106 MContext, "__orc_speculate.decision.block", &Fn, SpeculateBlock);
107
108 assert(SpeculateDecisionBlock == &Fn.getEntryBlock() &&
109 "SpeculateDecisionBlock not updated?");
110 Mutator.SetInsertPoint(SpeculateDecisionBlock);
111
112 auto LoadGuard =
113 Mutator.CreateLoad(LoadValueTy, SpeculatorGuard, "guard.value");
114 // if just loaded value equal to 0,return true.
115 auto CanSpeculate =
116 Mutator.CreateICmpEQ(LoadGuard, ConstantInt::get(LoadValueTy, 0),
117 "compare.to.speculate");
118 Mutator.CreateCondBr(CanSpeculate, SpeculateBlock, &ProgramEntry);
119
120 Mutator.SetInsertPoint(SpeculateBlock);
121 auto ImplAddrToUint =
122 Mutator.CreatePtrToInt(&Fn, Type::getInt64Ty(MContext));
123 Mutator.CreateCall(RuntimeCallTy, RuntimeCall,
124 {SpeclAddr, ImplAddrToUint});
125 Mutator.CreateStore(ConstantInt::get(LoadValueTy, 1),
126 SpeculatorGuard);
127 Mutator.CreateBr(&ProgramEntry);
128
129 assert(Mutator.GetInsertBlock()->getParent() == &Fn &&
130 "IR builder association mismatch?");
131 S.registerSymbols(internToJITSymbols(IRNames.getValue()),
132 &R.getTargetJITDylib());
133 }
79134 }
80135 }
81 }
82 // No locking needed read only operation.
83 assert(!(verifyModule(*TSM.getModuleUnlocked())) &&
136 });
137
138 assert(!TSM.withModuleDo([](const Module &M) { return verifyModule(M); }) &&
84139 "Speculation Instrumentation breaks IR?");
85140
86141 NextLayer.emit(std::move(R), std::move(TSM));
87142 }
88143
89 // Runtime Function Implementation
// C-linkage entry point: asserts that Ptr is non-null and forwards StubId to
// Ptr->speculateFor.
90 extern "C" void __orc_speculate_for(Speculator *Ptr, uint64_t StubId) {
91 assert(Ptr && " Null Address Received in orc_speculate_for ");
92 Ptr->speculateFor(StubId);
93 }
94
95144 } // namespace orc
96145 } // namespace llvm