[DA] DivergenceAnalysis for unstructured, reducible CFGs

Summary:
This is patch 2 of the new DivergenceAnalysis (https://reviews.llvm.org/D50433).

This patch contains a generic divergence analysis implementation for unstructured, reducible Control-Flow Graphs. It contains two new classes. The `SyncDependenceAnalysis` class lazily computes sync dependences, which relate divergent branches to points of joining divergent control. The `DivergenceAnalysis` class contains the generic divergence analysis implementation.

Reviewers: nhaehnle

Reviewed By: nhaehnle

Subscribers: sameerds, kristina, nhaehnle, xbolva00, tschuett, mgorny, llvm-commits

Differential Revision: https://reviews.llvm.org/D51491

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@344734 91177308-0d34-0410-b5e6-96231b3b80d8

Nicolai Haehnle
8 changed file(s) with 1508 addition(s) and 0 deletion(s).
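For orientation before the diff, here is a minimal sketch (not part of the commit) of how a client is expected to drive the two new classes together; it mirrors the buildDA() fixture in the unit test added below. The helper name isMadeDivergentBy is illustrative only.

// Illustrative sketch only; mirrors DivergenceAnalysisTest::buildDA below.
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/SyncDependenceAnalysis.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"

using namespace llvm;

// Returns true if QueryVal becomes divergent once SeedVal is treated as a
// source of divergence (e.g. a value holding the thread ID).
static bool isMadeDivergentBy(Function &F, const Value &SeedVal,
                              const Value &QueryVal) {
  DominatorTree DT(F);
  PostDominatorTree PDT(F);
  LoopInfo LI(DT);
  SyncDependenceAnalysis SDA(DT, PDT, LI);
  // Whole-function region (RegionLoop == nullptr); IR not assumed to be in LCSSA form.
  DivergenceAnalysis DA(F, /*RegionLoop=*/nullptr, DT, LI, SDA,
                        /*IsLCSSAForm=*/false);
  DA.markDivergent(SeedVal); // seed divergence
  DA.compute();              // propagate data and sync dependences
  return DA.isDivergent(QueryVal);
}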
295295
296296 public:
297297 using rpo_iterator = typename std::vector<NodeRef>::reverse_iterator;
298 using const_rpo_iterator = typename std::vector<NodeRef>::const_reverse_iterator;
298299
299300 ReversePostOrderTraversal(GraphT G) { Initialize(GT::getEntryNode(G)); }
300301
301302 // Because we want a reverse post order, use reverse iterators from the vector
302303 rpo_iterator begin() { return Blocks.rbegin(); }
304 const_rpo_iterator begin() const { return Blocks.crbegin(); }
303305 rpo_iterator end() { return Blocks.rend(); }
306 const_rpo_iterator end() const { return Blocks.crend(); }
304307 };
305308
306309 } // end namespace llvm
0 //===- llvm/Analysis/DivergenceAnalysis.h - Divergence Analysis -*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // The divergence analysis determines which instructions and branches are
11 // divergent given a set of divergent source instructions.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #ifndef LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
16 #define LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
17
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/Analysis/SyncDependenceAnalysis.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/Pass.h"
22 #include <vector>
23
24 namespace llvm {
25 class Module;
26 class Value;
27 class Instruction;
28 class Loop;
29 class raw_ostream;
30 class TargetTransformInfo;
31
32 /// \brief Generic divergence analysis for reducible CFGs.
33 ///
34 /// This analysis propagates divergence in a data-parallel context from sources
35 /// of divergence to all users. It requires reducible CFGs. All assignments
36 /// should be in SSA form.
37 class DivergenceAnalysis {
38 public:
39 /// \brief This instance will analyze the whole function \p F or the loop \p
40 /// RegionLoop.
41 ///
42 /// \param RegionLoop if non-null the analysis is restricted to \p RegionLoop.
43 /// Otherwise the whole function is analyzed.
44 /// \param IsLCSSAForm whether the analysis may assume that the IR in the
45 /// region is in LCSSA form.
46 DivergenceAnalysis(const Function &F, const Loop *RegionLoop,
47 const DominatorTree &DT, const LoopInfo &LI,
48 SyncDependenceAnalysis &SDA, bool IsLCSSAForm);
49
50 /// \brief The loop that defines the analyzed region (if any).
51 const Loop *getRegionLoop() const { return RegionLoop; }
52 const Function &getFunction() const { return F; }
53
54 /// \brief Whether \p BB is part of the region.
55 bool inRegion(const BasicBlock &BB) const;
56 /// \brief Whether \p I is part of the region.
57 bool inRegion(const Instruction &I) const;
58
59 /// \brief Mark \p UniVal as a value that is always uniform.
60 void addUniformOverride(const Value &UniVal);
61
62 /// \brief Mark \p DivVal as a value that is always divergent.
63 void markDivergent(const Value &DivVal);
64
65 /// \brief Propagate divergence to all instructions in the region.
66 /// Divergence is seeded by calls to \p markDivergent.
67 void compute();
68
69 /// \brief Whether any value was marked or analyzed to be divergent.
70 bool hasDetectedDivergence() const { return !DivergentValues.empty(); }
71
72 /// \brief Whether \p Val will always return a uniform value regardless of its
73 /// operands
74 bool isAlwaysUniform(const Value &Val) const;
75
76 /// \brief Whether \p Val is a divergent value
77 bool isDivergent(const Value &Val) const;
78
79 void print(raw_ostream &OS, const Module *) const;
80
81 private:
82 bool updateTerminator(const TerminatorInst &Term) const;
83 bool updatePHINode(const PHINode &Phi) const;
84
85 /// \brief Computes whether \p Inst is divergent based on the
86 /// divergence of its operands.
87 ///
88 /// \returns Whether \p Inst is divergent.
89 ///
90 /// This should only be called for non-phi, non-terminator instructions.
91 bool updateNormalInstruction(const Instruction &Inst) const;
92
93 /// \brief Mark users of live-out values as divergent.
94 ///
95 /// \param LoopHeader the header of the divergent loop.
96 ///
97 /// Marks all users of live-out values of the loop headed by \p LoopHeader
98 /// as divergent and puts them on the worklist.
99 void taintLoopLiveOuts(const BasicBlock &LoopHeader);
100
101 /// \brief Push all users of \p Val (in the region) to the worklist
102 void pushUsers(const Value &I);
103
104 /// \brief Push all phi nodes in @block to the worklist
105 void pushPHINodes(const BasicBlock &Block);
106
107 /// \brief Mark \p Block as join divergent
108 ///
109 /// A block is join divergent if two threads may reach it from different
110 /// incoming blocks at the same time.
111 void markBlockJoinDivergent(const BasicBlock &Block) {
112 DivergentJoinBlocks.insert(&Block);
113 }
114
115 /// \brief Whether \p Val is divergent when read in \p ObservingBlock.
116 bool isTemporalDivergent(const BasicBlock &ObservingBlock,
117 const Value &Val) const;
118
119 /// \brief Whether \p Block is join divergent
120 ///
121 /// (see markBlockJoinDivergent).
122 bool isJoinDivergent(const BasicBlock &Block) const {
123 return DivergentJoinBlocks.find(&Block) != DivergentJoinBlocks.end();
124 }
125
126 /// \brief Propagate control-induced divergence to users (phi nodes and
127 /// instructions).
128 //
129 // \param JoinBlock is a divergent loop exit or join point of two disjoint
130 // paths.
131 // \returns Whether \p JoinBlock is a divergent loop exit of \p TermLoop.
132 bool propagateJoinDivergence(const BasicBlock &JoinBlock,
133 const Loop *TermLoop);
134
135 /// \brief Propagate induced value divergence due to control divergence in \p
136 /// Term.
137 void propagateBranchDivergence(const TerminatorInst &Term);
138
139 /// \brief Propagate divergence caused by a divergent loop exit.
140 ///
141 /// \param ExitingLoop is a divergent loop.
142 void propagateLoopDivergence(const Loop &ExitingLoop);
143
144 private:
145 const Function &F;
146 // If RegionLoop != nullptr, analysis is only performed within \p RegionLoop.
147 // Otherwise, the whole function is analyzed.
148 const Loop *RegionLoop;
149
150 const DominatorTree &DT;
151 const LoopInfo &LI;
152
153 // Recognized divergent loops
154 DenseSet<const Loop *> DivergentLoops;
155
156 // The SDA links divergent branches to divergent control-flow joins.
157 SyncDependenceAnalysis &SDA;
158
159 // Use simplified code path for LCSSA form.
160 bool IsLCSSAForm;
161
162 // Set of known-uniform values.
163 DenseSet<const Value *> UniformOverrides;
164
165 // Blocks with joining divergent control from different predecessors.
166 DenseSet<const BasicBlock *> DivergentJoinBlocks;
167
168 // Detected/marked divergent values.
169 DenseSet<const Value *> DivergentValues;
170
171 // Internal worklist for divergence propagation.
172 std::vector<const Instruction *> Worklist;
173 };
174
175 } // namespace llvm
176
177 #endif // LLVM_ANALYSIS_DIVERGENCE_ANALYSIS_H
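As a complement to the whole-function sketch above, here is a hedged sketch of the loop-restricted mode declared in this header: passing a non-null RegionLoop confines the analysis to that loop, addUniformOverride pre-seeds known-uniform values, and only values inside the region are meaningful to query. The helper name analyzeLoopRegion and its parameters are illustrative, not part of the patch.

// Illustrative sketch only; uses the API declared above.
#include "llvm/Analysis/DivergenceAnalysis.h"

static void analyzeLoopRegion(const llvm::Function &F, const llvm::Loop &L,
                              const llvm::DominatorTree &DT,
                              const llvm::LoopInfo &LI,
                              llvm::SyncDependenceAnalysis &SDA,
                              const llvm::Value &KnownUniformVal,
                              const llvm::Value &SuspectVal) {
  llvm::DivergenceAnalysis DA(F, /*RegionLoop=*/&L, DT, LI, SDA,
                              /*IsLCSSAForm=*/true);
  DA.addUniformOverride(KnownUniformVal); // e.g. a loop bound known to be uniform
  DA.markDivergent(SuspectVal);           // e.g. a lane-dependent value
  DA.compute();
  if (DA.hasDetectedDivergence() && DA.isDivergent(SuspectVal)) {
    // ... react to divergence inside the loop region ...
  }
}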
0 //===- SyncDependenceAnalysis.h - Divergent Branch Dependence -*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // This file defines the SyncDependenceAnalysis class, which computes for
11 // every divergent branch the set of phi nodes that the branch will make
12 // divergent.
13 //
14 //===----------------------------------------------------------------------===//
15
16 #ifndef LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H
17 #define LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H
18
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/PostOrderIterator.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/Analysis/LoopInfo.h"
23 #include <memory>
24
25 namespace llvm {
26
27 class BasicBlock;
28 class DominatorTree;
29 class Loop;
30 class PostDominatorTree;
31 class TerminatorInst;
32 class TerminatorInst;
33
34 using ConstBlockSet = SmallPtrSet<const BasicBlock *, 4>;
35
36 /// \brief Relates points of divergent control to join points in
37 /// reducible CFGs.
38 ///
39 /// This analysis relates points of divergent control to points of converging
40 /// divergent control. The analysis requires all loops to be reducible.
41 class SyncDependenceAnalysis {
42 void visitSuccessor(const BasicBlock &succBlock, const Loop *termLoop,
43 const BasicBlock *defBlock);
44
45 public:
46 bool inRegion(const BasicBlock &BB) const;
47
48 ~SyncDependenceAnalysis();
49 SyncDependenceAnalysis(const DominatorTree &DT, const PostDominatorTree &PDT,
50 const LoopInfo &LI);
51
52 /// \brief Computes divergent join points and loop exits caused by branch
53 /// divergence in \p Term.
54 ///
55 /// The set of blocks which are reachable by disjoint paths from \p Term.
56 /// The set also contains loop exits if there are two disjoint paths:
57 /// one from \p Term to the loop exit and another from \p Term to the loop
58 /// header. Those exit blocks are added to the returned set.
59 /// If L is the parent loop of \p Term and an exit of L is in the returned
60 /// set then L is a divergent loop.
61 const ConstBlockSet &join_blocks(const TerminatorInst &Term);
62
63 /// \brief Computes divergent join points and loop exits (in the surrounding
64 /// loop) caused by the divergent loop exits of \p Loop.
65 ///
66 /// The set of blocks which are reachable by disjoint paths from the
67 /// loop exits of \p Loop.
68 /// This treats the loop as a single node in \p Loop's parent loop.
69 /// The returned set has the same properties as for join_blocks(TermInst&).
70 const ConstBlockSet &join_blocks(const Loop &Loop);
71
72 private:
73 static ConstBlockSet EmptyBlockSet;
74
75 ReversePostOrderTraversal<const Function *> FuncRPOT;
76 const DominatorTree &DT;
77 const PostDominatorTree &PDT;
78 const LoopInfo &LI;
79
80 std::map<const Loop *, std::unique_ptr<ConstBlockSet>> CachedLoopExitJoins;
81 std::map<const TerminatorInst *, std::unique_ptr<ConstBlockSet>>
82 CachedBranchJoins;
83 };
84
85 } // namespace llvm
86
87 #endif // LLVM_ANALYSIS_SYNC_DEPENDENCE_ANALYSIS_H
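A hedged usage sketch for the interface above: given a terminator already known to be divergent, join_blocks returns the set of blocks whose phi nodes become sync dependent on it; the result is computed lazily and cached inside the analysis. The helper name printJoinsOf is illustrative only.

// Illustrative sketch only; uses the API declared above.
#include "llvm/Analysis/SyncDependenceAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void printJoinsOf(const TerminatorInst &DivTerm,
                         SyncDependenceAnalysis &SDA, raw_ostream &OS) {
  // Repeated queries for the same terminator hit the internal cache.
  for (const BasicBlock *JoinBB : SDA.join_blocks(DivTerm))
    OS << "join block: " << JoinBB->getName() << "\n";
}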
2424 Delinearization.cpp
2525 DemandedBits.cpp
2626 DependenceAnalysis.cpp
27 DivergenceAnalysis.cpp
2728 DomPrinter.cpp
2829 DominanceFrontier.cpp
2930 EHPersonalities.cpp
7980 ScalarEvolutionAliasAnalysis.cpp
8081 ScalarEvolutionExpander.cpp
8182 ScalarEvolutionNormalization.cpp
83 SyncDependenceAnalysis.cpp
8284 SyntheticCountsUtils.cpp
8385 TargetLibraryInfo.cpp
8486 TargetTransformInfo.cpp
0 //===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a general divergence analysis for loop vectorization
10 // and GPU programs. It determines which branches and values in a loop or GPU
11 // program are divergent. It can help branch optimizations such as jump
12 // threading and loop unswitching to make better decisions.
13 //
14 // GPU programs typically use the SIMD execution model, where multiple threads
15 // in the same execution group have to execute in lock-step. Therefore, if the
16 // code contains divergent branches (i.e., threads in a group do not agree on
17 // which path of the branch to take), the group of threads has to execute all
18 // the paths from that branch with different subsets of threads enabled until
19 // they re-converge.
20 //
21 // Due to this execution model, some optimizations such as jump
22 // threading and loop unswitching can interfere with thread re-convergence.
23 // Therefore, an analysis that computes which branches in a GPU program are
24 // divergent can help the compiler to selectively run these optimizations.
25 //
26 // This implementation is derived from the Vectorization Analysis of the
27 // Region Vectorizer (RV). That implementation in turn is based on the approach
28 // described in
29 //
30 // Improving Performance of OpenCL on CPUs
31 // Ralf Karrenberg and Sebastian Hack
32 // CC '12
33 //
34 // This DivergenceAnalysis implementation is generic in the sense that it does
35 // not itself identify original sources of divergence.
36 // Instead, specialized adapter classes (LoopDivergenceAnalysis for loops and
37 // GPUDivergenceAnalysis for GPU programs) identify the sources of divergence
38 // (e.g., special variables that hold the thread ID or the iteration variable).
39 //
40 // The generic implementation propagates divergence to variables that are data
41 // or sync dependent on a source of divergence.
42 //
43 // While data dependency is a well-known concept, the notion of sync dependency
44 // is worth more explanation. Sync dependence characterizes the control flow
45 // aspect of the propagation of branch divergence. For example,
46 //
47 // %cond = icmp slt i32 %tid, 10
48 // br i1 %cond, label %then, label %else
49 // then:
50 // br label %merge
51 // else:
52 // br label %merge
53 // merge:
54 // %a = phi i32 [ 0, %then ], [ 1, %else ]
55 //
56 // Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
57 // because %tid is not on its use-def chains, %a is sync dependent on %tid
58 // because the branch "br i1 %cond" depends on %tid and affects which value %a
59 // is assigned to.
60 //
61 // The sync dependence detection (which branch induces divergence in which join
62 // points) is implemented in the SyncDependenceAnalysis.
63 //
64 // The current DivergenceAnalysis implementation has the following limitations:
65 // 1. intra-procedural. It conservatively considers the arguments of a
66 // non-kernel-entry function and the return value of a function call as
67 // divergent.
68 // 2. memory as black box. It conservatively considers values loaded from
69 // generic or local address as divergent. This can be improved by leveraging
70 // pointer analysis and/or by modelling non-escaping memory objects in SSA
71 // as done in RV.
72 //
73 //===----------------------------------------------------------------------===//
74
75 #include "llvm/Analysis/DivergenceAnalysis.h"
76 #include "llvm/Analysis/LoopInfo.h"
77 #include "llvm/Analysis/Passes.h"
78 #include "llvm/Analysis/PostDominators.h"
79 #include "llvm/Analysis/TargetTransformInfo.h"
80 #include "llvm/IR/Dominators.h"
81 #include "llvm/IR/InstIterator.h"
82 #include "llvm/IR/Instructions.h"
83 #include "llvm/IR/IntrinsicInst.h"
84 #include "llvm/IR/Value.h"
85 #include "llvm/Support/Debug.h"
86 #include "llvm/Support/raw_ostream.h"
87 #include <vector>
88
89 using namespace llvm;
90
91 #define DEBUG_TYPE "divergence-analysis"
92
93 // class DivergenceAnalysis
94 DivergenceAnalysis::DivergenceAnalysis(
95 const Function &F, const Loop *RegionLoop, const DominatorTree &DT,
96 const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm)
97 : F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA),
98 IsLCSSAForm(IsLCSSAForm) {}
99
100 void DivergenceAnalysis::markDivergent(const Value &DivVal) {
101 assert(isa<Instruction>(DivVal) || isa<Argument>(DivVal));
102 assert(!isAlwaysUniform(DivVal) && "cannot be divergent");
103 DivergentValues.insert(&DivVal);
104 }
105
106 void DivergenceAnalysis::addUniformOverride(const Value &UniVal) {
107 UniformOverrides.insert(&UniVal);
108 }
109
110 bool DivergenceAnalysis::updateTerminator(const TerminatorInst &Term) const {
111 if (Term.getNumSuccessors() <= 1)
112 return false;
113 if (auto *BranchTerm = dyn_cast<BranchInst>(&Term)) {
114 assert(BranchTerm->isConditional());
115 return isDivergent(*BranchTerm->getCondition());
116 }
117 if (auto *SwitchTerm = dyn_cast<SwitchInst>(&Term)) {
118 return isDivergent(*SwitchTerm->getCondition());
119 }
120 if (isa<InvokeInst>(Term)) {
121 return false; // ignore abnormal executions through landingpad
122 }
123
124 llvm_unreachable("unexpected terminator");
125 }
126
127 bool DivergenceAnalysis::updateNormalInstruction(const Instruction &I) const {
128 // TODO function calls with side effects, etc
129 for (const auto &Op : I.operands()) {
130 if (isDivergent(*Op))
131 return true;
132 }
133 return false;
134 }
135
136 bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock,
137 const Value &Val) const {
138 const auto *Inst = dyn_cast<const Instruction>(&Val);
139 if (!Inst)
140 return false;
141 // check whether any divergent loop carrying Val terminates before control
142 // proceeds to ObservingBlock
143 for (const auto *Loop = LI.getLoopFor(Inst->getParent());
144 Loop != RegionLoop && !Loop->contains(&ObservingBlock);
145 Loop = Loop->getParentLoop()) {
146 if (DivergentLoops.find(Loop) != DivergentLoops.end())
147 return true;
148 }
149
150 return false;
151 }
152
153 bool DivergenceAnalysis::updatePHINode(const PHINode &Phi) const {
154 // joining divergent disjoint paths in Phi parent block
155 if (!Phi.hasConstantOrUndefValue() && isJoinDivergent(*Phi.getParent())) {
156 return true;
157 }
158
159 // An incoming value could be divergent by itself.
160 // Otherwise, an incoming value could be uniform within the loop
161 // that carries its definition but it may appear divergent
162 // from outside the loop. This happens when divergent loop exits
163 // drop definitions of that uniform value in different iterations.
164 //
165 // for (int i = 0; i < n; ++i) { // 'i' is uniform inside the loop
166 // if (i % thread_id == 0) break; // divergent loop exit
167 // }
168 // int divI = i; // divI is divergent
169 for (size_t i = 0; i < Phi.getNumIncomingValues(); ++i) {
170 const auto *InVal = Phi.getIncomingValue(i);
171 if (isDivergent(*Phi.getIncomingValue(i)) ||
172 isTemporalDivergent(*Phi.getParent(), *InVal)) {
173 return true;
174 }
175 }
176 return false;
177 }
178
179 bool DivergenceAnalysis::inRegion(const Instruction &I) const {
180 return I.getParent() && inRegion(*I.getParent());
181 }
182
183 bool DivergenceAnalysis::inRegion(const BasicBlock &BB) const {
184 return (!RegionLoop && BB.getParent() == &F) || RegionLoop->contains(&BB);
185 }
186
187 // marks all users of loop-carried values of the loop headed by LoopHeader as
188 // divergent
189 void DivergenceAnalysis::taintLoopLiveOuts(const BasicBlock &LoopHeader) {
190 auto *DivLoop = LI.getLoopFor(&LoopHeader);
191 assert(DivLoop && "loopHeader is not actually part of a loop");
192
193 SmallVector<const BasicBlock *, 8> TaintStack;
194 DivLoop->getExitBlocks(TaintStack);
195
196 // Otherwise potential users of loop-carried values could be anywhere in the
197 // dominance region of DivLoop (including its fringes for phi nodes)
198 DenseSet<const BasicBlock *> Visited;
199 for (auto *Block : TaintStack) {
200 Visited.insert(Block);
201 }
202 Visited.insert(&LoopHeader);
203
204 while (!TaintStack.empty()) {
205 auto *UserBlock = TaintStack.back();
206 TaintStack.pop_back();
207
208 // don't spread divergence beyond the region
209 if (!inRegion(*UserBlock))
210 continue;
211
212 assert(!DivLoop->contains(UserBlock) &&
213 "irreducible control flow detected");
214
215 // phi nodes at the fringes of the dominance region
216 if (!DT.dominates(&LoopHeader, UserBlock)) {
217 // all PHI nodes of UserBlock become divergent
218 for (auto &Phi : UserBlock->phis()) {
219 Worklist.push_back(&Phi);
220 }
221 continue;
222 }
223
224 // taint outside users of values carried by DivLoop
225 for (auto &I : *UserBlock) {
226 if (isAlwaysUniform(I))
227 continue;
228 if (isDivergent(I))
229 continue;
230
231 for (auto &Op : I.operands()) {
232 auto *OpInst = dyn_cast(&Op);
233 if (!OpInst)
234 continue;
235 if (DivLoop->contains(OpInst->getParent())) {
236 markDivergent(I);
237 pushUsers(I);
238 break;
239 }
240 }
241 }
242
243 // visit all blocks in the dominance region
244 for (auto *SuccBlock : successors(UserBlock)) {
245 if (!Visited.insert(SuccBlock).second) {
246 continue;
247 }
248 TaintStack.push_back(SuccBlock);
249 }
250 }
251 }
252
253 void DivergenceAnalysis::pushPHINodes(const BasicBlock &Block) {
254 for (const auto &Phi : Block.phis()) {
255 if (isDivergent(Phi))
256 continue;
257 Worklist.push_back(&Phi);
258 }
259 }
260
261 void DivergenceAnalysis::pushUsers(const Value &V) {
262 for (const auto *User : V.users()) {
263 const auto *UserInst = dyn_cast<const Instruction>(User);
264 if (!UserInst)
265 continue;
266
267 if (isDivergent(*UserInst))
268 continue;
269
270 // only compute divergence inside the region
271 if (!inRegion(*UserInst))
272 continue;
273 Worklist.push_back(UserInst);
274 }
275 }
276
277 bool DivergenceAnalysis::propagateJoinDivergence(const BasicBlock &JoinBlock,
278 const Loop *BranchLoop) {
279 LLVM_DEBUG(dbgs() << "\tpropJoinDiv " << JoinBlock.getName() << "\n");
280
281 // ignore divergence outside the region
282 if (!inRegion(JoinBlock)) {
283 return false;
284 }
285
286 // push non-divergent phi nodes in JoinBlock to the worklist
287 pushPHINodes(JoinBlock);
288
289 // JoinBlock is a divergent loop exit
290 if (BranchLoop && !BranchLoop->contains(&JoinBlock)) {
291 return true;
292 }
293
294 // disjoint-paths divergent at JoinBlock
295 markBlockJoinDivergent(JoinBlock);
296 return false;
297 }
298
299 void DivergenceAnalysis::propagateBranchDivergence(const TerminatorInst &Term) {
300 LLVM_DEBUG(dbgs() << "propBranchDiv " << Term.getParent()->getName() << "\n");
301
302 markDivergent(Term);
303
304 const auto *BranchLoop = LI.getLoopFor(Term.getParent());
305
306 // whether there is a divergent loop exit from BranchLoop (if any)
307 bool IsBranchLoopDivergent = false;
308
309 // iterate over all blocks reachable by disjoint paths from Term within the loop
310 // also iterates over loop exits that become divergent due to Term.
311 for (const auto *JoinBlock : SDA.join_blocks(Term)) {
312 IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
313 }
314
315 // Branch loop is a divergent loop due to the divergent branch in Term
316 if (IsBranchLoopDivergent) {
317 assert(BranchLoop);
318 if (!DivergentLoops.insert(BranchLoop).second) {
319 return;
320 }
321 propagateLoopDivergence(*BranchLoop);
322 }
323 }
324
325 void DivergenceAnalysis::propagateLoopDivergence(const Loop &ExitingLoop) {
326 LLVM_DEBUG(dbgs() << "propLoopDiv " << ExitingLoop.getName() << "\n");
327
328 // don't propagate beyond region
329 if (!inRegion(*ExitingLoop.getHeader()))
330 return;
331
332 const auto *BranchLoop = ExitingLoop.getParentLoop();
333
334 // Uses of loop-carried values could occur anywhere
335 // within the dominance region of the definition. All loop-carried
336 // definitions are dominated by the loop header (reducible control).
337 // Thus all users have to be in the dominance region of the loop header,
338 // except PHI nodes that can also live at the fringes of the dom region
339 // (incoming defining value).
340 if (!IsLCSSAForm)
341 taintLoopLiveOuts(*ExitingLoop.getHeader());
342
343 // whether there is a divergent loop exit from BranchLoop (if any)
344 bool IsBranchLoopDivergent = false;
345
346 // iterate over all blocks reachable by disjoint paths from exits of
347 // ExitingLoop also iterates over loop exits (of BranchLoop) that in turn
348 // become divergent.
349 for (const auto *JoinBlock : SDA.join_blocks(ExitingLoop)) {
350 IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
351 }
352
353 // Branch loop is divergent due to a divergent loop exit in ExitingLoop
354 if (IsBranchLoopDivergent) {
355 assert(BranchLoop);
356 if (!DivergentLoops.insert(BranchLoop).second) {
357 return;
358 }
359 propagateLoopDivergence(*BranchLoop);
360 }
361 }
362
363 void DivergenceAnalysis::compute() {
364 for (auto *DivVal : DivergentValues) {
365 pushUsers(*DivVal);
366 }
367
368 // propagate divergence
369 while (!Worklist.empty()) {
370 const Instruction &I = *Worklist.back();
371 Worklist.pop_back();
372
373 // maintain uniformity of overrides
374 if (isAlwaysUniform(I))
375 continue;
376
377 bool WasDivergent = isDivergent(I);
378 if (WasDivergent)
379 continue;
380
381 // propagate divergence caused by terminator
382 if (isa<TerminatorInst>(I)) {
383 auto &Term = cast<TerminatorInst>(I);
384 if (updateTerminator(Term)) {
385 // propagate control divergence to affected instructions
386 propagateBranchDivergence(Term);
387 continue;
388 }
389 }
390
391 // update divergence of I due to divergent operands
392 bool DivergentUpd = false;
393 const auto *Phi = dyn_cast<const PHINode>(&I);
394 if (Phi) {
395 DivergentUpd = updatePHINode(*Phi);
396 } else {
397 DivergentUpd = updateNormalInstruction(I);
398 }
399
400 // propagate value divergence to users
401 if (DivergentUpd) {
402 markDivergent(I);
403 pushUsers(I);
404 }
405 }
406 }
407
408 bool DivergenceAnalysis::isAlwaysUniform(const Value &V) const {
409 return UniformOverrides.find(&V) != UniformOverrides.end();
410 }
411
412 bool DivergenceAnalysis::isDivergent(const Value &V) const {
413 return DivergentValues.find(&V) != DivergentValues.end();
414 }
415
416 void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
417 if (DivergentValues.empty())
418 return;
419 // iterate instructions using instructions() to ensure a deterministic order.
420 for (auto &I : instructions(F)) {
421 if (isDivergent(I))
422 OS << "DIVERGENT:" << I << '\n';
423 }
424 }
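To make the worklist fixed point in compute() above more tangible, here is a small self-contained sketch (plain C++, no LLVM types, illustrative only) of the same propagation scheme on a toy def-use graph: divergence is seeded at one value and spreads to every transitive user, just as markDivergent/pushUsers/compute interact above.

// Illustrative, standalone sketch of the worklist propagation in compute().
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // Users[V] lists the values that use V (the def-use edges).
  std::map<std::string, std::vector<std::string>> Users = {
      {"tid", {"cond"}}, {"cond", {"br"}}, {"n", {"add"}}, {"add", {}}};

  std::set<std::string> Divergent = {"tid"}; // seed, cf. markDivergent()
  std::vector<std::string> Worklist(Divergent.begin(), Divergent.end());

  // cf. compute(): pop a value, mark its users divergent, push newly marked ones.
  while (!Worklist.empty()) {
    std::string V = Worklist.back();
    Worklist.pop_back();
    for (const std::string &User : Users[V])
      if (Divergent.insert(User).second)
        Worklist.push_back(User); // cf. pushUsers()
  }

  for (const std::string &V : Divergent)
    std::cout << "DIVERGENT: " << V << "\n"; // cf. DivergenceAnalysis::print()
}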
0 //===- SyncDependenceAnalysis.cpp - Divergent Branch Dependence Calculation
1 //--===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements an algorithm that returns for a divergent branch
11 // the set of basic blocks whose phi nodes become divergent due to divergent
12 // control. These are the blocks that are reachable by two disjoint paths from
13 // the branch or loop exits that have a reaching path that is disjoint from a
14 // path to the loop latch.
15 //
16 // The SyncDependenceAnalysis is used in the DivergenceAnalysis to model
17 // control-induced divergence in phi nodes.
18 //
19 // -- Summary --
20 // The SyncDependenceAnalysis lazily computes sync dependences [3].
21 // The analysis evaluates the disjoint path criterion [2] by a reduction
22 // to SSA construction. The SSA construction algorithm is implemented as
23 // a simple data-flow analysis [1].
24 //
25 // [1] "A Simple, Fast Dominance Algorithm", SPI '01, Cooper, Harvey and Kennedy
26 // [2] "Efficiently Computing Static Single Assignment Form
27 // and the Control Dependence Graph", TOPLAS '91,
28 // Cytron, Ferrante, Rosen, Wegman and Zadeck
29 // [3] "Improving Performance of OpenCL on CPUs", CC '12, Karrenberg and Hack
30 // [4] "Divergence Analysis", TOPLAS '13, Sampaio, Souza, Collange and Pereira
31 //
32 // -- Sync dependence --
33 // Sync dependence [4] characterizes the control flow aspect of the
34 // propagation of branch divergence. For example,
35 //
36 // %cond = icmp slt i32 %tid, 10
37 // br i1 %cond, label %then, label %else
38 // then:
39 // br label %merge
40 // else:
41 // br label %merge
42 // merge:
43 // %a = phi i32 [ 0, %then ], [ 1, %else ]
44 //
45 // Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
46 // because %tid is not on its use-def chains, %a is sync dependent on %tid
47 // because the branch "br i1 %cond" depends on %tid and affects which value %a
48 // is assigned to.
49 //
50 // -- Reduction to SSA construction --
51 // There are two disjoint paths from A to X, if a certain variant of SSA
52 // construction places a phi node in X under the following set-up scheme [2].
53 //
54 // This variant of SSA construction ignores incoming undef values.
55 // That is paths from the entry without a definition do not result in
56 // phi nodes.
57 //
58 //          entry
59 //         /     \
60 //        A       \
61 //       / \       Y
62 //      B   C     /
63 //       \ / \   /
64 //        D   E
65 //         \ /
66 //          F
67 // Assume that A contains a divergent branch. We are interested
68 // in the set of all blocks where each block is reachable from A
69 // via two disjoint paths. This would be the set {D, F} in this
70 // case.
71 // To generally reduce this query to SSA construction we introduce
72 // a virtual variable x and assign to x different values in each
73 // successor block of A.
74 //           entry
75 //          /     \
76 //         A       \
77 //        / \       Y
78 //   x = 0   x = 1 /
79 //        \ / \   /
80 //         D   E
81 //          \ /
82 //           F
83 // Our flavor of SSA construction for x will construct the following
84 //            entry
85 //           /     \
86 //          A       \
87 //         / \       Y
88 //   x0 = 0   x1 = 1 /
89 //         \ / \    /
90 //        x2=phi  E
91 //            \  /
92 //           x3=phi
93 // The blocks D and F contain phi nodes and are thus each reachable
94 // by two disjoint paths from A.
95 //
96 // -- Remarks --
97 // In case of loop exits we need to check the disjoint path criterion for loops
98 // [2]. To this end, we check whether the definition of x differs between the
99 // loop exit and the loop header (_after_ SSA construction).
100 //
101 //===----------------------------------------------------------------------===//
102 #include "llvm/ADT/PostOrderIterator.h"
103 #include "llvm/ADT/SmallPtrSet.h"
104 #include "llvm/Analysis/PostDominators.h"
105 #include "llvm/Analysis/SyncDependenceAnalysis.h"
106 #include "llvm/IR/BasicBlock.h"
107 #include "llvm/IR/CFG.h"
108 #include "llvm/IR/Dominators.h"
109 #include "llvm/IR/Function.h"
110
111 #include <stack>
112 #include <unordered_set>
113
114 #define DEBUG_TYPE "sync-dependence"
115
116 namespace llvm {
117
118 ConstBlockSet SyncDependenceAnalysis::EmptyBlockSet;
119
120 SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT,
121 const PostDominatorTree &PDT,
122 const LoopInfo &LI)
123 : FuncRPOT(DT.getRoot()->getParent()), DT(DT), PDT(PDT), LI(LI) {}
124
125 SyncDependenceAnalysis::~SyncDependenceAnalysis() {}
126
127 using FunctionRPOT = ReversePostOrderTraversal<const Function *>;
128
129 // divergence propagator for reducible CFGs
130 struct DivergencePropagator {
131 const FunctionRPOT &FuncRPOT;
132 const DominatorTree &DT;
133 const PostDominatorTree &PDT;
134 const LoopInfo &LI;
135
136 // identified join points
137 std::unique_ptr<ConstBlockSet> JoinBlocks;
138
139 // reached loop exits (by a path disjoint to a path to the loop header)
140 SmallPtrSet<const BasicBlock *, 4> ReachedLoopExits;
141
142 // if DefMap[B] == C then C is the dominating definition at block B
143 // if DefMap[B] ~ undef then we haven't seen B yet
144 // if DefMap[B] == B then B is a join point of disjoint paths from X or B is
145 // an immediate successor of X (initial value).
146 using DefiningBlockMap = std::map<const BasicBlock *, const BasicBlock *>;
147 DefiningBlockMap DefMap;
148
149 // all blocks with pending visits
150 std::unordered_set<const BasicBlock *> PendingUpdates;
151
152 DivergencePropagator(const FunctionRPOT &FuncRPOT, const DominatorTree &DT,
153 const PostDominatorTree &PDT, const LoopInfo &LI)
154 : FuncRPOT(FuncRPOT), DT(DT), PDT(PDT), LI(LI),
155 JoinBlocks(new ConstBlockSet) {}
156
157 // set the definition at @block and mark @block as pending for a visit
158 void addPending(const BasicBlock &Block, const BasicBlock &DefBlock) {
159 bool WasAdded = DefMap.emplace(&Block, &DefBlock).second;
160 if (WasAdded)
161 PendingUpdates.insert(&Block);
162 }
163
164 void printDefs(raw_ostream &Out) {
165 Out << "Propagator::DefMap {\n";
166 for (const auto *Block : FuncRPOT) {
167 auto It = DefMap.find(Block);
168 Out << Block->getName() << " : ";
169 if (It == DefMap.end()) {
170 Out << "\n";
171 } else {
172 const auto *DefBlock = It->second;
173 Out << (DefBlock ? DefBlock->getName() : "") << "\n";
174 }
175 }
176 Out << "}\n";
177 }
178
179 // process @succBlock with reaching definition @defBlock
180 // the original divergent branch was in @parentLoop (if any)
181 void visitSuccessor(const BasicBlock &SuccBlock, const Loop *ParentLoop,
182 const BasicBlock &DefBlock) {
183
184 // @succBlock is a loop exit
185 if (ParentLoop && !ParentLoop->contains(&SuccBlock)) {
186 DefMap.emplace(&SuccBlock, &DefBlock);
187 ReachedLoopExits.insert(&SuccBlock);
188 return;
189 }
190
191 // first reaching def?
192 auto ItLastDef = DefMap.find(&SuccBlock);
193 if (ItLastDef == DefMap.end()) {
194 addPending(SuccBlock, DefBlock);
195 return;
196 }
197
198 // a join of at least two definitions
199 if (ItLastDef->second != &DefBlock) {
200 // do we know this join already?
201 if (!JoinBlocks->insert(&SuccBlock).second)
202 return;
203
204 // update the definition
205 addPending(SuccBlock, SuccBlock);
206 }
207 }
208
209 // find all blocks reachable by two disjoint paths from @rootTerm.
210 // This method works for both divergent TerminatorInsts and loops with
211 // divergent exits.
212 // @rootBlock is either the block containing the branch or the header of the
213 // divergent loop.
214 // @nodeSuccessors is the set of successors of the node (Loop or Terminator)
215 // headed by @rootBlock.
216 // @parentLoop is the parent loop of the Loop or the loop that contains the
217 // Terminator.
218 template <typename SuccessorIterable>
219 std::unique_ptr<ConstBlockSet>
220 computeJoinPoints(const BasicBlock &RootBlock,
221 SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
222 assert(JoinBlocks);
223
224 // immediate post dominator (no join block beyond that block)
225 const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(&RootBlock));
226 const auto *IpdNode = PdNode->getIDom();
227 const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
228
229 // bootstrap with branch targets
230 for (const auto *SuccBlock : NodeSuccessors) {
231 DefMap.emplace(SuccBlock, SuccBlock);
232
233 if (ParentLoop && !ParentLoop->contains(SuccBlock)) {
234 // immediate loop exit from node.
235 ReachedLoopExits.insert(SuccBlock);
236 continue;
237 } else {
238 // regular successor
239 PendingUpdates.insert(SuccBlock);
240 }
241 }
242
243 auto ItBeginRPO = FuncRPOT.begin();
244
245 // skip until term (TODO RPOT won't let us start at @term directly)
246 for (; *ItBeginRPO != &RootBlock; ++ItBeginRPO) {}
247
248 auto ItEndRPO = FuncRPOT.end();
249 assert(ItBeginRPO != ItEndRPO);
250
251 // propagate definitions at the immediate successors of the node in RPO
252 auto ItBlockRPO = ItBeginRPO;
253 while (++ItBlockRPO != ItEndRPO && *ItBlockRPO != PdBoundBlock) {
254 const auto *Block = *ItBlockRPO;
255
256 // skip @block if not pending update
257 auto ItPending = PendingUpdates.find(Block);
258 if (ItPending == PendingUpdates.end())
259 continue;
260 PendingUpdates.erase(ItPending);
261
262 // propagate definition at @block to its successors
263 auto ItDef = DefMap.find(Block);
264 const auto *DefBlock = ItDef->second;
265 assert(DefBlock);
266
267 auto *BlockLoop = LI.getLoopFor(Block);
268 if (ParentLoop &&
269 (ParentLoop != BlockLoop && ParentLoop->contains(BlockLoop))) {
270 // if the successor is the header of a nested loop pretend it's a
271 // single node with the loop's exits as successors
272 SmallVector<BasicBlock *, 4> BlockLoopExits;
273 BlockLoop->getExitBlocks(BlockLoopExits);
274 for (const auto *BlockLoopExit : BlockLoopExits) {
275 visitSuccessor(*BlockLoopExit, ParentLoop, *DefBlock);
276 }
277
278 } else {
279 // the successors are either on the same loop level or loop exits
280 for (const auto *SuccBlock : successors(Block)) {
281 visitSuccessor(*SuccBlock, ParentLoop, *DefBlock);
282 }
283 }
284 }
285
286 // We need to know the definition at the parent loop header to decide
287 // whether the definition at the header is different from the definition at
288 // the loop exits, which would indicate divergent loop exits.
289 //
290 // A // loop header
291 // |
292 // B // nested loop header
293 // |
294 // C -> X (exit from B loop) -..-> (A latch)
295 // |
296 // D -> back to B (B latch)
297 // |
298 // proper exit from both loops
299 //
300 // D post-dominates B as it is the only proper exit from the "A loop".
301 // If C has a divergent branch, propagation will therefore stop at D.
302 // That implies that B will never receive a definition.
303 // But that definition can only be the same as at D (D itself in this case)
304 // because all paths to anywhere have to pass through D.
305 //
306 const BasicBlock *ParentLoopHeader =
307 ParentLoop ? ParentLoop->getHeader() : nullptr;
308 if (ParentLoop && ParentLoop->contains(PdBoundBlock)) {
309 DefMap[ParentLoopHeader] = DefMap[PdBoundBlock];
310 }
311
312 // analyze reached loop exits
313 if (!ReachedLoopExits.empty()) {
314 assert(ParentLoop);
315 const auto *HeaderDefBlock = DefMap[ParentLoopHeader];
316 LLVM_DEBUG(printDefs(dbgs()));
317 assert(HeaderDefBlock && "no definition in header of carrying loop");
318
319 for (const auto *ExitBlock : ReachedLoopExits) {
320 auto ItExitDef = DefMap.find(ExitBlock);
321 assert((ItExitDef != DefMap.end()) &&
322 "no reaching def at reachable loop exit");
323 if (ItExitDef->second != HeaderDefBlock) {
324 JoinBlocks->insert(ExitBlock);
325 }
326 }
327 }
328
329 return std::move(JoinBlocks);
330 }
331 };
332
333 const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
334 using LoopExitVec = SmallVector<BasicBlock *, 4>;
335 LoopExitVec LoopExits;
336 Loop.getExitBlocks(LoopExits);
337 if (LoopExits.size() < 1) {
338 return EmptyBlockSet;
339 }
340
341 // already available in cache?
342 auto ItCached = CachedLoopExitJoins.find(&Loop);
343 if (ItCached != CachedLoopExitJoins.end())
344 return *ItCached->second;
345
346 // compute all join points
347 DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
348 auto JoinBlocks = Propagator.computeJoinPoints(
349 *Loop.getHeader(), LoopExits, Loop.getParentLoop());
350
351 auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
352 assert(ItInserted.second);
353 return *ItInserted.first->second;
354 }
355
356 const ConstBlockSet &
357 SyncDependenceAnalysis::join_blocks(const TerminatorInst &Term) {
358 // trivial case
359 if (Term.getNumSuccessors() < 1) {
360 return EmptyBlockSet;
361 }
362
363 // already available in cache?
364 auto ItCached = CachedBranchJoins.find(&Term);
365 if (ItCached != CachedBranchJoins.end())
366 return *ItCached->second;
367
368 // compute all join points
369 DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
370 const auto &TermBlock = *Term.getParent();
371 auto JoinBlocks = Propagator.computeJoinPoints(
372 TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
373
374 auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
375 assert(ItInserted.second);
376 return *ItInserted.first->second;
377 }
378
379 } // namespace llvm
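The reaching-definition reduction used by DivergencePropagator above can be illustrated without any LLVM machinery. The following standalone sketch (illustrative only) runs the scheme on the diamond CFG from the file header comment of SyncDependenceAnalysis.cpp: each successor of the divergent branch in A starts with its own definition, definitions are pushed in reverse post order, and a block that receives two distinct definitions is a join point. The undef path through Y contributes no definition, matching the SSA variant described in that comment.

// Illustrative, standalone sketch of the join-point computation.
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main() {
  // Sub-CFG below the divergent branch in A: A->{B,C}, B->{D}, C->{D,E},
  // D->{F}, E->{F}. (Y is omitted: it carries no definition of x.)
  std::map<std::string, std::vector<std::string>> Succs = {
      {"B", {"D"}}, {"C", {"D", "E"}}, {"D", {"F"}}, {"E", {"F"}}, {"F", {}}};
  // A reverse post order of that sub-CFG.
  std::vector<std::string> RPO = {"B", "C", "D", "E", "F"};

  std::map<std::string, std::string> Def; // reaching definition per block
  Def["B"] = "B";                         // bootstrap: one definition per
  Def["C"] = "C";                         // successor of the branch block A

  std::vector<std::string> Joins;
  for (const std::string &Block : RPO) {
    if (!Def.count(Block))
      continue; // not reached from the branch
    for (const std::string &Succ : Succs[Block]) {
      auto It = Def.find(Succ);
      if (It == Def.end()) {
        Def[Succ] = Def[Block]; // first reaching definition, cf. addPending()
      } else if (It->second != Def[Block]) {
        Joins.push_back(Succ);  // two distinct definitions meet: a join point
        It->second = Succ;      // the join defines a fresh value (a phi)
      }
    }
  }

  for (const std::string &J : Joins)
    std::cout << "join: " << J << "\n"; // prints D and F, as in the comment
}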
1313 CallGraphTest.cpp
1414 CFGTest.cpp
1515 CGSCCPassManagerTest.cpp
16 DivergenceAnalysisTest.cpp
1617 GlobalsModRefTest.cpp
1718 ValueLatticeTest.cpp
1819 LazyCallGraphTest.cpp
0 //===- DivergenceAnalysisTest.cpp - DivergenceAnalysis unit tests ---------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "llvm/ADT/SmallVector.h"
10 #include "llvm/Analysis/AssumptionCache.h"
11 #include "llvm/Analysis/DivergenceAnalysis.h"
12 #include "llvm/Analysis/LoopInfo.h"
13 #include "llvm/Analysis/PostDominators.h"
14 #include "llvm/Analysis/SyncDependenceAnalysis.h"
15 #include "llvm/Analysis/TargetLibraryInfo.h"
16 #include "llvm/AsmParser/Parser.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/Dominators.h"
19 #include "llvm/IR/GlobalVariable.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/IR/InstIterator.h"
22 #include "llvm/IR/LLVMContext.h"
23 #include "llvm/IR/LegacyPassManager.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/IR/Verifier.h"
26 #include "llvm/Support/SourceMgr.h"
27 #include "gtest/gtest.h"
28
29 namespace llvm {
30 namespace {
31
32 BasicBlock *GetBlockByName(StringRef BlockName, Function &F) {
33 for (auto &BB : F) {
34 if (BB.getName() != BlockName)
35 continue;
36 return &BB;
37 }
38 return nullptr;
39 }
40
41 // We use this fixture to ensure that we clean up DivergenceAnalysis before
42 // deleting the PassManager.
43 class DivergenceAnalysisTest : public testing::Test {
44 protected:
45 LLVMContext Context;
46 Module M;
47 TargetLibraryInfoImpl TLII;
48 TargetLibraryInfo TLI;
49
50 std::unique_ptr<DominatorTree> DT;
51 std::unique_ptr<PostDominatorTree> PDT;
52 std::unique_ptr<LoopInfo> LI;
53 std::unique_ptr<SyncDependenceAnalysis> SDA;
54
55 DivergenceAnalysisTest() : M("", Context), TLII(), TLI(TLII) {}
56
57 DivergenceAnalysis buildDA(Function &F, bool IsLCSSA) {
58 DT.reset(new DominatorTree(F));
59 PDT.reset(new PostDominatorTree(F));
60 LI.reset(new LoopInfo(*DT));
61 SDA.reset(new SyncDependenceAnalysis(*DT, *PDT, *LI));
62 return DivergenceAnalysis(F, nullptr, *DT, *LI, *SDA, IsLCSSA);
63 }
64
65 void runWithDA(
66 Module &M, StringRef FuncName, bool IsLCSSA,
67 function_ref<void(Function &F, LoopInfo &LI, DivergenceAnalysis &DA)>
68 Test) {
69 auto *F = M.getFunction(FuncName);
70 ASSERT_NE(F, nullptr) << "Could not find " << FuncName;
71 DivergenceAnalysis DA = buildDA(*F, IsLCSSA);
72 Test(*F, *LI, DA);
73 }
74 };
75
76 // Simple initial state test
77 TEST_F(DivergenceAnalysisTest, DAInitialState) {
78 IntegerType *IntTy = IntegerType::getInt32Ty(Context);
79 FunctionType *FTy =
80 FunctionType::get(Type::getVoidTy(Context), {IntTy}, false);
81 Function *F = cast<Function>(M.getOrInsertFunction("f", FTy));
82 BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
83 ReturnInst::Create(Context, nullptr, BB);
84
85 DivergenceAnalysis DA = buildDA(*F, false);
86
87 // Whole function region
88 EXPECT_EQ(DA.getRegionLoop(), nullptr);
89
90 // No divergence in initial state
91 EXPECT_FALSE(DA.hasDetectedDivergence());
92
93 // No spurious divergence
94 DA.compute();
95 EXPECT_FALSE(DA.hasDetectedDivergence());
96
97 // Detected divergence after marking
98 Argument &arg = *F->arg_begin();
99 DA.markDivergent(arg);
100
101 EXPECT_TRUE(DA.hasDetectedDivergence());
102 EXPECT_TRUE(DA.isDivergent(arg));
103
104 DA.compute();
105 EXPECT_TRUE(DA.hasDetectedDivergence());
106 EXPECT_TRUE(DA.isDivergent(arg));
107 }
108
109 TEST_F(DivergenceAnalysisTest, DANoLCSSA) {
110 LLVMContext C;
111 SMDiagnostic Err;
112
113 std::unique_ptr<Module> M = parseAssemblyString(
114 "target datalayout = \"e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128\" "
115 " "
116 "define i32 @f_1(i8* nocapture %arr, i32 %n, i32* %A, i32* %B) "
117 " local_unnamed_addr { "
118 "entry: "
119 " br label %loop.ph "
120 " "
121 "loop.ph: "
122 " br label %loop "
123 " "
124 "loop: "
125 " %iv0 = phi i32 [ %iv0.inc, %loop ], [ 0, %loop.ph ] "
126 " %iv1 = phi i32 [ %iv1.inc, %loop ], [ -2147483648, %loop.ph ] "
127 " %iv0.inc = add i32 %iv0, 1 "
128 " %iv1.inc = add i32 %iv1, 3 "
129 " %cond.cont = icmp slt i32 %iv0, %n "
130 " br i1 %cond.cont, label %loop, label %for.end.loopexit "
131 " "
132 "for.end.loopexit: "
133 " ret i32 %iv0 "
134 "} ",
135 Err, C);
136
137 Function *F = M->getFunction("f_1");
138 DivergenceAnalysis DA = buildDA(*F, false);
139 EXPECT_FALSE(DA.hasDetectedDivergence());
140
141 auto ItArg = F->arg_begin();
142 ItArg++;
143 auto &NArg = *ItArg;
144
145 // Seed divergence in argument %n
146 DA.markDivergent(NArg);
147
148 DA.compute();
149 EXPECT_TRUE(DA.hasDetectedDivergence());
150
151 // Verify that "ret i32 %iv0" is divergent
152 auto ItBlock = F->begin();
153 std::advance(ItBlock, 3);
154 auto &ExitBlock = *GetBlockByName("for.end.loopexit", *F);
155 auto &RetInst = *cast<ReturnInst>(ExitBlock.begin());
156 EXPECT_TRUE(DA.isDivergent(RetInst));
157 }
158
159 TEST_F(DivergenceAnalysisTest, DALCSSA) {
160 LLVMContext C;
161 SMDiagnostic Err;
162
163 std::unique_ptr<Module> M = parseAssemblyString(
164 "target datalayout = \"e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128\" "
165 " "
166 "define i32 @f_lcssa(i8* nocapture %arr, i32 %n, i32* %A, i32* %B) "
167 " local_unnamed_addr { "
168 "entry: "
169 " br label %loop.ph "
170 " "
171 "loop.ph: "
172 " br label %loop "
173 " "
174 "loop: "
175 " %iv0 = phi i32 [ %iv0.inc, %loop ], [ 0, %loop.ph ] "
176 " %iv1 = phi i32 [ %iv1.inc, %loop ], [ -2147483648, %loop.ph ] "
177 " %iv0.inc = add i32 %iv0, 1 "
178 " %iv1.inc = add i32 %iv1, 3 "
179 " %cond.cont = icmp slt i32 %iv0, %n "
180 " br i1 %cond.cont, label %loop, label %for.end.loopexit "
181 " "
182 "for.end.loopexit: "
183 " %val.ret = phi i32 [ %iv0, %loop ] "
184 " br label %detached.return "
185 " "
186 "detached.return: "
187 " ret i32 %val.ret "
188 "} ",
189 Err, C);
190
191 Function *F = M->getFunction("f_lcssa");
192 DivergenceAnalysis DA = buildDA(*F, true);
193 EXPECT_FALSE(DA.hasDetectedDivergence());
194
195 auto ItArg = F->arg_begin();
196 ItArg++;
197 auto &NArg = *ItArg;
198
199 // Seed divergence in argument %n
200 DA.markDivergent(NArg);
201
202 DA.compute();
203 EXPECT_TRUE(DA.hasDetectedDivergence());
204
205 // Verify that the returned value %val.ret (fed by %iv0) is divergent
206 auto ItBlock = F->begin();
207 std::advance(ItBlock, 4);
208 auto &ExitBlock = *GetBlockByName("detached.return", *F);
209 auto &RetInst = *cast<ReturnInst>(ExitBlock.begin());
210 EXPECT_TRUE(DA.isDivergent(RetInst));
211 }
212
213 TEST_F(DivergenceAnalysisTest, DAJoinDivergence) {
214 LLVMContext C;
215 SMDiagnostic Err;
216
217 std::unique_ptr<Module> M = parseAssemblyString(
218 "target datalayout = \"e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128\" "
219 " "
220 "define void @f_1(i1 %a, i1 %b, i1 %c) "
221 " local_unnamed_addr { "
222 "A: "
223 " br i1 %a, label %B, label %C "
224 " "
225 "B: "
226 " br i1 %b, label %C, label %D "
227 " "
228 "C: "
229 " %c.join = phi i32 [ 0, %A ], [ 1, %B ] "
230 " br i1 %c, label %D, label %E "
231 " "
232 "D: "
233 " %d.join = phi i32 [ 0, %B ], [ 1, %C ] "
234 " br label %E "
235 " "
236 "E: "
237 " %e.join = phi i32 [ 0, %C ], [ 1, %D ] "
238 " ret void "
239 "} "
240 " "
241 "define void @f_2(i1 %a, i1 %b, i1 %c) "
242 " local_unnamed_addr { "
243 "A: "
244 " br i1 %a, label %B, label %E "
245 " "
246 "B: "
247 " br i1 %b, label %C, label %D "
248 " "
249 "C: "
250 " br label %D "
251 " "
252 "D: "
253 " %d.join = phi i32 [ 0, %B ], [ 1, %C ] "
254 " br label %E "
255 " "
256 "E: "
257 " %e.join = phi i32 [ 0, %A ], [ 1, %D ] "
258 " ret void "
259 "} "
260 " "
261 "define void @f_3(i1 %a, i1 %b, i1 %c)"
262 " local_unnamed_addr { "
263 "A: "
264 " br i1 %a, label %B, label %C "
265 " "
266 "B: "
267 " br label %C "
268 " "
269 "C: "
270 " %c.join = phi i32 [ 0, %A ], [ 1, %B ] "
271 " br i1 %c, label %D, label %E "
272 " "
273 "D: "
274 " br label %E "
275 " "
276 "E: "
277 " %e.join = phi i32 [ 0, %C ], [ 1, %D ] "
278 " ret void "
279 "} ",
280 Err, C);
281
282 // Maps divergent conditions to the basic blocks whose Phi nodes become
283 // divergent. Blocks need to be listed in IR order.
284 using SmallBlockVec = SmallVector<const BasicBlock *, 4>;
285 using InducedDivJoinMap = std::map<const Value *, SmallBlockVec>;
286
287 // Actual function performing the checks.
288 auto CheckDivergenceFunc = [this](Function &F,
289 InducedDivJoinMap &ExpectedDivJoins) {
290 for (auto &ItCase : ExpectedDivJoins) {
291 auto *DivVal = ItCase.first;
292 auto DA = buildDA(F, false);
293 DA.markDivergent(*DivVal);
294 DA.compute();
295
296 // List of basic blocks that shall host divergent Phi nodes.
297 auto ItDivJoins = ItCase.second.begin();
298
299 for (auto &BB : F) {
300 auto *Phi = dyn_cast<PHINode>(BB.begin());
301 if (!Phi)
302 continue;
303
304 if (&BB == *ItDivJoins) {
305 EXPECT_TRUE(DA.isDivergent(*Phi));
306 // Advance to next block with expected divergent PHI node.
307 ++ItDivJoins;
308 } else {
309 EXPECT_FALSE(DA.isDivergent(*Phi));
310 }
311 }
312 }
313 };
314
315 {
316 auto *F = M->getFunction("f_1");
317 auto ItBlocks = F->begin();
318 ItBlocks++; // Skip A
319 ItBlocks++; // Skip B
320 auto *C = &*ItBlocks++;
321 auto *D = &*ItBlocks++;
322 auto *E = &*ItBlocks;
323
324 auto ItArg = F->arg_begin();
325 auto *AArg = &*ItArg++;
326 auto *BArg = &*ItArg++;
327 auto *CArg = &*ItArg;
328
329 InducedDivJoinMap DivJoins;
330 DivJoins.emplace(AArg, SmallBlockVec({C, D, E}));
331 DivJoins.emplace(BArg, SmallBlockVec({D, E}));
332 DivJoins.emplace(CArg, SmallBlockVec({E}));
333
334 CheckDivergenceFunc(*F, DivJoins);
335 }
336
337 {
338 auto *F = M->getFunction("f_2");
339 auto ItBlocks = F->begin();
340 ItBlocks++; // Skip A
341 ItBlocks++; // Skip B
342 ItBlocks++; // Skip C
343 auto *D = &*ItBlocks++;
344 auto *E = &*ItBlocks;
345
346 auto ItArg = F->arg_begin();
347 auto *AArg = &*ItArg++;
348 auto *BArg = &*ItArg++;
349 auto *CArg = &*ItArg;
350
351 InducedDivJoinMap DivJoins;
352 DivJoins.emplace(AArg, SmallBlockVec({E}));
353 DivJoins.emplace(BArg, SmallBlockVec({D}));
354 DivJoins.emplace(CArg, SmallBlockVec({}));
355
356 CheckDivergenceFunc(*F, DivJoins);
357 }
358
359 {
360 auto *F = M->getFunction("f_3");
361 auto ItBlocks = F->begin();
362 ItBlocks++; // Skip A
363 ItBlocks++; // Skip B
364 auto *C = &*ItBlocks++;
365 ItBlocks++; // Skip D
366 auto *E = &*ItBlocks;
367
368 auto ItArg = F->arg_begin();
369 auto *AArg = &*ItArg++;
370 auto *BArg = &*ItArg++;
371 auto *CArg = &*ItArg;
372
373 InducedDivJoinMap DivJoins;
374 DivJoins.emplace(AArg, SmallBlockVec({C}));
375 DivJoins.emplace(BArg, SmallBlockVec({}));
376 DivJoins.emplace(CArg, SmallBlockVec({E}));
377
378 CheckDivergenceFunc(*F, DivJoins);
379 }
380 }
381
382 TEST_F(DivergenceAnalysisTest, DASwitchUnreachableDefault) {
383 LLVMContext C;
384 SMDiagnostic Err;
385
386 std::unique_ptr<Module> M = parseAssemblyString(
387 "target datalayout = \"e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128\" "
388 " "
389 "define void @switch_unreachable_default(i32 %cond) local_unnamed_addr { "
390 "entry: "
391 " switch i32 %cond, label %sw.default [ "
392 " i32 0, label %sw.bb0 "
393 " i32 1, label %sw.bb1 "
394 " ] "
395 " "
396 "sw.bb0: "
397 " br label %sw.epilog "
398 " "
399 "sw.bb1: "
400 " br label %sw.epilog "
401 " "
402 "sw.default: "
403 " unreachable "
404 " "
405 "sw.epilog: "
406 " %div.dbl = phi double [ 0.0, %sw.bb0], [ -1.0, %sw.bb1 ] "
407 " ret void "
408 "}",
409 Err, C);
410
411 auto *F = M->getFunction("switch_unreachable_default");
412 auto &CondArg = *F->arg_begin();
413 auto DA = buildDA(*F, false);
414
415 EXPECT_FALSE(DA.hasDetectedDivergence());
416
417 DA.markDivergent(CondArg);
418 DA.compute();
419
420 // Still %CondArg is divergent.
421 EXPECT_TRUE(DA.hasDetectedDivergence());
422
423 // The phi %div.dbl joining the reachable switch cases is divergent (see D52221)
424 auto &ExitBlock = *GetBlockByName("sw.epilog", *F);
425 auto &DivDblPhi = *cast<PHINode>(ExitBlock.begin());
426 EXPECT_TRUE(DA.isDivergent(DivDblPhi));
427 }
428
429 } // end anonymous namespace
430 } // end namespace llvm