llvm.org GIT mirror llvm / 25eb624
[DivergenceAnalysis] Add methods for querying divergence at use

Summary:
The existing isDivergent(Value) methods query whether a value is divergent at
its definition. However even if a value is uniform at its definition, a use of
it in another basic block can be divergent because of divergent control flow
between the def and the use.

This patch adds new isDivergent(Use) methods to DivergenceAnalysis,
LegacyDivergenceAnalysis and GPUDivergenceAnalysis.

This might allow D63953 or other similar workarounds to be removed.

Reviewers: alex-t, nhaehnle, arsenm, rtaylor, rampitec, simoll, jingyue

Reviewed By: nhaehnle

Subscribers: jfb, jvesely, wdng, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D65141

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@367218 91177308-0d34-0410-b5e6-96231b3b80d8

Jay Foad
6 changed files with 79 additions and 20 deletions.
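To illustrate the distinction this patch introduces, here is a minimal sketch of how a client might query divergence per use rather than per value. It is not part of the patch; the helper and its name are hypothetical, and it assumes only the LegacyDivergenceAnalysis declarations shown in the diff below.

#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Hypothetical helper (not part of the patch): true if operand OpIdx of I is
// divergent at this particular use site, distinguishing the two reasons.
static bool isOperandDivergentHere(const LegacyDivergenceAnalysis &DA,
                                   const Instruction &I, unsigned OpIdx) {
  const Use &U = I.getOperandUse(OpIdx);
  const Value *V = U.get();
  if (DA.isDivergent(V))
    return true; // Divergent already at its definition, so at every use too.
  // Even though V is uniform at its definition, divergent control flow
  // between the def and this use (e.g. a divergent loop exit) can make the
  // lanes observe different values here.
  return DA.isDivergentUse(&U);
}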
7272 /// operands
7373 bool isAlwaysUniform(const Value &Val) const;
7474
75 /// \brief Whether \p Val is a divergent value
75 /// \brief Whether \p Val is divergent at its definition.
7676 bool isDivergent(const Value &Val) const;
77
78 /// \brief Whether \p U is divergent. Uses of a uniform value can be divergent.
79 bool isDivergentUse(const Use &U) const;
7780
7881 void print(raw_ostream &OS, const Module *) const;
7982
188191 /// The GPU kernel this analysis result is for
189192 const Function &getFunction() const { return DA.getFunction(); }
190193
191 /// Whether \p V is divergent.
194 /// Whether \p V is divergent at its definition.
192195 bool isDivergent(const Value &V) const;
193196
194 /// Whether \p V is uniform/non-divergent
197 /// Whether \p U is divergent. Uses of a uniform value can be divergent.
198 bool isDivergentUse(const Use &U) const;
199
200 /// Whether \p V is uniform/non-divergent.
195201 bool isUniform(const Value &V) const { return !isDivergent(V); }
202
203 /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be
204 /// divergent.
205 bool isUniformUse(const Use &U) const { return !isDivergentUse(U); }
196206
197207 /// Print all divergent values in the kernel.
198208 void print(raw_ostream &OS, const Module *) const;
3838 void print(raw_ostream &OS, const Module *) const override;
3939
4040 // Returns true if V is divergent at its definition.
41 //
42 // Even if this function returns false, V may still be divergent when used
43 // in a different basic block.
4441 bool isDivergent(const Value *V) const;
4542
43 // Returns true if U is divergent. Uses of a uniform value can be divergent.
44 bool isDivergentUse(const Use *U) const;
45
4646 // Returns true if V is uniform/non-divergent.
47 //
48 // Even if this function returns true, V may still be divergent when used
49 // in a different basic block.
5047 bool isUniform(const Value *V) const { return !isDivergent(V); }
48
49 // Returns true if U is uniform/non-divergent. Uses of a uniform value can be
50 // divergent.
51 bool isUniformUse(const Use *U) const { return !isDivergentUse(U); }
5152
5253 // Keep the analysis results uptodate by removing an erased value.
5354 void removeValue(const Value *V) { DivergentValues.erase(V); }
6162
6263 // Stores all divergent values.
6364 DenseSet&lt;const Value *&gt; DivergentValues;
65
66 // Stores divergent uses of possibly uniform values.
67 DenseSet&lt;const Use *&gt; DivergentUses;
6468 };
6569 } // End llvm namespace
6670
411411 return DivergentValues.find(&V) != DivergentValues.end();
412412 }
413413
414 bool DivergenceAnalysis::isDivergentUse(const Use &U) const {
415 Value &V = *U.get();
416 Instruction &I = *cast&lt;Instruction&gt;(U.getUser());
417 return isDivergent(V) || isTemporalDivergent(*I.getParent(), V);
418 }
419
414420 void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
415421 if (DivergentValues.empty())
416422 return;
448454 return DA.isDivergent(val);
449455 }
450456
457 bool GPUDivergenceAnalysis::isDivergentUse(const Use &use) const {
458 return DA.isDivergentUse(use);
459 }
460
451461 void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const {
452462 OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n";
453463 DA.print(OS, mod);
9292 class DivergencePropagator {
9393 public:
9494 DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
95 PostDominatorTree &PDT, DenseSet&lt;const Value *&gt; &DV)
96 : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
95 PostDominatorTree &PDT, DenseSet&lt;const Value *&gt; &DV,
96 DenseSet&lt;const Use *&gt; &DU)
97 : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV), DU(DU) {}
9798 void populateWithSourcesOfDivergence();
9899 void propagate();
99100
117118 PostDominatorTree &PDT;
118119 std::vector&lt;Value *&gt; Worklist; // Stack for DFS.
119120 DenseSet&lt;const Value *&gt; &DV; // Stores all divergent values.
121 DenseSet&lt;const Use *&gt; &DU; // Stores divergent uses of possibly uniform
122 // values.
120123 };
121124
122125 void DivergencePropagator::populateWithSourcesOfDivergence() {
123126 Worklist.clear();
124127 DV.clear();
128 DU.clear();
125129 for (auto &I : instructions(F)) {
126130 if (TTI.isSourceOfDivergence(&I)) {
127131 Worklist.push_back(&I);
196200 // dominators of TI until it is outside the influence region.
197201 BasicBlock *InfluencedBB = ThisBB;
198202 while (InfluenceRegion.count(InfluencedBB)) {
199 for (auto &I : *InfluencedBB)
200 findUsersOutsideInfluenceRegion(I, InfluenceRegion);
203 for (auto &I : *InfluencedBB) {
204 if (!DV.count(&I))
205 findUsersOutsideInfluenceRegion(I, InfluenceRegion);
206 }
201207 DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
202208 if (IDomNode == nullptr)
203209 break;
207213
208214 void DivergencePropagator::findUsersOutsideInfluenceRegion(
209215 Instruction &I, const DenseSet&lt;BasicBlock *&gt; &InfluenceRegion) {
210 for (User *U : I.users()) {
211 Instruction *UserInst = cast&lt;Instruction&gt;(U);
216 for (Use &Use : I.uses()) {
217 Instruction *UserInst = cast&lt;Instruction&gt;(Use.getUser());
212218 if (!InfluenceRegion.count(UserInst->getParent())) {
219 DU.insert(&Use);
213220 if (DV.insert(UserInst).second)
214221 Worklist.push_back(UserInst);
215222 }
319326 return false;
320327
321328 DivergentValues.clear();
329 DivergentUses.clear();
322330 gpuDA = nullptr;
323331
324332 auto &DT = getAnalysis&lt;DominatorTreeWrapperPass&gt;().getDomTree();
331339
332340 } else {
333341 // run LLVM's existing DivergenceAnalysis
334 DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues);
342 DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses);
335343 DP.populateWithSourcesOfDivergence();
336344 DP.propagate();
337345 }
348356 return gpuDA->isDivergent(*V);
349357 }
350358 return DivergentValues.count(V);
359 }
360
361 bool LegacyDivergenceAnalysis::isDivergentUse(const Use *U) const {
362 if (gpuDA) {
363 return gpuDA->isDivergentUse(*U);
364 }
365 return DivergentValues.count(U->get()) || DivergentUses.count(U);
351366 }
352367
353368 void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
141141
142142 // If the pointer operand is divergent, then each lane is doing an atomic
143143 // operation on a different address, and we cannot optimize that.
144 if (DA->isDivergent(I.getOperand(PtrIdx))) {
144 if (DA->isDivergentUse(&I.getOperandUse(PtrIdx))) {
145145 return;
146146 }
147147
148 const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx));
148 const bool ValDivergent = DA->isDivergentUse(&I.getOperandUse(ValIdx));
149149
150150 // If the value operand is divergent, each lane is contributing a different
151151 // value to the atomic calculation. We can only optimize divergent values if
218218
219219 const unsigned ValIdx = 0;
220220
221 const bool ValDivergent = DA->isDivergent(I.getOperand(ValIdx));
221 const bool ValDivergent = DA->isDivergentUse(&I.getOperandUse(ValIdx));
222222
223223 // If the value operand is divergent, each lane is contributing a different
224224 // value to the atomic calculation. We can only optimize divergent values if
231231 // If any of the other arguments to the intrinsic are divergent, we can't
232232 // optimize the operation.
233233 for (unsigned Idx = 1; Idx < I.getNumOperands(); Idx++) {
234 if (DA->isDivergent(I.getOperand(Idx))) {
234 if (DA->isDivergentUse(&I.getOperandUse(Idx))) {
235235 return;
236236 }
237237 }
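The AMDGPUAtomicOptimizer changes above all follow the same pattern: the pass now queries divergence on the operand's Use (obtained with getOperandUse) instead of on the operand Value, so an operand that is uniform at its definition but divergent at the atomic instruction is no longer treated as uniform. A minimal sketch of that per-use query pattern, assuming only the LegacyDivergenceAnalysis API added by this patch (the helper and its name are hypothetical):

#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Hypothetical helper (not part of the patch): true if any operand of I is
// divergent at this use site, even when the operand value itself is uniform
// at its definition.
static bool anyOperandDivergentAtUse(const LegacyDivergenceAnalysis &DA,
                                     const Instruction &I) {
  for (const Use &U : I.operands())
    if (DA.isDivergentUse(&U))
      return true;
  return false;
}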
0 ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=true < %s | FileCheck %s
1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=true < %s -use-gpu-divergence-analysis | FileCheck %s
2
3 @local = addrspace(3) global i32 undef
4
5 define void @reducible(i32 %x) {
6 ; CHECK-LABEL: reducible:
7 ; CHECK-NOT: dpp
8 entry:
9 br label %loop
10 loop:
11 %i = phi i32 [ 0, %entry ], [ %i1, %loop ]
12 %gep = getelementptr i32, i32 addrspace(3)* @local, i32 %i
13 %cond = icmp ult i32 %i, %x
14 %i1 = add i32 %i, 1
15 br i1 %cond, label %loop, label %exit
16 exit:
17 %old = atomicrmw add i32 addrspace(3)* %gep, i32 %x acq_rel
18 ret void
19 }