llvm.org GIT mirror llvm / 60d74b7
[RewriteStatepointsForGC] Generalized vector phi/select handling for base pointers This change extends the detection of base pointers for vector constructs to handle arbitrary phi and select nodes. The existing non-vector code already handles those, so this is basically just extending the vector special case to be less special cased. It still isn't generalized vector handling since we can't handle arbitrary vector instructions (e.g. shufflevectors), but it's a lot closer. The general structure of the change is as follows: * Extend the base defining value relation over a subset of vector instructions and vector typed phi & select instructions. * Move scalarization from before base pointer rewriting to after base pointer rewriting. The extension of the BDV relation is sufficient to find vector base phis for vector inputs. * Preserve the existing special case logic for when the base of a vector element is locally obvious. This general idea could be extended to the scalar case as well. Differential Revision: http://reviews.llvm.org/D10461#inline-84275 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240850 91177308-0d34-0410-b5e6-96231b3b80d8 Philip Reames 4 years ago
2 changed file(s) with 168 addition(s) and 59 deletion(s). Raw diff Collapse all Expand all
293293
294294 static Value *findBaseDefiningValue(Value *I);
295295
296 /// If we can trivially determine that the index specified in the given vector
297 /// is a base pointer, return it. In cases where the entire vector is known to
298 /// consist of base pointers, the entire vector will be returned. This
299 /// indicates that the relevant extractelement is a valid base pointer and
300 /// should be used directly.
301 static Value *findBaseOfVector(Value *I, Value *Index) {
296 /// Return a base defining value for the 'Index' element of the given vector
297 /// instruction 'I'. If Index is null, returns a BDV for the entire vector
298 /// 'I'. As an optimization, this method will try to determine when the
299 /// element is known to already be a base pointer. If this can be established,
300 /// the second value in the returned pair will be true. Note that either a
301 /// vector or a pointer typed value can be returned. For the former, the
302 /// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
303 /// If the latter, the returned pointer is a BDV (or possibly a base) for the
304 /// particular element in 'I'.
305 static std::pair
306 findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
302307 assert(I->getType()->isVectorTy() &&
303308 cast(I->getType())->getElementType()->isPointerTy() &&
304309 "Illegal to ask for the base pointer of a non-pointer type");
308313
309314 if (isa(I))
310315 // An incoming argument to the function is a base pointer
311 return I;
316 return std::make_pair(I, true);
312317
313318 // We shouldn't see the address of a global as a vector value?
314319 assert(!isa(I) &&
319324 if (isa(I))
320325 // utterly meaningless, but useful for dealing with partially optimized
321326 // code.
322 return I;
327 return std::make_pair(I, true);
323328
324329 // Due to inheritance, this must be _after_ the global variable and undef
325330 // checks
327332 assert(!isa(I) && !isa(I) &&
328333 "order of checks wrong!");
329334 assert(Con->isNullValue() && "null is the only case which makes sense");
330 return Con;
331 }
332
335 return std::make_pair(Con, true);
336 }
337
333338 if (isa(I))
334 return I;
335
339 return std::make_pair(I, true);
340
336341 // For an insert element, we might be able to look through it if we know
337 // something about the indexes, but if the indices are arbitrary values, we
338 // can't without much more extensive scalarization.
342 // something about the indexes.
339343 if (InsertElementInst *IEI = dyn_cast(I)) {
340 Value *InsertIndex = IEI->getOperand(2);
341 // This index is inserting the value, look for it's base
342 if (InsertIndex == Index)
343 return findBaseDefiningValue(IEI->getOperand(1));
344 // Both constant, and can't be equal per above. This insert is definitely
345 // not relevant, look back at the rest of the vector and keep trying.
346 if (isa(Index) && isa(InsertIndex))
347 return findBaseOfVector(IEI->getOperand(0), Index);
348 }
349
350 // Note: This code is currently rather incomplete. We are essentially only
351 // handling cases where the vector element is trivially a base pointer. We
352 // need to update the entire base pointer construction algorithm to know how
353 // to track vector elements and potentially scalarize, but the case which
354 // would motivate the work hasn't shown up in real workloads yet.
355 llvm_unreachable("no base found for vector element");
356 }
344 if (Index) {
345 Value *InsertIndex = IEI->getOperand(2);
346 // This index is inserting the value, look for its BDV
347 if (InsertIndex == Index)
348 return std::make_pair(findBaseDefiningValue(IEI->getOperand(1)), false);
349 // Both constant, and can't be equal per above. This insert is definitely
350 // not relevant, look back at the rest of the vector and keep trying.
351 if (isa(Index) && isa(InsertIndex))
352 return findBaseDefiningValueOfVector(IEI->getOperand(0), Index);
353 }
354
355 // We don't know whether this vector contains entirely base pointers or
356 // not. To be conservatively correct, we treat it as a BDV and will
357 // duplicate code as needed to construct a parallel vector of bases.
358 return std::make_pair(IEI, false);
359 }
360
361 if (isa(I))
362 // We don't know whether this vector contains entirely base pointers or
363 // not. To be conservatively correct, we treat it as a BDV and will
364 // duplicate code as needed to construct a parallel vector of bases.
365 // TODO: There are a number of local optimizations which could be applied here
366 // for particular shufflevector patterns.
367 return std::make_pair(I, false);
368
369 // A PHI or Select is a base defining value. The outer findBasePointer
370 // algorithm is responsible for constructing a base value for this BDV.
371 assert((isa(I) || isa(I)) &&
372 "unknown vector instruction - no base found for vector element");
373 return std::make_pair(I, false);
374 }
375
376 static bool isKnownBaseResult(Value *V);
357377
358378 /// Helper function for findBasePointer - Will return a value which either a)
359379 /// defines the base pointer for the input or b) blocks the simple search
360380 /// (i.e. a PHI or Select of two derived pointers)
361381 static Value *findBaseDefiningValue(Value *I) {
382 if (I->getType()->isVectorTy())
383 return findBaseDefiningValueOfVector(I).first;
384
362385 assert(I->getType()->isPointerTy() &&
363386 "Illegal to ask for the base pointer of a non-pointer type");
364387
369392 if (auto *EEI = dyn_cast(I)) {
370393 Value *VectorOperand = EEI->getVectorOperand();
371394 Value *Index = EEI->getIndexOperand();
372 Value *VectorBase = findBaseOfVector(VectorOperand, Index);
373 // If the result returned is a vector, we know the entire vector must
374 // contain base pointers. In that case, the extractelement is a valid base
375 // for this value.
376 if (VectorBase->getType()->isVectorTy())
377 return EEI;
378 // Otherwise, we needed to look through the vector to find the base for
379 // this particular element.
380 assert(VectorBase->getType()->isPointerTy());
381 return VectorBase;
395 std::pair pair =
396 findBaseDefiningValueOfVector(VectorOperand, Index);
397 Value *VectorBase = pair.first;
398 if (VectorBase->getType()->isPointerTy())
399 // We found a BDV for this specific element within the vector. This is an
400 // optimization, but in practice it covers most of the useful cases
401 // created via scalarization.
402 return VectorBase;
403 else {
404 assert(VectorBase->getType()->isVectorTy());
405 if (pair.second)
406 // If the entire vector returned is known to be entirely base pointers,
407 // then the extractelement is valid base for this value.
408 return EEI;
409 else {
410 // Otherwise, we have an instruction which potentially produces a
411 // derived pointer and we need findBasePointers to clone code for us
412 // such that we can create an instruction which produces the
413 // accompanying base pointer.
414 // Note: This code is currently rather incomplete. We don't currently
415 // support the general form of shufflevector or insertelement.
416 // Conceptually, these are just 'base defining values' of the same
417 // variety as phi or select instructions. We need to update the
418 // findBasePointers algorithm to insert new 'base-only' versions of the
419 // original instructions. This is relatively straightforward to do, but
420 // the case which would motivate the work hasn't shown up in real
421 // workloads yet.
422 assert((isa(VectorBase) || isa(VectorBase)) &&
423 "need to extend findBasePointers for generic vector"
424 "instruction cases");
425 return VectorBase;
426 }
427 }
382428 }
383429
384430 if (isa(I))
17111757 /// slightly non-trivial since it requires a format change. Given how rare
17121758 /// such cases are (for the moment?) scalarizing is an acceptable compromise.
17131759 static void splitVectorValues(Instruction *StatepointInst,
1714 StatepointLiveSetTy &LiveSet, DominatorTree &DT) {
1760 StatepointLiveSetTy &LiveSet,
1761 DenseMap& PointerToBase,
1762 DominatorTree &DT) {
17151763 SmallVector ToSplit;
17161764 for (Value *V : LiveSet)
17171765 if (isa(V->getType()))
17201768 if (ToSplit.empty())
17211769 return;
17221770
1771 DenseMap> ElementMapping;
1772
17231773 Function &F = *(StatepointInst->getParent()->getParent());
17241774
17251775 DenseMap AllocaMap;
17261776 // First is normal return, second is exceptional return (invoke only)
17271777 DenseMap> Replacements;
17281778 for (Value *V : ToSplit) {
1729 LiveSet.erase(V);
1730
17311779 AllocaInst *Alloca =
17321780 new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI());
17331781 AllocaMap[V] = Alloca;
17371785 SmallVector Elements;
17381786 for (unsigned i = 0; i < VT->getNumElements(); i++)
17391787 Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i)));
1740 LiveSet.insert(Elements.begin(), Elements.end());
1788 ElementMapping[V] = Elements;
17411789
17421790 auto InsertVectorReform = [&](Instruction *IP) {
17431791 Builder.SetInsertPoint(IP);
17701818 Replacements[V].second = InsertVectorReform(IP);
17711819 }
17721820 }
1821
17731822 for (Value *V : ToSplit) {
17741823 AllocaInst *Alloca = AllocaMap[V];
17751824
18131862 for (Value *V : ToSplit)
18141863 Allocas.push_back(AllocaMap[V]);
18151864 PromoteMemToReg(Allocas, DT);
1865
1866 // Update our tracking of live pointers and base mappings to account for the
1867 // changes we just made.
1868 for (Value *V : ToSplit) {
1869 auto &Elements = ElementMapping[V];
1870
1871 LiveSet.erase(V);
1872 LiveSet.insert(Elements.begin(), Elements.end());
1873 // We need to update the base mapping as well.
1874 assert(PointerToBase.count(V));
1875 Value *OldBase = PointerToBase[V];
1876 auto &BaseElements = ElementMapping[OldBase];
1877 PointerToBase.erase(V);
1878 assert(Elements.size() == BaseElements.size());
1879 for (unsigned i = 0; i < Elements.size(); i++) {
1880 Value *Elem = Elements[i];
1881 PointerToBase[Elem] = BaseElements[i];
1882 }
1883 }
18161884 }
18171885
18181886 // Helper function for the "rematerializeLiveValues". It walks use chain
20732141 // A) Identify all gc pointers which are statically live at the given call
20742142 // site.
20752143 findLiveReferences(F, DT, P, toUpdate, records);
2076
2077 // Do a limited scalarization of any live at safepoint vector values which
2078 // contain pointers. This enables this pass to run after vectorization at
2079 // the cost of some possible performance loss. TODO: it would be nice to
2080 // natively support vectors all the way through the backend so we don't need
2081 // to scalarize here.
2082 for (size_t i = 0; i < records.size(); i++) {
2083 struct PartiallyConstructedSafepointRecord &info = records[i];
2084 Instruction *statepoint = toUpdate[i].getInstruction();
2085 splitVectorValues(cast(statepoint), info.liveset, DT);
2086 }
20872144
20882145 // B) Find the base pointers for each live pointer
20892146 /* scope for caching */ {
21442201 holders[i] = nullptr;
21452202 }
21462203 holders.clear();
2204
2205 // Do a limited scalarization of any live at safepoint vector values which
2206 // contain pointers. This enables this pass to run after vectorization at
2207 // the cost of some possible performance loss. TODO: it would be nice to
2208 // natively support vectors all the way through the backend so we don't need
2209 // to scalarize here.
2210 for (size_t i = 0; i < records.size(); i++) {
2211 struct PartiallyConstructedSafepointRecord &info = records[i];
2212 Instruction *statepoint = toUpdate[i].getInstruction();
2213 splitVectorValues(cast(statepoint), info.liveset,
2214 info.PointerToBase, DT);
2215 }
21472216
21482217 // In order to reduce live set of statepoint we might choose to rematerialize
21492218 // some values instead of relocating them. This is purely an optimization and
104104 ; CHECK-NEXT: bitcast
105105 ; CHECK-NEXT: gc.relocate
106106 ; CHECK-NEXT: bitcast
107 ; CHECK-NEXT: gc.relocate
108 ; CHECK-NEXT: bitcast
109107 ; CHECK-NEXT: insertelement
110108 ; CHECK-NEXT: insertelement
111109 ; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
115113 ret <2 x i64 addrspace(1)*> %vec
116114 }
117115
116
117 ; A base vector from a load
118 define <2 x i64 addrspace(1)*> @test6(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr)
119 gc "statepoint-example" {
120 ; CHECK-LABEL: test6
121 ; CHECK-LABEL: merge:
122 ; CHECK-NEXT: = phi
123 ; CHECK-NEXT: = phi
124 ; CHECK-NEXT: extractelement
125 ; CHECK-NEXT: extractelement
126 ; CHECK-NEXT: extractelement
127 ; CHECK-NEXT: extractelement
128 ; CHECK-NEXT: gc.statepoint
129 ; CHECK-NEXT: gc.relocate
130 ; CHECK-NEXT: bitcast
131 ; CHECK-NEXT: gc.relocate
132 ; CHECK-NEXT: bitcast
133 ; CHECK-NEXT: gc.relocate
134 ; CHECK-NEXT: bitcast
135 ; CHECK-NEXT: gc.relocate
136 ; CHECK-NEXT: bitcast
137 ; CHECK-NEXT: insertelement
138 ; CHECK-NEXT: insertelement
139 ; CHECK-NEXT: insertelement
140 ; CHECK-NEXT: insertelement
141 ; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
142 entry:
143 br i1 %cnd, label %taken, label %untaken
144 taken:
145 %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
146 br label %merge
147 untaken:
148 %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
149 br label %merge
150
151 merge:
152 %obj = phi <2 x i64 addrspace(1)*> [%obja, %taken], [%objb, %untaken]
153 %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
154 ret <2 x i64 addrspace(1)*> %obj
155 }
156
157
118158 declare void @do_safepoint()
119159
120160 declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)