llvm.org GIT mirror llvm / 80f020a
Now that SROA can form allocas for dynamic vector accesses, further improve it to be able to replace operations on these vector allocas with insert/extract element instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158623 91177308-0d34-0410-b5e6-96231b3b80d8 Pete Cooper 7 years ago
2 changed file(s) with 161 addition(s) and 61 deletion(s). Raw diff Collapse all Expand all
263263 /// large integers unless there is some potential for optimization.
264264 bool HadNonMemTransferAccess;
265265
266 /// HadDynamicAccess - True if some element of this alloca was dynamic.
267 /// We don't yet have support for turning a dynamic access into a large
268 /// integer.
269 bool HadDynamicAccess;
270
266271 public:
267272 explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
268273 : AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown),
269 VectorTy(0), HadNonMemTransferAccess(false) { }
274 VectorTy(0), HadNonMemTransferAccess(false), HadDynamicAccess(false) { }
270275
271276 AllocaInst *TryConvert(AllocaInst *AI);
272277
273278 private:
274 bool CanConvertToScalar(Value *V, uint64_t Offset);
279 bool CanConvertToScalar(Value *V, uint64_t Offset, Value* NonConstantIdx);
275280 void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
276281 bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
277 void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
282 void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset,
283 Value *NonConstantIdx);
278284
279285 Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType,
280 uint64_t Offset, IRBuilder<> &Builder);
286 uint64_t Offset, Value* NonConstantIdx,
287 IRBuilder<> &Builder);
281288 Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
282 uint64_t Offset, IRBuilder<> &Builder);
289 uint64_t Offset, Value* NonConstantIdx,
290 IRBuilder<> &Builder);
283291 };
284292 } // end anonymous namespace.
285293
290298 AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
291299 // If we can't convert this scalar, or if mem2reg can trivially do it, bail
292300 // out.
293 if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
301 if (!CanConvertToScalar(AI, 0, 0) || !IsNotTrivial)
294302 return 0;
295303
296304 // If an alloca has only memset / memcpy uses, it may still have an Unknown
318326 if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
319327 !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
320328 return 0;
329 // Dynamic accesses on integers aren't yet supported. They need us to shift
330 // by a dynamic amount which could be difficult to work out as we might not
331 // know whether to use a left or right shift.
332 if (ScalarKind == Integer && HadDynamicAccess)
333 return 0;
321334
322335 DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
323336 // Create and insert the integer alloca.
324337 NewTy = IntegerType::get(AI->getContext(), BitWidth);
325338 }
326339 AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
327 ConvertUsesToScalar(AI, NewAI, 0);
340 ConvertUsesToScalar(AI, NewAI, 0, 0);
328341 return NewAI;
329342 }
330343
411424 ///
412425 /// If we see at least one access to the value that is as a vector type, set the
413426 /// SawVec flag.
414 bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
427 bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
428 Value* NonConstantIdx) {
415429 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
416430 Instruction *User = cast(*UI);
417431
441455 if (BitCastInst *BCI = dyn_cast(User)) {
442456 if (!onlyUsedByLifetimeMarkers(BCI))
443457 IsNotTrivial = true; // Can't be mem2reg'd.
444 if (!CanConvertToScalar(BCI, Offset))
458 if (!CanConvertToScalar(BCI, Offset, NonConstantIdx))
445459 return false;
446460 continue;
447461 }
448462
449463 if (GetElementPtrInst *GEP = dyn_cast(User)) {
450464 // If this is a GEP with a variable indices, we can't handle it.
451 if (!GEP->hasAllConstantIndices())
465 PointerType* PtrTy = dyn_cast(GEP->getPointerOperandType());
466 if (!PtrTy)
452467 return false;
453468
454469 // Compute the offset that this GEP adds to the pointer.
455470 SmallVector Indices(GEP->op_begin()+1, GEP->op_end());
456 if (!GEP->getPointerOperandType()->isPointerTy())
457 return false;
458 uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
471 Value *GEPNonConstantIdx = 0;
472 if (!GEP->hasAllConstantIndices()) {
473 if (!isa(PtrTy->getElementType()))
474 return false;
475 if (NonConstantIdx)
476 return false;
477 GEPNonConstantIdx = Indices.pop_back_val();
478 if (!GEPNonConstantIdx->getType()->isIntegerTy(32))
479 return false;
480 HadDynamicAccess = true;
481 } else
482 GEPNonConstantIdx = NonConstantIdx;
483 uint64_t GEPOffset = TD.getIndexedOffset(PtrTy,
459484 Indices);
460485 // See if all uses can be converted.
461 if (!CanConvertToScalar(GEP, Offset+GEPOffset))
486 if (!CanConvertToScalar(GEP, Offset+GEPOffset, GEPNonConstantIdx))
462487 return false;
463488 IsNotTrivial = true; // Can't be mem2reg'd.
464489 HadNonMemTransferAccess = true;
468493 // If this is a constant sized memset of a constant value (e.g. 0) we can
469494 // handle it.
470495 if (MemSetInst *MSI = dyn_cast(User)) {
496 // Store to dynamic index.
497 if (NonConstantIdx)
498 return false;
471499 // Store of constant value.
472500 if (!isa(MSI->getValue()))
473501 return false;
492520 // If this is a memcpy or memmove into or out of the whole allocation, we
493521 // can handle it like a load or store of the scalar type.
494522 if (MemTransferInst *MTI = dyn_cast(User)) {
523 // Store to dynamic index.
524 if (NonConstantIdx)
525 return false;
495526 ConstantInt *Len = dyn_cast(MTI->getLength());
496527 if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
497528 return false;
523554 /// Offset is an offset from the original alloca, in bits that need to be
524555 /// shifted to the right. By the end of this, there should be no uses of Ptr.
525556 void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
526 uint64_t Offset) {
557 uint64_t Offset,
558 Value* NonConstantIdx) {
527559 while (!Ptr->use_empty()) {
528560 Instruction *User = cast(Ptr->use_back());
529561
530562 if (BitCastInst *CI = dyn_cast(User)) {
531 ConvertUsesToScalar(CI, NewAI, Offset);
563 ConvertUsesToScalar(CI, NewAI, Offset, NonConstantIdx);
532564 CI->eraseFromParent();
533565 continue;
534566 }
536568 if (GetElementPtrInst *GEP = dyn_cast(User)) {
537569 // Compute the offset that this GEP adds to the pointer.
538570 SmallVector Indices(GEP->op_begin()+1, GEP->op_end());
571 if (!GEP->hasAllConstantIndices())
572 NonConstantIdx = Indices.pop_back_val();
539573 uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
540574 Indices);
541 ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
575 ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, NonConstantIdx);
542576 GEP->eraseFromParent();
543577 continue;
544578 }
549583 // The load is a bit extract from NewAI shifted right by Offset bits.
550584 Value *LoadedVal = Builder.CreateLoad(NewAI);
551585 Value *NewLoadVal
552 = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
586 = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset,
587 NonConstantIdx, Builder);
553588 LI->replaceAllUsesWith(NewLoadVal);
554589 LI->eraseFromParent();
555590 continue;
559594 assert(SI->getOperand(0) != Ptr && "Consistency error!");
560595 Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
561596 Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
562 Builder);
597 NonConstantIdx, Builder);
563598 Builder.CreateStore(New, NewAI);
564599 SI->eraseFromParent();
565600
574609 // transform it into a store of the expanded constant value.
575610 if (MemSetInst *MSI = dyn_cast(User)) {
576611 assert(MSI->getRawDest() == Ptr && "Consistency error!");
612 assert(!NonConstantIdx && "Cannot replace dynamic memset with insert");
577613 int64_t SNumBytes = cast(MSI->getLength())->getSExtValue();
578614 if (SNumBytes > 0 && (SNumBytes >> 32) == 0) {
579615 unsigned NumBytes = static_cast(SNumBytes);
590626 Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
591627 Value *New = ConvertScalar_InsertValue(
592628 ConstantInt::get(User->getContext(), APVal),
593 Old, Offset, Builder);
629 Old, Offset, 0, Builder);
594630 Builder.CreateStore(New, NewAI);
595631
596632 // If the load we just inserted is now dead, then the memset overwrote
606642 // can handle it like a load or store of the scalar type.
607643 if (MemTransferInst *MTI = dyn_cast(User)) {
608644 assert(Offset == 0 && "must be store to start of alloca");
645 assert(!NonConstantIdx && "Cannot replace dynamic transfer with insert");
609646
610647 // If the source and destination are both to the same alloca, then this is
611648 // a noop copy-to-self, just delete it. Otherwise, emit a load and store
678715 /// shifted to the right.
679716 Value *ConvertToScalarInfo::
680717 ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
681 uint64_t Offset, IRBuilder<> &Builder) {
718 uint64_t Offset, Value* NonConstantIdx,
719 IRBuilder<> &Builder) {
682720 // If the load is of the whole new alloca, no conversion is needed.
683721 Type *FromType = FromVal->getType();
684722 if (FromType == ToType && Offset == 0)
700738 assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
701739 }
702740 // Return the element extracted out of it.
703 Value *V = Builder.CreateExtractElement(FromVal, Builder.getInt32(Elt));
741 Value *Idx;
742 if (NonConstantIdx) {
743 if (Elt)
744 Idx = Builder.CreateAdd(NonConstantIdx,
745 Builder.getInt32(Elt),
746 "dyn.offset");
747 else
748 Idx = NonConstantIdx;
749 } else
750 Idx = Builder.getInt32(Elt);
751 Value *V = Builder.CreateExtractElement(FromVal, Idx);
704752 if (V->getType() != ToType)
705753 V = Builder.CreateBitCast(V, ToType);
706754 return V;
709757 // If ToType is a first class aggregate, extract out each of the pieces and
710758 // use insertvalue's to form the FCA.
711759 if (StructType *ST = dyn_cast(ToType)) {
760 assert(!NonConstantIdx &&
761 "Dynamic indexing into struct types not supported");
712762 const StructLayout &Layout = *TD.getStructLayout(ST);
713763 Value *Res = UndefValue::get(ST);
714764 for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
715765 Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
716766 Offset+Layout.getElementOffsetInBits(i),
717 Builder);
767 0, Builder);
718768 Res = Builder.CreateInsertValue(Res, Elt, i);
719769 }
720770 return Res;
721771 }
722772
723773 if (ArrayType *AT = dyn_cast(ToType)) {
774 assert(!NonConstantIdx &&
775 "Dynamic indexing into array types not supported");
724776 uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
725777 Value *Res = UndefValue::get(AT);
726778 for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
727779 Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
728 Offset+i*EltSize, Builder);
780 Offset+i*EltSize, 0, Builder);
729781 Res = Builder.CreateInsertValue(Res, Elt, i);
730782 }
731783 return Res;
791843 ///
792844 /// Offset is an offset from the original alloca, in bits that need to be
793845 /// shifted to the right.
846 ///
847 /// NonConstantIdx is an index value if there was a GEP with a non-constant
848 /// index value. If this is 0 then all GEPs used to find this insert address
849 /// are constant.
794850 Value *ConvertToScalarInfo::
795851 ConvertScalar_InsertValue(Value *SV, Value *Old,
796 uint64_t Offset, IRBuilder<> &Builder) {
852 uint64_t Offset, Value* NonConstantIdx,
853 IRBuilder<> &Builder) {
797854 // Convert the stored type to the actual type, shift it left to insert
798855 // then 'or' into place.
799856 Type *AllocaType = Old->getType();
814871 SV = Builder.CreateBitCast(SV, EltTy);
815872 uint64_t EltSize = TD.getTypeAllocSizeInBits(EltTy);
816873 unsigned Elt = Offset/EltSize;
817 return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt));
874 Value *Idx;
875 if (NonConstantIdx) {
876 if (Elt)
877 Idx = Builder.CreateAdd(NonConstantIdx,
878 Builder.getInt32(Elt),
879 "dyn.offset");
880 else
881 Idx = NonConstantIdx;
882 } else
883 Idx = Builder.getInt32(Elt);
884 return Builder.CreateInsertElement(Old, SV, Idx);
818885 }
819886
820887 // If SV is a first-class aggregate value, insert each value recursively.
821888 if (StructType *ST = dyn_cast(SV->getType())) {
889 assert(!NonConstantIdx &&
890 "Dynamic indexing into struct types not supported");
822891 const StructLayout &Layout = *TD.getStructLayout(ST);
823892 for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
824893 Value *Elt = Builder.CreateExtractValue(SV, i);
825894 Old = ConvertScalar_InsertValue(Elt, Old,
826895 Offset+Layout.getElementOffsetInBits(i),
827 Builder);
896 0, Builder);
828897 }
829898 return Old;
830899 }
831900
832901 if (ArrayType *AT = dyn_cast(SV->getType())) {
902 assert(!NonConstantIdx &&
903 "Dynamic indexing into array types not supported");
833904 uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
834905 for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
835906 Value *Elt = Builder.CreateExtractValue(SV, i);
836 Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
907 Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, 0, Builder);
837908 }
838909 return Old;
839910 }
33 target triple = "x86_64-apple-darwin10.0.0"
44
55 ; CHECK: @test1
6 ; CHECK: %[[alloc0:[\.a-z0-9]*]] = alloca <4 x float>
7 ; CHECK: %[[alloc1:[\.a-z0-9]*]] = alloca <4 x float>
8 ; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc0]]
6 ; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float>
7 ; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]]
8 ; CHECK: memset
9 ; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
910
1011 ; Split the array but don't replace the memset with an insert
1112 ; element as it's not a constant offset.
13 ; The load, however, can be replaced with an extract element.
1214 define float @test1(i32 %idx1, i32 %idx2) {
1315 entry:
1416 %0 = alloca [4 x <4 x float>]
2224 }
2325
2426 ; CHECK: @test2
25 ; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float>
26 ; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]]
27 ; CHECK: %ptr1 = getelementptr inbounds <4 x float>* %[[alloc]], i32 0, i32 %idx1
28 ; CHECK: store float 1.000000e+00, float* %ptr1
29 ; CHECK: %ptr2 = getelementptr inbounds <4 x float>* %[[alloc]], i32 0, i32 %idx2
30 ; CHECK: %ret = load float* %ptr2
31 ; CHECK: ret float %ret
27 ; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
28 ; CHECK: extractelement <4 x float> %[[ins]], i32 %idx2
3229
3330 ; Do SROA on the array when it has dynamic vector reads and writes.
3431 define float @test2(i32 %idx1, i32 %idx2) {
6057 ret float %ret
6158 }
6259
63 ; CHECK: @test4
60 ; CHECK: test4
61 ; CHECK: insertelement <16 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
62 ; CHECK: extractelement <16 x float> %0, i32 %idx2
63
64 ; Don't do SROA on a dynamically indexed vector when it spans
65 ; more than one array element of the alloca array it is within.
66 ; However, unlike test3, the store is on the vector type
67 ; so SROA will convert the large alloca into the large vector
68 ; type and do all accesses with insert/extract element
69 define float @test4(i32 %idx1, i32 %idx2) {
70 entry:
71 %0 = alloca [4 x <4 x float>]
72 %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
73 store <16 x float> zeroinitializer, <16 x float>* %bigvec
74 %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
75 store float 1.0, float* %ptr1
76 %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
77 %ret = load float* %ptr2
78 ret float %ret
79 }
80
81 ; CHECK: @test5
6482 ; CHECK: %0 = alloca [4 x <4 x float>]
6583 ; CHECK-NOT: alloca
6684
6785 ; Don't do SROA as there is a second dynamically indexed array
6886 ; which may span multiple elements of the alloca.
69 define float @test4(i32 %idx1, i32 %idx2) {
87 define float @test5(i32 %idx1, i32 %idx2) {
7088 entry:
7189 %0 = alloca [4 x <4 x float>]
7290 store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
7997 ret float %ret
8098 }
8199
82 ; CHECK: test5
83 ; CHECK: %[[alloc0:[\.a-z0-9]*]] = alloca <4 x float>
84 ; CHECK: %[[alloc1:[\.a-z0-9]*]] = alloca <4 x float>
85 ; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc0]]
86 ; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc1]]
87 ; CHECK: %ptr1 = getelementptr inbounds <4 x float>* %[[alloc0]], i32 0, i32 %idx1
88 ; CHECK: store float 1.000000e+00, float* %ptr1
89 ; CHECK: %ptr2 = getelementptr inbounds <4 x float>* %[[alloc1]], i32 0, i32 %idx2
90 ; CHECK: %ret = load float* %ptr2
100 ; CHECK: test6
101 ; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
102 ; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
91103
92104 %vector.pair = type { %vector.anon, %vector.anon }
93105 %vector.anon = type { %vector }
98110 ; the original GEP, just the indices it needs to get to the correct offset of
99111 ; some type, not necessarily the dynamic vector.
100112 ; This test makes sure we don't have this crash.
101 define float @test5(i32 %idx1, i32 %idx2) {
113 define float @test6(i32 %idx1, i32 %idx2) {
102114 entry:
103115 %0 = alloca %vector.pair
104116 store %vector.pair zeroinitializer, %vector.pair* %0
109121 ret float %ret
110122 }
111123
112 ; CHECK: test6
113 ; CHECK: %[[alloc0:[\.a-z0-9]*]] = alloca <4 x float>
114 ; CHECK: %[[alloc1:[\.a-z0-9]*]] = alloca <4 x float>
115 ; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc0]]
116 ; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc1]]
117 ; CHECK: %ptr1 = getelementptr inbounds <4 x float>* %[[alloc0]], i32 0, i32 %idx1
118 ; CHECK: store float 1.000000e+00, float* %ptr1
119 ; CHECK: %ptr2 = getelementptr inbounds <4 x float>* %[[alloc1]], i32 0, i32 %idx2
120 ; CHECK: %ret = load float* %ptr2
124 ; CHECK: test7
125 ; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
126 ; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
121127
122128 %array.pair = type { [2 x %array.anon], %array.anon }
123129 %array.anon = type { [2 x %vector] }
124130
125 ; This is the same as test5 and tests the same crash, but on arrays.
126 define float @test6(i32 %idx1, i32 %idx2) {
131 ; This is the same as test6 and tests the same crash, but on arrays.
132 define float @test7(i32 %idx1, i32 %idx2) {
127133 entry:
128134 %0 = alloca %array.pair
129135 store %array.pair zeroinitializer, %array.pair* %0
134140 ret float %ret
135141 }
136142
143 ; CHECK: test8
144 ; CHECK: %[[offset1:[\.a-z0-9]*]] = add i32 %idx1, 1
145 ; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %[[offset1]]
146 ; CHECK: %[[offset2:[\.a-z0-9]*]] = add i32 %idx2, 2
147 ; CHECK: extractelement <4 x float> %[[ins]], i32 %[[offset2]]
148
149 ; Do SROA on the vector when it has dynamic vector reads and writes
150 ; from a non-zero offset.
151 define float @test8(i32 %idx1, i32 %idx2) {
152 entry:
153 %0 = alloca <4 x float>
154 store <4 x float> zeroinitializer, <4 x float>* %0
155 %ptr1 = getelementptr <4 x float>* %0, i32 0, i32 1
156 %ptr2 = bitcast float* %ptr1 to <3 x float>*
157 %ptr3 = getelementptr <3 x float>* %ptr2, i32 0, i32 %idx1
158 store float 1.0, float* %ptr3
159 %ptr4 = getelementptr <4 x float>* %0, i32 0, i32 2
160 %ptr5 = bitcast float* %ptr4 to <2 x float>*
161 %ptr6 = getelementptr <2 x float>* %ptr5, i32 0, i32 %idx2
162 %ret = load float* %ptr6
163 ret float %ret
164 }
165
137166 declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)