llvm.org GIT mirror: llvm / commit 88adfde

[TargetLowering] findOptimalMemOpLowering. NFCI.

This was a local static function in SelectionDAG, which I've promoted to
TargetLowering so that I can reuse it to estimate the cost of a memory
operation in D59787.

Differential Revision: https://reviews.llvm.org/D59766

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359543 91177308-0d34-0410-b5e6-96231b3b80d8

Sjoerd Meijer, 1 year, 4 months ago
3 changed files with 133 additions and 123 deletions.
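The commit message states the motivation: other code should be able to call the helper to estimate the cost of a memory operation (see D59787). As a rough, hypothetical sketch of that kind of reuse, and not code from this commit, the helper below queries the now-public hook and returns the number of load/store pairs an inline memcpy expansion of Size bytes would need. CountMemOps is an invented name; the call itself matches the declaration added to TargetLowering.h below.

// Hypothetical illustration only; CountMemOps is not part of this commit.
// It shows how code holding a TargetLowering reference could use the newly
// promoted hook to estimate how many memory operations an inline memcpy
// expansion of 'Size' bytes would take.
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Function.h"
#include <climits>
#include <cstdint>
#include <vector>

using namespace llvm;

static unsigned CountMemOps(const TargetLowering &TLI, const Function &F,
                            uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                            unsigned DstAS, unsigned SrcAS) {
  std::vector<EVT> MemOps;
  // No limit: we only want the count, not a profitability decision.
  if (!TLI.findOptimalMemOpLowering(MemOps, /*Limit=*/~0U, Size, DstAlign,
                                    SrcAlign, /*IsMemset=*/false,
                                    /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
                                    /*AllowOverlap=*/true, DstAS, SrcAS,
                                    F.getAttributes()))
    return UINT_MAX; // lowering declined (e.g. alignment precondition failed)
  return static_cast<unsigned>(MemOps.size()); // one load/store pair per type
}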
--- a/include/llvm/CodeGen/TargetLowering.h
+++ b/include/llvm/CodeGen/TargetLowering.h
@@ -2933,6 +2933,20 @@
   }
  };
 
+  /// Determines the optimal series of memory ops to replace the memset / memcpy.
+  /// Return true if the number of memory ops is below the threshold (Limit).
+  /// It returns the types of the sequence of memory ops to perform
+  /// memset / memcpy by reference.
+  bool findOptimalMemOpLowering(std::vector<EVT> &MemOps,
+                                unsigned Limit, uint64_t Size,
+                                unsigned DstAlign, unsigned SrcAlign,
+                                bool IsMemset,
+                                bool ZeroMemset,
+                                bool MemcpyStrSrc,
+                                bool AllowOverlap,
+                                unsigned DstAS, unsigned SrcAS,
+                                const AttributeList &FuncAttributes) const;
+
   /// Check to see if the specified operand of the specified instruction is a
   /// constant integer. If so, check to see if there are any bits set in the
   /// constant that are not demanded. If so, shrink the constant and return
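For orientation, here is a minimal, hypothetical sketch (not LLVM code) of how a caller consumes the MemOps vector that the declaration above fills in: each entry becomes one memory operation of that type, and the running offset advances by the type's size in bytes. The real consumers are the memcpy / memmove / memset expansion routines in SelectionDAG.cpp, shown further down, and they additionally handle the overlapping-access case.

#include "llvm/CodeGen/ValueTypes.h"
#include <cstdint>
#include <vector>

using namespace llvm;

// Hypothetical sketch only: walk a MemOps sequence produced by
// findOptimalMemOpLowering, one operation per entry. Ignores the
// overlapping-access refinement used by the real expanders.
static void walkMemOps(const std::vector<EVT> &MemOps) {
  uint64_t Offset = 0;
  for (EVT VT : MemOps) {
    uint64_t OpBytes = VT.getSizeInBits() / 8; // bytes covered by this op
    // ...emit a load of VT from Src + Offset (memcpy only) and a store of
    // VT to Dst + Offset here...
    Offset += OpBytes;
  }
  (void)Offset; // silence unused-variable warnings in this sketch
}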
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5562,111 +5562,6 @@
                                   SrcDelta + G->getOffset());
 }
 
-/// Determines the optimal series of memory ops to replace the memset / memcpy.
-/// Return true if the number of memory ops is below the threshold (Limit).
-/// It returns the types of the sequence of memory ops to perform
-/// memset / memcpy by reference.
-static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
-                                     unsigned Limit, uint64_t Size,
-                                     unsigned DstAlign, unsigned SrcAlign,
-                                     bool IsMemset,
-                                     bool ZeroMemset,
-                                     bool MemcpyStrSrc,
-                                     bool AllowOverlap,
-                                     unsigned DstAS, unsigned SrcAS,
-                                     SelectionDAG &DAG,
-                                     const TargetLowering &TLI) {
-  assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
-         "Expecting memcpy / memset source to meet alignment requirement!");
-  // If 'SrcAlign' is zero, that means the memory operation does not need to
-  // load the value, i.e. memset or memcpy from constant string. Otherwise,
-  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
-  // is the specified alignment of the memory operation. If it is zero, that
-  // means it's possible to change the alignment of the destination.
-  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
-  // not need to be loaded.
-  const Function &F = DAG.getMachineFunction().getFunction();
-  EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
-                                   IsMemset, ZeroMemset, MemcpyStrSrc,
-                                   F.getAttributes());
-
-  if (VT == MVT::Other) {
-    // Use the largest integer type whose alignment constraints are satisfied.
-    // We only need to check DstAlign here as SrcAlign is always greater or
-    // equal to DstAlign (or zero).
-    VT = MVT::i64;
-    while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
-           !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
-      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
-    assert(VT.isInteger());
-
-    // Find the largest legal integer type.
-    MVT LVT = MVT::i64;
-    while (!TLI.isTypeLegal(LVT))
-      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
-    assert(LVT.isInteger());
-
-    // If the type we've chosen is larger than the largest legal integer type
-    // then use that instead.
-    if (VT.bitsGT(LVT))
-      VT = LVT;
-  }
-
-  unsigned NumMemOps = 0;
-  while (Size != 0) {
-    unsigned VTSize = VT.getSizeInBits() / 8;
-    while (VTSize > Size) {
-      // For now, only use non-vector load / store's for the left-over pieces.
-      EVT NewVT = VT;
-      unsigned NewVTSize;
-
-      bool Found = false;
-      if (VT.isVector() || VT.isFloatingPoint()) {
-        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
-        if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
-            TLI.isSafeMemOpType(NewVT.getSimpleVT()))
-          Found = true;
-        else if (NewVT == MVT::i64 &&
-                 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
-                 TLI.isSafeMemOpType(MVT::f64)) {
-          // i64 is usually not legal on 32-bit targets, but f64 may be.
-          NewVT = MVT::f64;
-          Found = true;
-        }
-      }
-
-      if (!Found) {
-        do {
-          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
-          if (NewVT == MVT::i8)
-            break;
-        } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));
-      }
-      NewVTSize = NewVT.getSizeInBits() / 8;
-
-      // If the new VT cannot cover all of the remaining bits, then consider
-      // issuing a (or a pair of) unaligned and overlapping load / store.
-      bool Fast;
-      if (NumMemOps && AllowOverlap && NewVTSize < Size &&
-          TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
-          Fast)
-        VTSize = Size;
-      else {
-        VT = NewVT;
-        VTSize = NewVTSize;
-      }
-    }
-
-    if (++NumMemOps > Limit)
-      return false;
-
-    MemOps.push_back(VT);
-    Size -= VTSize;
-  }
-
-  return true;
-}
-
 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
   // On Darwin, -Os means optimize for size without hurting performance, so
   // only really optimize for size when -Oz (MinSize) is used.
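The greedy loop in FindOptimalMemOpLowering above (re-landed verbatim as a TargetLowering member at the end of this diff) covers Size bytes with the widest usable type, narrowing only for the left-over tail, or, when overlap is allowed and misaligned accesses are fast, keeping the wide type and letting the final access overlap the previous one. The self-contained model below illustrates just that splitting decision with plain byte widths; it deliberately ignores getOptimalMemOpType, isSafeMemOpType, type legality and the fast-unaligned check, so it is an illustration of the idea, not the real algorithm.

#include <cstdint>
#include <iostream>
#include <vector>

// Simplified model of the splitting loop; widths are plain byte counts.
// Assumes power-of-two widths and that every width is "legal" and "fast".
static std::vector<unsigned> splitMemOp(uint64_t Size, unsigned MaxWidth,
                                        bool AllowOverlap) {
  std::vector<unsigned> Ops;   // byte width of each emitted operation
  unsigned Width = MaxWidth;   // plays the role of VT
  while (Size != 0) {
    uint64_t OpSize = Width;   // plays the role of VTSize
    while (OpSize > Size) {
      // The wide op would run past the end. Like the real code, overlap the
      // previous op only if there is one, overlap is allowed, and even the
      // next narrower width cannot cover the remainder in a single op.
      if (!Ops.empty() && AllowOverlap && Width / 2 < Size) {
        OpSize = Size;         // keep the wide width, cover the tail only
      } else {
        Width /= 2;            // fall back to the next narrower width
        OpSize = Width;
      }
    }
    Ops.push_back(Width);
    Size -= OpSize;
  }
  return Ops;
}

int main() {
  // 15 bytes with 8-byte ops: {8, 4, 2, 1} without overlap,
  // {8, 8} with overlap (the second op re-covers one byte of the first).
  for (bool Overlap : {false, true}) {
    std::cout << (Overlap ? "overlap:    " : "no overlap: ");
    for (unsigned W : splitMemOp(15, 8, Overlap))
      std::cout << W << ' ';
    std::cout << '\n';
  }
  return 0;
}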
@@ -5733,13 +5628,13 @@
   bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
   unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
 
-  if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
-                                (DstAlignCanChange ? 0 : Align),
-                                (isZeroConstant ? 0 : SrcAlign),
-                                false, false, CopyFromConstant, true,
-                                DstPtrInfo.getAddrSpace(),
-                                SrcPtrInfo.getAddrSpace(),
-                                DAG, TLI))
+  if (!TLI.findOptimalMemOpLowering(MemOps, Limit, Size,
+                                    (DstAlignCanChange ? 0 : Align),
+                                    (isZeroConstant ? 0 : SrcAlign),
+                                    false, false, CopyFromConstant, true,
+                                    DstPtrInfo.getAddrSpace(),
+                                    SrcPtrInfo.getAddrSpace(),
+                                    MF.getFunction().getAttributes()))
     return SDValue();
 
   if (DstAlignCanChange) {
@@ -5914,12 +5809,12 @@
     SrcAlign = Align;
   unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
 
-  if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
-                                (DstAlignCanChange ? 0 : Align), SrcAlign,
-                                false, false, false, false,
-                                DstPtrInfo.getAddrSpace(),
-                                SrcPtrInfo.getAddrSpace(),
-                                DAG, TLI))
+  if (!TLI.findOptimalMemOpLowering(MemOps, Limit, Size,
+                                    (DstAlignCanChange ? 0 : Align), SrcAlign,
+                                    false, false, false, false,
+                                    DstPtrInfo.getAddrSpace(),
+                                    SrcPtrInfo.getAddrSpace(),
+                                    MF.getFunction().getAttributes()))
     return SDValue();
 
   if (DstAlignCanChange) {
@@ -6014,11 +5909,11 @@
     DstAlignCanChange = true;
   bool IsZeroVal =
       isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
-  if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
-                                Size, (DstAlignCanChange ? 0 : Align), 0,
-                                true, IsZeroVal, false, true,
-                                DstPtrInfo.getAddrSpace(), ~0u,
-                                DAG, TLI))
+  if (!TLI.findOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
+                                    Size, (DstAlignCanChange ? 0 : Align), 0,
+                                    true, IsZeroVal, false, true,
+                                    DstPtrInfo.getAddrSpace(), ~0u,
+                                    MF.getFunction().getAttributes()))
     return SDValue();
 
   if (DstAlignCanChange) {
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -150,6 +150,107 @@
       .setSExtResult(signExtend)
       .setZExtResult(!signExtend);
   return LowerCallTo(CLI);
+}
+
+bool
+TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
+                                         unsigned Limit, uint64_t Size,
+                                         unsigned DstAlign, unsigned SrcAlign,
+                                         bool IsMemset,
+                                         bool ZeroMemset,
+                                         bool MemcpyStrSrc,
+                                         bool AllowOverlap,
+                                         unsigned DstAS, unsigned SrcAS,
+                                         const AttributeList &FuncAttributes) const {
+  // If 'SrcAlign' is zero, that means the memory operation does not need to
+  // load the value, i.e. memset or memcpy from constant string. Otherwise,
+  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+  // is the specified alignment of the memory operation. If it is zero, that
+  // means it's possible to change the alignment of the destination.
+  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+  // not need to be loaded.
+  if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
+    return false;
+
+  EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
+                               IsMemset, ZeroMemset, MemcpyStrSrc,
+                               FuncAttributes);
+
+  if (VT == MVT::Other) {
+    // Use the largest integer type whose alignment constraints are satisfied.
+    // We only need to check DstAlign here as SrcAlign is always greater or
+    // equal to DstAlign (or zero).
+    VT = MVT::i64;
+    while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
+           !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
+      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+    assert(VT.isInteger());
+
+    // Find the largest legal integer type.
+    MVT LVT = MVT::i64;
+    while (!isTypeLegal(LVT))
+      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
+    assert(LVT.isInteger());
+
+    // If the type we've chosen is larger than the largest legal integer type
+    // then use that instead.
+    if (VT.bitsGT(LVT))
+      VT = LVT;
+  }
+
+  unsigned NumMemOps = 0;
+  while (Size != 0) {
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    while (VTSize > Size) {
+      // For now, only use non-vector load / store's for the left-over pieces.
+      EVT NewVT = VT;
+      unsigned NewVTSize;
+
+      bool Found = false;
+      if (VT.isVector() || VT.isFloatingPoint()) {
+        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
+        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
+            isSafeMemOpType(NewVT.getSimpleVT()))
+          Found = true;
+        else if (NewVT == MVT::i64 &&
+                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
+                 isSafeMemOpType(MVT::f64)) {
+          // i64 is usually not legal on 32-bit targets, but f64 may be.
+          NewVT = MVT::f64;
+          Found = true;
+        }
+      }
+
+      if (!Found) {
+        do {
+          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+          if (NewVT == MVT::i8)
+            break;
+        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
+      }
+      NewVTSize = NewVT.getSizeInBits() / 8;
+
+      // If the new VT cannot cover all of the remaining bits, then consider
+      // issuing a (or a pair of) unaligned and overlapping load / store.
+      bool Fast;
+      if (NumMemOps && AllowOverlap && NewVTSize < Size &&
+          allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
+          Fast)
+        VTSize = Size;
+      else {
+        VT = NewVT;
+        VTSize = NewVTSize;
+      }
+    }
+
+    if (++NumMemOps > Limit)
+      return false;
+
+    MemOps.push_back(VT);
+    Size -= VTSize;
+  }
+
+  return true;
 }
 
 /// Soften the operands of a comparison. This code is shared among BR_CC,
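The type that seeds the whole selection comes from getOptimalMemOpType, called at the top of findOptimalMemOpLowering above; a target that wants vectorized memcpy expansion overrides that hook. The sketch below uses an invented MyTargetLowering class purely for illustration and assumes the hook signature as it stands at the time of this commit; it is not code from any in-tree target.

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Attributes.h"

using namespace llvm;

// Hypothetical target: prefer 128-bit vector ops for large, 16-byte aligned
// copies, unless the function is marked noimplicitfloat. Returning MVT::Other
// makes findOptimalMemOpLowering fall back to the widest legal integer type.
class MyTargetLowering : public TargetLowering {
public:
  using TargetLowering::TargetLowering;

  EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                          bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                          const AttributeList &FuncAttributes) const override {
    if (Size >= 16 && (DstAlign == 0 || DstAlign >= 16) &&
        (SrcAlign == 0 || SrcAlign >= 16) &&
        !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat))
      return MVT::v4i32;
    return MVT::Other;
  }
};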