llvm.org GIT mirror llvm / cf397e4
[LV] Move InterleaveGroup and InterleavedAccessInfo to VectorUtils.h (NFC)

Move the 2 classes out of LoopVectorize.cpp to make it easier to re-use them for VPlan outside LoopVectorize.cpp.

Reviewers: Ayal, mssimpso, rengolin, dcaballe, mkuper, hsaito, hfinkel, xbolva00
Reviewed By: rengolin, xbolva00
Differential Revision: https://reviews.llvm.org/D49488

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@342027 91177308-0d34-0410-b5e6-96231b3b80d8

Florian Hahn, 1 year, 1 month ago
5 changed file(s) with 691 addition(s) and 695 deletion(s).
1414 #define LLVM_ANALYSIS_VECTORUTILS_H
1515
1616 #include "llvm/ADT/MapVector.h"
17 #include "llvm/Analysis/LoopAccessAnalysis.h"
1718 #include "llvm/Analysis/TargetLibraryInfo.h"
1819 #include "llvm/IR/IRBuilder.h"
1920
175176 /// elements, it will be padded with undefs.
176177 Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);
177178
179 /// The group of interleaved loads/stores sharing the same stride and
180 /// close to each other.
181 ///
182 /// Each member in this group has an index starting from 0, and the largest
183 /// index should be less than the interleave factor, which is equal to the absolute
184 /// value of the access's stride.
185 ///
186 /// E.g. An interleaved load group of factor 4:
187 /// for (unsigned i = 0; i < 1024; i+=4) {
188 /// a = A[i]; // Member of index 0
189 /// b = A[i+1]; // Member of index 1
190 /// d = A[i+3]; // Member of index 3
191 /// ...
192 /// }
193 ///
194 /// An interleaved store group of factor 4:
195 /// for (unsigned i = 0; i < 1024; i+=4) {
196 /// ...
197 /// A[i] = a; // Member of index 0
198 /// A[i+1] = b; // Member of index 1
199 /// A[i+2] = c; // Member of index 2
200 /// A[i+3] = d; // Member of index 3
201 /// }
202 ///
203 /// Note: the interleaved load group could have gaps (missing members), but
204 /// the interleaved store group doesn't allow gaps.
205 class InterleaveGroup {
206 public:
207 InterleaveGroup(Instruction *Instr, int Stride, unsigned Align)
208 : Align(Align), InsertPos(Instr) {
209 assert(Align && "The alignment should be non-zero");
210
211 Factor = std::abs(Stride);
212 assert(Factor > 1 && "Invalid interleave factor");
213
214 Reverse = Stride < 0;
215 Members[0] = Instr;
216 }
217
218 bool isReverse() const { return Reverse; }
219 unsigned getFactor() const { return Factor; }
220 unsigned getAlignment() const { return Align; }
221 unsigned getNumMembers() const { return Members.size(); }
222
223 /// Try to insert a new member \p Instr with index \p Index and
224 /// alignment \p NewAlign. The index is relative to the leader and it could be
225 /// negative if it is the new leader.
226 ///
227 /// \returns false if the instruction doesn't belong to the group.
228 bool insertMember(Instruction *Instr, int Index, unsigned NewAlign) {
229 assert(NewAlign && "The new member's alignment should be non-zero");
230
231 int Key = Index + SmallestKey;
232
233 // Skip if there is already a member with the same index.
234 if (Members.find(Key) != Members.end())
235 return false;
236
237 if (Key > LargestKey) {
238 // The largest index is always less than the interleave factor.
239 if (Index >= static_cast<int>(Factor))
240 return false;
241
242 LargestKey = Key;
243 } else if (Key < SmallestKey) {
244 // The largest index is always less than the interleave factor.
245 if (LargestKey - Key >= static_cast<int>(Factor))
246 return false;
247
248 SmallestKey = Key;
249 }
250
251 // It's always safe to select the minimum alignment.
252 Align = std::min(Align, NewAlign);
253 Members[Key] = Instr;
254 return true;
255 }
256
257 /// Get the member with the given index \p Index
258 ///
259 /// \returns nullptr if the group contains no such member.
260 Instruction *getMember(unsigned Index) const {
261 int Key = SmallestKey + Index;
262 auto Member = Members.find(Key);
263 if (Member == Members.end())
264 return nullptr;
265
266 return Member->second;
267 }
268
269 /// Get the index for the given member. Unlike the key in the member
270 /// map, the index starts from 0.
271 unsigned getIndex(Instruction *Instr) const {
272 for (auto I : Members)
273 if (I.second == Instr)
274 return I.first - SmallestKey;
275
276 llvm_unreachable("InterleaveGroup contains no such member");
277 }
278
279 Instruction *getInsertPos() const { return InsertPos; }
280 void setInsertPos(Instruction *Inst) { InsertPos = Inst; }
281
282 /// Add metadata (e.g. alias info) from the instructions in this group to \p
283 /// NewInst.
284 ///
285 /// FIXME: this function currently does not add noalias metadata a la
286 /// addNewMetadata. To do that we need to compute the intersection of the
287 /// noalias info from all members.
288 void addMetadata(Instruction *NewInst) const {
289 SmallVector<Value *, 4> VL;
290 std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
291 [](std::pair<int, Instruction *> p) { return p.second; });
292 propagateMetadata(NewInst, VL);
293 }
294
295 private:
296 unsigned Factor; // Interleave Factor.
297 bool Reverse;
298 unsigned Align;
299 DenseMap<int, Instruction *> Members;
300 int SmallestKey = 0;
301 int LargestKey = 0;
302
303 // To avoid breaking dependences, vectorized instructions of an interleave
304 // group should be inserted at either the first load or the last store in
305 // program order.
306 //
307 // E.g. %even = load i32 // Insert Position
308 // %add = add i32 %even // Use of %even
309 // %odd = load i32
310 //
311 // store i32 %even
312 // %odd = add i32 // Def of %odd
313 // store i32 %odd // Insert Position
314 Instruction *InsertPos;
315 };
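The SmallestKey/LargestKey bookkeeping above is easier to follow in isolation. Below is a minimal standalone sketch of the same index-to-key scheme, using std::map and integer member ids in place of DenseMap and Instruction pointers; it only illustrates the arithmetic, it is not the LLVM class itself.

#include <cassert>
#include <cstdlib>
#include <iostream>
#include <map>

// Simplified model of InterleaveGroup's member bookkeeping: members are keyed
// by Index + SmallestKey, and the spread of keys must stay below the factor.
struct GroupModel {
  int Factor;                 // absolute value of the stride
  std::map<int, int> Members; // key -> member id (stands in for Instruction *)
  int SmallestKey = 0, LargestKey = 0;

  GroupModel(int LeaderId, int Stride) : Factor(std::abs(Stride)) {
    assert(Factor > 1 && "Invalid interleave factor");
    Members[0] = LeaderId; // the leader always gets key 0
  }

  // Mirrors InterleaveGroup::insertMember: Index is relative to the leader and
  // may be negative if the new member becomes the new leader.
  bool insertMember(int Id, int Index) {
    int Key = Index + SmallestKey;
    if (Members.count(Key))
      return false;                  // slot already taken
    if (Key > LargestKey) {
      if (Index >= Factor)
        return false;                // would exceed the factor
      LargestKey = Key;
    } else if (Key < SmallestKey) {
      if (LargestKey - Key >= Factor)
        return false;                // spread would exceed the factor
      SmallestKey = Key;
    }
    Members[Key] = Id;
    return true;
  }

  // Mirrors getMember: indices are normalized so the smallest key is index 0.
  int getMember(int Index) const {
    auto It = Members.find(SmallestKey + Index);
    return It == Members.end() ? -1 : It->second;
  }
};

int main() {
  GroupModel G(/*LeaderId=*/10, /*Stride=*/4); // factor-4 group, leader at index 0
  G.insertMember(11, 1);  // A[i+1]
  G.insertMember(13, 3);  // A[i+3]
  G.insertMember(9, -1);  // rejected: indices -1..3 would need 5 slots for factor 4
  for (int I = 0; I < 4; ++I)
    std::cout << "index " << I << " -> member " << G.getMember(I) << "\n";
}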
316
317 /// Drive the analysis of interleaved memory accesses in the loop.
318 ///
319 /// Use this class to analyze interleaved accesses only when we can vectorize
320 /// a loop. Otherwise it's meaningless to do analysis as the vectorization
321 /// on interleaved accesses is unsafe.
322 ///
323 /// The analysis collects interleave groups and records the relationships
324 /// between the member and the group in a map.
325 class InterleavedAccessInfo {
326 public:
327 InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
328 DominatorTree *DT, LoopInfo *LI,
329 const LoopAccessInfo *LAI)
330 : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
331
332 ~InterleavedAccessInfo() {
333 SmallPtrSet<InterleaveGroup *, 4> DelSet;
334 // Avoid releasing a pointer twice.
335 for (auto &I : InterleaveGroupMap)
336 DelSet.insert(I.second);
337 for (auto *Ptr : DelSet)
338 delete Ptr;
339 }
340
341 /// Analyze the interleaved accesses and collect them in interleave
342 /// groups. Symbolic strides are substituted using the stride map from LoopAccessInfo.
343 void analyzeInterleaving();
344
345 /// Check if \p Instr belongs to any interleave group.
346 bool isInterleaved(Instruction *Instr) const {
347 return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
348 }
349
350 /// Get the interleave group that \p Instr belongs to.
351 ///
352 /// \returns nullptr if \p Instr does not belong to any group.
353 InterleaveGroup *getInterleaveGroup(Instruction *Instr) const {
354 auto Group = InterleaveGroupMap.find(Instr);
355 if (Group == InterleaveGroupMap.end())
356 return nullptr;
357 return Group->second;
358 }
359
360 /// Returns true if an interleaved group that may access memory
361 /// out-of-bounds requires a scalar epilogue iteration for correctness.
362 bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
363
364 private:
365 /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
366 /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
367 /// The interleaved access analysis can also add new predicates (for example
368 /// by versioning strides of pointers).
369 PredicatedScalarEvolution &PSE;
370
371 Loop *TheLoop;
372 DominatorTree *DT;
373 LoopInfo *LI;
374 const LoopAccessInfo *LAI;
375
376 /// True if the loop may contain non-reversed interleaved groups with
377 /// out-of-bounds accesses. We ensure we don't speculatively access memory
378 /// out-of-bounds by executing at least one scalar epilogue iteration.
379 bool RequiresScalarEpilogue = false;
380
381 /// Holds the relationships between the members and the interleave group.
382 DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap;
383
384 /// Holds dependences among the memory accesses in the loop. It maps a source
385 /// access to a set of dependent sink accesses.
386 DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;
387
388 /// The descriptor for a strided memory access.
389 struct StrideDescriptor {
390 StrideDescriptor() = default;
391 StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
392 unsigned Align)
393 : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}
394
395 // The access's stride. It is negative for a reverse access.
396 int64_t Stride = 0;
397
398 // The scalar expression of this access.
399 const SCEV *Scev = nullptr;
400
401 // The size of the memory object.
402 uint64_t Size = 0;
403
404 // The alignment of this access.
405 unsigned Align = 0;
406 };
407
408 /// A type for holding instructions and their stride descriptors.
409 using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
410
411 /// Create a new interleave group with the given instruction \p Instr,
412 /// stride \p Stride and alignment \p Align.
413 ///
414 /// \returns the newly created interleave group.
415 InterleaveGroup *createInterleaveGroup(Instruction *Instr, int Stride,
416 unsigned Align) {
417 assert(!isInterleaved(Instr) && "Already in an interleaved access group");
418 InterleaveGroupMap[Instr] = new InterleaveGroup(Instr, Stride, Align);
419 return InterleaveGroupMap[Instr];
420 }
421
422 /// Release the group and remove all the relationships.
423 void releaseGroup(InterleaveGroup *Group) {
424 for (unsigned i = 0; i < Group->getFactor(); i++)
425 if (Instruction *Member = Group->getMember(i))
426 InterleaveGroupMap.erase(Member);
427
428 delete Group;
429 }
430
431 /// Collect all the accesses with a constant stride in program order.
432 void collectConstStrideAccesses(
433 MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
434 const ValueToValueMap &Strides);
435
436 /// Returns true if \p Stride is allowed in an interleaved group.
437 static bool isStrided(int Stride);
438
439 /// Returns true if \p BB is a predicated block.
440 bool isPredicated(BasicBlock *BB) const {
441 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
442 }
443
444 /// Returns true if LoopAccessInfo can be used for dependence queries.
445 bool areDependencesValid() const {
446 return LAI && LAI->getDepChecker().getDependences();
447 }
448
449 /// Returns true if memory accesses \p A and \p B can be reordered, if
450 /// necessary, when constructing interleaved groups.
451 ///
452 /// \p A must precede \p B in program order. We return false if reordering is
453 /// not necessary or is prevented because \p A and \p B may be dependent.
454 bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
455 StrideEntry *B) const {
456 // Code motion for interleaved accesses can potentially hoist strided loads
457 // and sink strided stores. The code below checks the legality of the
458 // following two conditions:
459 //
460 // 1. Potentially moving a strided load (B) before any store (A) that
461 // precedes B, or
462 //
463 // 2. Potentially moving a strided store (A) after any load or store (B)
464 // that A precedes.
465 //
466 // It's legal to reorder A and B if we know there isn't a dependence from A
467 // to B. Note that this determination is conservative since some
468 // dependences could potentially be reordered safely.
469
470 // A is potentially the source of a dependence.
471 auto *Src = A->first;
472 auto SrcDes = A->second;
473
474 // B is potentially the sink of a dependence.
475 auto *Sink = B->first;
476 auto SinkDes = B->second;
477
478 // Code motion for interleaved accesses can't violate WAR dependences.
479 // Thus, reordering is legal if the source isn't a write.
480 if (!Src->mayWriteToMemory())
481 return true;
482
483 // At least one of the accesses must be strided.
484 if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
485 return true;
486
487 // If dependence information is not available from LoopAccessInfo,
488 // conservatively assume the instructions can't be reordered.
489 if (!areDependencesValid())
490 return false;
491
492 // If we know there is a dependence from source to sink, assume the
493 // instructions can't be reordered. Otherwise, reordering is legal.
494 return Dependences.find(Src) == Dependences.end() ||
495 !Dependences.lookup(Src).count(Sink);
496 }
497
498 /// Collect the dependences from LoopAccessInfo.
499 ///
500 /// We process the dependences once during the interleaved access analysis to
501 /// enable constant-time dependence queries.
502 void collectDependences() {
503 if (!areDependencesValid())
504 return;
505 auto *Deps = LAI->getDepChecker().getDependences();
506 for (auto Dep : *Deps)
507 Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
508 }
509 };
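canReorderMemAccessesForInterleavedGroups and collectDependences boil down to building a source-to-sinks map once and then answering each reorder query with two lookups. The following is a small standalone sketch of that idea, with integers standing in for Instructions and standard containers in place of DenseMap/SmallPtrSet; it is a simplified model, not the implementation above.

#include <iostream>
#include <map>
#include <set>
#include <utility>
#include <vector>

// A recorded dependence: the source access must happen before the sink.
using Dependence = std::pair<int, int>; // (source id, sink id)

// Mirrors collectDependences: fold the dependence list into a map from each
// source to the set of its sinks, so later queries are just lookups.
std::map<int, std::set<int>>
collectDependences(const std::vector<Dependence> &Deps) {
  std::map<int, std::set<int>> M;
  for (const auto &D : Deps)
    M[D.first].insert(D.second);
  return M;
}

// Mirrors the final test in canReorderMemAccessesForInterleavedGroups:
// reordering A (earlier) and B (later) is allowed unless a dependence from
// A to B was recorded.
bool canReorder(const std::map<int, std::set<int>> &Deps, int A, int B) {
  auto It = Deps.find(A);
  return It == Deps.end() || !It->second.count(B);
}

int main() {
  // Accesses 1..4 in program order; access 2 depends on access 1.
  auto Deps = collectDependences({{1, 2}});
  std::cout << std::boolalpha;
  std::cout << "reorder(1,2): " << canReorder(Deps, 1, 2) << "\n"; // false
  std::cout << "reorder(1,3): " << canReorder(Deps, 1, 3) << "\n"; // true
}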
510
178511 } // llvm namespace
179512
180513 #endif
53305330 return nullptr;
53315331 }
53325332
5333 /// A helper function that returns the alignment of a load or store instruction.
5334 inline unsigned getLoadStoreAlignment(Value *I) {
5335 assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
5336 "Expected Load or Store instruction");
5337 if (auto *LI = dyn_cast<LoadInst>(I))
5338 return LI->getAlignment();
5339 return cast<StoreInst>(I)->getAlignment();
5340 }
5341
5342 /// A helper function that returns the address space of the pointer operand of
5343 /// a load or store instruction.
5344 inline unsigned getLoadStoreAddressSpace(Value *I) {
5345 assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
5346 "Expected Load or Store instruction");
5347 if (auto *LI = dyn_cast<LoadInst>(I))
5348 return LI->getPointerAddressSpace();
5349 return cast<StoreInst>(I)->getPointerAddressSpace();
5350 }
5351
53335352 } // end namespace llvm
53345353
53355354 #endif // LLVM_IR_INSTRUCTIONS_H
1414 #include "llvm/ADT/EquivalenceClasses.h"
1515 #include "llvm/Analysis/DemandedBits.h"
1616 #include "llvm/Analysis/LoopInfo.h"
17 #include "llvm/Analysis/LoopIterator.h"
1718 #include "llvm/Analysis/ScalarEvolution.h"
1819 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
1920 #include "llvm/Analysis/TargetTransformInfo.h"
2425 #include "llvm/IR/PatternMatch.h"
2526 #include "llvm/IR/Value.h"
2627
28 #define DEBUG_TYPE "vectorutils"
29
2730 using namespace llvm;
2831 using namespace llvm::PatternMatch;
32
33 /// Maximum factor for an interleaved memory access.
34 static cl::opt<unsigned> MaxInterleaveGroupFactor(
35 "max-interleave-group-factor", cl::Hidden,
36 cl::desc("Maximum factor for an interleaved access group (default = 8)"),
37 cl::init(8));
2938
3039 /// Identify if the intrinsic is trivially vectorizable.
3140 /// This method returns true if the intrinsic's argument types are all
574583
575584 return ResList[0];
576585 }
586
587 bool InterleavedAccessInfo::isStrided(int Stride) {
588 unsigned Factor = std::abs(Stride);
589 return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
590 }
591
592 void InterleavedAccessInfo::collectConstStrideAccesses(
593 MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
594 const ValueToValueMap &Strides) {
595 auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
596
597 // Since it's desired that the load/store instructions be maintained in
598 // "program order" for the interleaved access analysis, we have to visit the
599 // blocks in the loop in reverse postorder (i.e., in a topological order).
600 // Such an ordering will ensure that any load/store that may be executed
601 // before a second load/store will precede the second load/store in
602 // AccessStrideInfo.
603 LoopBlocksDFS DFS(TheLoop);
604 DFS.perform(LI);
605 for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
606 for (auto &I : *BB) {
607 auto *LI = dyn_cast<LoadInst>(&I);
608 auto *SI = dyn_cast<StoreInst>(&I);
609 if (!LI && !SI)
610 continue;
611
612 Value *Ptr = getLoadStorePointerOperand(&I);
613 // We don't check wrapping here because we don't know yet if Ptr will be
614 // part of a full group or a group with gaps. Checking wrapping for all
615 // pointers (even those that end up in groups with no gaps) will be overly
616 // conservative. For full groups, wrapping should be ok since if we would
617 // wrap around the address space we would do a memory access at nullptr
618 // even without the transformation. The wrapping checks are therefore
619 // deferred until after we've formed the interleaved groups.
620 int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
621 /*Assume=*/true, /*ShouldCheckWrap=*/false);
622
623 const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
624 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
625 uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
626
627 // An alignment of 0 means target ABI alignment.
628 unsigned Align = getLoadStoreAlignment(&I);
629 if (!Align)
630 Align = DL.getABITypeAlignment(PtrTy->getElementType());
631
632 AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
633 }
634 }
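The reverse-postorder requirement described in the comment above can be illustrated independently of LoopBlocksDFS. Here is a standalone sketch on a hypothetical diamond CFG, using a post-order DFS over an adjacency list; the real traversal runs over LLVM BasicBlocks inside the loop.

#include <functional>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

// Reverse postorder on a tiny diamond CFG: entry -> {then, else} -> exit.
// In an acyclic region, RPO visits a block before any of its successors,
// which is why collectConstStrideAccesses uses it to record loads/stores in
// an order consistent with program order.
int main() {
  std::map<std::string, std::vector<std::string>> Succs = {
      {"entry", {"then", "else"}}, {"then", {"exit"}},
      {"else", {"exit"}},          {"exit", {}}};

  std::vector<std::string> PostOrder;
  std::set<std::string> Visited;
  std::function<void(const std::string &)> DFS = [&](const std::string &BB) {
    if (!Visited.insert(BB).second)
      return;
    for (const auto &S : Succs[BB])
      DFS(S);
    PostOrder.push_back(BB); // a block is emitted after all of its successors
  };
  DFS("entry");

  // Reversing the post-order prints entry first and exit last.
  for (auto It = PostOrder.rbegin(); It != PostOrder.rend(); ++It)
    std::cout << *It << "\n";
}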
635
636 // Analyze interleaved accesses and collect them into interleaved load and
637 // store groups.
638 //
639 // When generating code for an interleaved load group, we effectively hoist all
640 // loads in the group to the location of the first load in program order. When
641 // generating code for an interleaved store group, we sink all stores to the
642 // location of the last store. This code motion can change the order of load
643 // and store instructions and may break dependences.
644 //
645 // The code generation strategy mentioned above ensures that we won't violate
646 // any write-after-read (WAR) dependences.
647 //
648 // E.g., for the WAR dependence: a = A[i]; // (1)
649 // A[i] = b; // (2)
650 //
651 // The store group of (2) is always inserted at or below (2), and the load
652 // group of (1) is always inserted at or above (1). Thus, the instructions will
653 // never be reordered. All other dependences are checked to ensure the
654 // correctness of the instruction reordering.
655 //
656 // The algorithm visits all memory accesses in the loop in bottom-up program
657 // order. Program order is established by traversing the blocks in the loop in
658 // reverse postorder when collecting the accesses.
659 //
660 // We visit the memory accesses in bottom-up order because it can simplify the
661 // construction of store groups in the presence of write-after-write (WAW)
662 // dependences.
663 //
664 // E.g., for the WAW dependence: A[i] = a; // (1)
665 // A[i] = b; // (2)
666 // A[i + 1] = c; // (3)
667 //
668 // We will first create a store group with (3) and (2). (1) can't be added to
669 // this group because it and (2) are dependent. However, (1) can be grouped
670 // with other accesses that may precede it in program order. Note that a
671 // bottom-up order does not imply that WAW dependences should not be checked.
672 void InterleavedAccessInfo::analyzeInterleaving() {
673 LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
674 const ValueToValueMap &Strides = LAI->getSymbolicStrides();
675
676 // Holds all accesses with a constant stride.
677 MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
678 collectConstStrideAccesses(AccessStrideInfo, Strides);
679
680 if (AccessStrideInfo.empty())
681 return;
682
683 // Collect the dependences in the loop.
684 collectDependences();
685
686 // Holds all interleaved store groups temporarily.
687 SmallSetVector<InterleaveGroup *, 4> StoreGroups;
688 // Holds all interleaved load groups temporarily.
689 SmallSetVector<InterleaveGroup *, 4> LoadGroups;
690
691 // Search in bottom-up program order for pairs of accesses (A and B) that can
692 // form interleaved load or store groups. In the algorithm below, access A
693 // precedes access B in program order. We initialize a group for B in the
694 // outer loop of the algorithm, and then in the inner loop, we attempt to
695 // insert each A into B's group if:
696 //
697 // 1. A and B have the same stride,
698 // 2. A and B have the same memory object size, and
699 // 3. A belongs in B's group according to its distance from B.
700 //
701 // Special care is taken to ensure group formation will not break any
702 // dependences.
703 for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
704 BI != E; ++BI) {
705 Instruction *B = BI->first;
706 StrideDescriptor DesB = BI->second;
707
708 // Initialize a group for B if it has an allowable stride. Even if we don't
709 // create a group for B, we continue with the bottom-up algorithm to ensure
710 // we don't break any of B's dependences.
711 InterleaveGroup *Group = nullptr;
712 if (isStrided(DesB.Stride)) {
713 Group = getInterleaveGroup(B);
714 if (!Group) {
715 LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
716 << '\n');
717 Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
718 }
719 if (B->mayWriteToMemory())
720 StoreGroups.insert(Group);
721 else
722 LoadGroups.insert(Group);
723 }
724
725 for (auto AI = std::next(BI); AI != E; ++AI) {
726 Instruction *A = AI->first;
727 StrideDescriptor DesA = AI->second;
728
729 // Our code motion strategy implies that we can't have dependences
730 // between accesses in an interleaved group and other accesses located
731 // between the first and last member of the group. Note that this also
732 // means that a group can't have more than one member at a given offset.
733 // The accesses in a group can have dependences with other accesses, but
734 // we must ensure we don't extend the boundaries of the group such that
735 // we encompass those dependent accesses.
736 //
737 // For example, assume we have the sequence of accesses shown below in a
738 // stride-2 loop:
739 //
740 // (1, 2) is a group | A[i] = a; // (1)
741 // | A[i-1] = b; // (2) |
742 // A[i-3] = c; // (3)
743 // A[i] = d; // (4) | (2, 4) is not a group
744 //
745 // Because accesses (2) and (3) are dependent, we can group (2) with (1)
746 // but not with (4). If we did, the dependent access (3) would be within
747 // the boundaries of the (2, 4) group.
748 if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
749 // If a dependence exists and A is already in a group, we know that A
750 // must be a store since A precedes B and WAR dependences are allowed.
751 // Thus, A would be sunk below B. We release A's group to prevent this
752 // illegal code motion. A will then be free to form another group with
753 // instructions that precede it.
754 if (isInterleaved(A)) {
755 InterleaveGroup *StoreGroup = getInterleaveGroup(A);
756 StoreGroups.remove(StoreGroup);
757 releaseGroup(StoreGroup);
758 }
759
760 // If a dependence exists and A is not already in a group (or it was
761 // and we just released it), B might be hoisted above A (if B is a
762 // load) or another store might be sunk below A (if B is a store). In
763 // either case, we can't add additional instructions to B's group. B
764 // will only form a group with instructions that it precedes.
765 break;
766 }
767
768 // At this point, we've checked for illegal code motion. If either A or B
769 // isn't strided, there's nothing left to do.
770 if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
771 continue;
772
773 // Ignore A if it's already in a group or isn't the same kind of memory
774 // operation as B.
775 // Note that mayReadFromMemory() isn't mutually exclusive to
776 // mayWriteToMemory in the case of atomic loads. We shouldn't see those
777 // here, canVectorizeMemory() should have returned false - except for the
778 // case we asked for optimization remarks.
779 if (isInterleaved(A) ||
780 (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
781 (A->mayWriteToMemory() != B->mayWriteToMemory()))
782 continue;
783
784 // Check rules 1 and 2. Ignore A if its stride or size is different from
785 // that of B.
786 if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
787 continue;
788
789 // Ignore A if the memory object of A and B don't belong to the same
790 // address space
791 if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
792 continue;
793
794 // Calculate the distance from A to B.
795 const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
796 PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
797 if (!DistToB)
798 continue;
799 int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
800
801 // Check rule 3. Ignore A if its distance to B is not a multiple of the
802 // size.
803 if (DistanceToB % static_cast<int64_t>(DesB.Size))
804 continue;
805
806 // Ignore A if either A or B is in a predicated block. Although we
807 // currently prevent group formation for predicated accesses, we may be
808 // able to relax this limitation in the future once we handle more
809 // complicated blocks.
810 if (isPredicated(A->getParent()) || isPredicated(B->getParent()))
811 continue;
812
813 // The index of A is the index of B plus A's distance to B in multiples
814 // of the size.
815 int IndexA =
816 Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
817
818 // Try to insert A into B's group.
819 if (Group->insertMember(A, IndexA, DesA.Align)) {
820 LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
821 << " into the interleave group with" << *B
822 << '\n');
823 InterleaveGroupMap[A] = Group;
824
825 // Set the first load in program order as the insert position.
826 if (A->mayReadFromMemory())
827 Group->setInsertPos(A);
828 }
829 } // Iteration over A accesses.
830 } // Iteration over B accesses.
831
832 // Remove interleaved store groups with gaps.
833 for (InterleaveGroup *Group : StoreGroups)
834 if (Group->getNumMembers() != Group->getFactor()) {
835 LLVM_DEBUG(
836 dbgs() << "LV: Invalidate candidate interleaved store group due "
837 "to gaps.\n");
838 releaseGroup(Group);
839 }
840 // Remove interleaved groups with gaps (currently only loads) whose memory
841 // accesses may wrap around. We have to revisit the getPtrStride analysis,
842 // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
843 // not check wrapping (see documentation there).
844 // FORNOW we use Assume=false;
845 // TODO: Change to Assume=true but making sure we don't exceed the threshold
846 // of runtime SCEV assumptions checks (thereby potentially failing to
847 // vectorize altogether).
848 // Additional optional optimizations:
849 // TODO: If we are peeling the loop and we know that the first pointer doesn't
850 // wrap then we can deduce that all pointers in the group don't wrap.
851 // This means that we can forcefully peel the loop in order to only have to
852 // check the first pointer for no-wrap. When we'll change to use Assume=true
853 // we'll only need at most one runtime check per interleaved group.
854 for (InterleaveGroup *Group : LoadGroups) {
855 // Case 1: A full group. We can skip the checks; for full groups, if the wide
856 // load would wrap around the address space we would do a memory access at
857 // nullptr even without the transformation.
858 if (Group->getNumMembers() == Group->getFactor())
859 continue;
860
861 // Case 2: If the first and last members of the group don't wrap, this implies
862 // that all the pointers in the group don't wrap.
863 // So we check only group member 0 (which is always guaranteed to exist),
864 // and group member Factor - 1; if the latter doesn't exist we rely on
865 // peeling (if it is a non-reversed access -- see Case 3).
866 Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
867 if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
868 /*ShouldCheckWrap=*/true)) {
869 LLVM_DEBUG(
870 dbgs() << "LV: Invalidate candidate interleaved group due to "
871 "first group member potentially pointer-wrapping.\n");
872 releaseGroup(Group);
873 continue;
874 }
875 Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
876 if (LastMember) {
877 Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
878 if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
879 /*ShouldCheckWrap=*/true)) {
880 LLVM_DEBUG(
881 dbgs() << "LV: Invalidate candidate interleaved group due to "
882 "last group member potentially pointer-wrapping.\n");
883 releaseGroup(Group);
884 }
885 } else {
886 // Case 3: A non-reversed interleaved load group with gaps: We need
887 // to execute at least one scalar epilogue iteration. This will ensure
888 // we don't speculatively access memory out-of-bounds. We only need
889 // to look for a member at index factor - 1, since every group must have
890 // a member at index zero.
891 if (Group->isReverse()) {
892 LLVM_DEBUG(
893 dbgs() << "LV: Invalidate candidate interleaved group due to "
894 "a reverse access with gaps.\n");
895 releaseGroup(Group);
896 continue;
897 }
898 LLVM_DEBUG(
899 dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
900 RequiresScalarEpilogue = true;
901 }
902 }
903 }
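Rule 3 and the IndexA computation above reduce to simple integer arithmetic once the SCEV distance between two accesses is a known constant. A standalone sketch with hypothetical sizes and distances follows; plain integers stand in for the SCEV machinery, and memberIndex is an illustrative helper, not part of the LLVM code.

#include <iostream>
#include <optional>

// Given B's index in the group, the byte distance from A to B, and the common
// member size, compute A's index -- mirroring rule 3 and the IndexA expression
// in analyzeInterleaving. Returns nothing if the distance is not a multiple of
// the size (A cannot be a member of B's group).
std::optional<int> memberIndex(int IndexB, long long DistanceToB, long long Size) {
  if (DistanceToB % Size != 0)
    return std::nullopt;
  return IndexB + static_cast<int>(DistanceToB / Size);
}

int main() {
  // Factor-4 group of i32 accesses (Size = 4 bytes), B is the leader (index 0).
  if (auto I = memberIndex(0, 8, 4))
    std::cout << "A at +8 bytes gets index " << *I << "\n";  // index 2
  if (auto I = memberIndex(0, -4, 4))
    std::cout << "A at -4 bytes gets index " << *I << "\n";  // index -1: A becomes the new leader
  if (!memberIndex(0, 6, 4))
    std::cout << "A at +6 bytes is rejected (not a multiple of the size)\n";
}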
170170 "enable-interleaved-mem-accesses", cl::init(false), cl::Hidden,
171171 cl::desc("Enable vectorization on interleaved memory accesses in a loop"));
172172
173 /// Maximum factor for an interleaved memory access.
174 static cl::opt<unsigned> MaxInterleaveGroupFactor(
175 "max-interleave-group-factor", cl::Hidden,
176 cl::desc("Maximum factor for an interleaved access group (default = 8)"),
177 cl::init(8));
178
179173 /// We don't interleave loops with a known constant trip count below this
180174 /// number.
181175 static const unsigned TinyTripCountInterleaveThreshold = 128;
264258 return VectorType::get(Scalar, VF);
265259 }
266260
267 // FIXME: The following helper functions have multiple implementations
268 // in the project. They can be effectively organized in a common Load/Store
269 // utilities unit.
270
271261 /// A helper function that returns the type of loaded or stored value.
272262 static Type *getMemInstValueType(Value *I) {
273263 assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
275265 if (auto *LI = dyn_cast<LoadInst>(I))
276266 return LI->getType();
277267 return cast<StoreInst>(I)->getValueOperand()->getType();
278 }
279
280 /// A helper function that returns the alignment of load or store instruction.
281 static unsigned getMemInstAlignment(Value *I) {
282 assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
283 "Expected Load or Store instruction");
284 if (auto *LI = dyn_cast<LoadInst>(I))
285 return LI->getAlignment();
286 return cast<StoreInst>(I)->getAlignment();
287 }
288
289 /// A helper function that returns the address space of the pointer operand of
290 /// load or store instruction.
291 static unsigned getMemInstAddressSpace(Value *I) {
292 assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
293 "Expected Load or Store instruction");
294 if (auto *LI = dyn_cast<LoadInst>(I))
295 return LI->getPointerAddressSpace();
296 return cast<StoreInst>(I)->getPointerAddressSpace();
297268 }
298269
299270 /// A helper function that returns true if the given type is irregular. The
807778 addMetadata(I, From);
808779 }
809780 }
810
811 namespace llvm {
812
813 /// The group of interleaved loads/stores sharing the same stride and
814 /// close to each other.
815 ///
816 /// Each member in this group has an index starting from 0, and the largest
817 /// index should be less than the interleave factor, which is equal to the absolute
818 /// value of the access's stride.
819 ///
820 /// E.g. An interleaved load group of factor 4:
821 /// for (unsigned i = 0; i < 1024; i+=4) {
822 /// a = A[i]; // Member of index 0
823 /// b = A[i+1]; // Member of index 1
824 /// d = A[i+3]; // Member of index 3
825 /// ...
826 /// }
827 ///
828 /// An interleaved store group of factor 4:
829 /// for (unsigned i = 0; i < 1024; i+=4) {
830 /// ...
831 /// A[i] = a; // Member of index 0
832 /// A[i+1] = b; // Member of index 1
833 /// A[i+2] = c; // Member of index 2
834 /// A[i+3] = d; // Member of index 3
835 /// }
836 ///
837 /// Note: the interleaved load group could have gaps (missing members), but
838 /// the interleaved store group doesn't allow gaps.
839 class InterleaveGroup {
840 public:
841 InterleaveGroup(Instruction *Instr, int Stride, unsigned Align)
842 : Align(Align), InsertPos(Instr) {
843 assert(Align && "The alignment should be non-zero");
844
845 Factor = std::abs(Stride);
846 assert(Factor > 1 && "Invalid interleave factor");
847
848 Reverse = Stride < 0;
849 Members[0] = Instr;
850 }
851
852 bool isReverse() const { return Reverse; }
853 unsigned getFactor() const { return Factor; }
854 unsigned getAlignment() const { return Align; }
855 unsigned getNumMembers() const { return Members.size(); }
856
857 /// Try to insert a new member \p Instr with index \p Index and
858 /// alignment \p NewAlign. The index is relative to the leader and it could be
859 /// negative if it is the new leader.
860 ///
861 /// \returns false if the instruction doesn't belong to the group.
862 bool insertMember(Instruction *Instr, int Index, unsigned NewAlign) {
863 assert(NewAlign && "The new member's alignment should be non-zero");
864
865 int Key = Index + SmallestKey;
866
867 // Skip if there is already a member with the same index.
868 if (Members.find(Key) != Members.end())
869 return false;
870
871 if (Key > LargestKey) {
872 // The largest index is always less than the interleave factor.
873 if (Index >= static_cast<int>(Factor))
874 return false;
875
876 LargestKey = Key;
877 } else if (Key < SmallestKey) {
878 // The largest index is always less than the interleave factor.
879 if (LargestKey - Key >= static_cast<int>(Factor))
880 return false;
881
882 SmallestKey = Key;
883 }
884
885 // It's always safe to select the minimum alignment.
886 Align = std::min(Align, NewAlign);
887 Members[Key] = Instr;
888 return true;
889 }
890
891 /// Get the member with the given index \p Index
892 ///
893 /// \returns nullptr if the group contains no such member.
894 Instruction *getMember(unsigned Index) const {
895 int Key = SmallestKey + Index;
896 auto Member = Members.find(Key);
897 if (Member == Members.end())
898 return nullptr;
899
900 return Member->second;
901 }
902
903 /// Get the index for the given member. Unlike the key in the member
904 /// map, the index starts from 0.
905 unsigned getIndex(Instruction *Instr) const {
906 for (auto I : Members)
907 if (I.second == Instr)
908 return I.first - SmallestKey;
909
910 llvm_unreachable("InterleaveGroup contains no such member");
911 }
912
913 Instruction *getInsertPos() const { return InsertPos; }
914 void setInsertPos(Instruction *Inst) { InsertPos = Inst; }
915
916 /// Add metadata (e.g. alias info) from the instructions in this group to \p
917 /// NewInst.
918 ///
919 /// FIXME: this function currently does not add noalias metadata a la
920 /// addNewMetadata. To do that we need to compute the intersection of the
921 /// noalias info from all members.
922 void addMetadata(Instruction *NewInst) const {
923 SmallVector<Value *, 4> VL;
924 std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
925 [](std::pair<int, Instruction *> p) { return p.second; });
926 propagateMetadata(NewInst, VL);
927 }
928
929 private:
930 unsigned Factor; // Interleave Factor.
931 bool Reverse;
932 unsigned Align;
933 DenseMap<int, Instruction *> Members;
934 int SmallestKey = 0;
935 int LargestKey = 0;
936
937 // To avoid breaking dependences, vectorized instructions of an interleave
938 // group should be inserted at either the first load or the last store in
939 // program order.
940 //
941 // E.g. %even = load i32 // Insert Position
942 // %add = add i32 %even // Use of %even
943 // %odd = load i32
944 //
945 // store i32 %even
946 // %odd = add i32 // Def of %odd
947 // store i32 %odd // Insert Position
948 Instruction *InsertPos;
949 };
950 } // end namespace llvm
951
952 namespace {
953
954 /// Drive the analysis of interleaved memory accesses in the loop.
955 ///
956 /// Use this class to analyze interleaved accesses only when we can vectorize
957 /// a loop. Otherwise it's meaningless to do analysis as the vectorization
958 /// on interleaved accesses is unsafe.
959 ///
960 /// The analysis collects interleave groups and records the relationships
961 /// between the member and the group in a map.
962 class InterleavedAccessInfo {
963 public:
964 InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
965 DominatorTree *DT, LoopInfo *LI,
966 const LoopAccessInfo *LAI)
967 : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
968
969 ~InterleavedAccessInfo() {
970 SmallPtrSet<InterleaveGroup *, 4> DelSet;
971 // Avoid releasing a pointer twice.
972 for (auto &I : InterleaveGroupMap)
973 DelSet.insert(I.second);
974 for (auto *Ptr : DelSet)
975 delete Ptr;
976 }
977
978 /// Analyze the interleaved accesses and collect them in interleave
980 /// groups. Symbolic strides are substituted using the stride map from LoopAccessInfo.
980 void analyzeInterleaving();
981
982 /// Check if \p Instr belongs to any interleave group.
983 bool isInterleaved(Instruction *Instr) const {
984 return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
985 }
986
987 /// Get the interleave group that \p Instr belongs to.
988 ///
989 /// \returns nullptr if \p Instr does not belong to any group.
990 InterleaveGroup *getInterleaveGroup(Instruction *Instr) const {
991 auto Group = InterleaveGroupMap.find(Instr);
992 if (Group == InterleaveGroupMap.end())
993 return nullptr;
994 return Group->second;
995 }
996
997 /// Returns true if an interleaved group that may access memory
998 /// out-of-bounds requires a scalar epilogue iteration for correctness.
999 bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }
1000
1001 private:
1002 /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
1003 /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
1004 /// The interleaved access analysis can also add new predicates (for example
1005 /// by versioning strides of pointers).
1006 PredicatedScalarEvolution &PSE;
1007
1008 Loop *TheLoop;
1009 DominatorTree *DT;
1010 LoopInfo *LI;
1011 const LoopAccessInfo *LAI;
1012
1013 /// True if the loop may contain non-reversed interleaved groups with
1014 /// out-of-bounds accesses. We ensure we don't speculatively access memory
1015 /// out-of-bounds by executing at least one scalar epilogue iteration.
1016 bool RequiresScalarEpilogue = false;
1017
1018 /// Holds the relationships between the members and the interleave group.
1019 DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap;
1020
1021 /// Holds dependences among the memory accesses in the loop. It maps a source
1022 /// access to a set of dependent sink accesses.
1023 DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;
1024
1025 /// The descriptor for a strided memory access.
1026 struct StrideDescriptor {
1027 StrideDescriptor() = default;
1028 StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
1029 unsigned Align)
1030 : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}
1031
1032 // The access's stride. It is negative for a reverse access.
1033 int64_t Stride = 0;
1034
1035 // The scalar expression of this access.
1036 const SCEV *Scev = nullptr;
1037
1038 // The size of the memory object.
1039 uint64_t Size = 0;
1040
1041 // The alignment of this access.
1042 unsigned Align = 0;
1043 };
1044
1045 /// A type for holding instructions and their stride descriptors.
1046 using StrideEntry = std::pair<Instruction *, StrideDescriptor>;
1047
1048 /// Create a new interleave group with the given instruction \p Instr,
1049 /// stride \p Stride and alignment \p Align.
1050 ///
1051 /// \returns the newly created interleave group.
1052 InterleaveGroup *createInterleaveGroup(Instruction *Instr, int Stride,
1053 unsigned Align) {
1054 assert(!isInterleaved(Instr) && "Already in an interleaved access group");
1055 InterleaveGroupMap[Instr] = new InterleaveGroup(Instr, Stride, Align);
1056 return InterleaveGroupMap[Instr];
1057 }
1058
1059 /// Release the group and remove all the relationships.
1060 void releaseGroup(InterleaveGroup *Group) {
1061 for (unsigned i = 0; i < Group->getFactor(); i++)
1062 if (Instruction *Member = Group->getMember(i))
1063 InterleaveGroupMap.erase(Member);
1064
1065 delete Group;
1066 }
1067
1068 /// Collect all the accesses with a constant stride in program order.
1069 void collectConstStrideAccesses(
1070 MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
1071 const ValueToValueMap &Strides);
1072
1073 /// Returns true if \p Stride is allowed in an interleaved group.
1074 static bool isStrided(int Stride) {
1075 unsigned Factor = std::abs(Stride);
1076 return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
1077 }
1078
1079 /// Returns true if \p BB is a predicated block.
1080 bool isPredicated(BasicBlock *BB) const {
1081 return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
1082 }
1083
1084 /// Returns true if LoopAccessInfo can be used for dependence queries.
1085 bool areDependencesValid() const {
1086 return LAI && LAI->getDepChecker().getDependences();
1087 }
1088
1089 /// Returns true if memory accesses \p A and \p B can be reordered, if
1090 /// necessary, when constructing interleaved groups.
1091 ///
1092 /// \p A must precede \p B in program order. We return false if reordering is
1093 /// not necessary or is prevented because \p A and \p B may be dependent.
1094 bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
1095 StrideEntry *B) const {
1096 // Code motion for interleaved accesses can potentially hoist strided loads
1097 // and sink strided stores. The code below checks the legality of the
1098 // following two conditions:
1099 //
1100 // 1. Potentially moving a strided load (B) before any store (A) that
1101 // precedes B, or
1102 //
1103 // 2. Potentially moving a strided store (A) after any load or store (B)
1104 // that A precedes.
1105 //
1106 // It's legal to reorder A and B if we know there isn't a dependence from A
1107 // to B. Note that this determination is conservative since some
1108 // dependences could potentially be reordered safely.
1109
1110 // A is potentially the source of a dependence.
1111 auto *Src = A->first;
1112 auto SrcDes = A->second;
1113
1114 // B is potentially the sink of a dependence.
1115 auto *Sink = B->first;
1116 auto SinkDes = B->second;
1117
1118 // Code motion for interleaved accesses can't violate WAR dependences.
1119 // Thus, reordering is legal if the source isn't a write.
1120 if (!Src->mayWriteToMemory())
1121 return true;
1122
1123 // At least one of the accesses must be strided.
1124 if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
1125 return true;
1126
1127 // If dependence information is not available from LoopAccessInfo,
1128 // conservatively assume the instructions can't be reordered.
1129 if (!areDependencesValid())
1130 return false;
1131
1132 // If we know there is a dependence from source to sink, assume the
1133 // instructions can't be reordered. Otherwise, reordering is legal.
1134 return Dependences.find(Src) == Dependences.end() ||
1135 !Dependences.lookup(Src).count(Sink);
1136 }
1137
1138 /// Collect the dependences from LoopAccessInfo.
1139 ///
1140 /// We process the dependences once during the interleaved access analysis to
1141 /// enable constant-time dependence queries.
1142 void collectDependences() {
1143 if (!areDependencesValid())
1144 return;
1145 auto *Deps = LAI->getDepChecker().getDependences();
1146 for (auto Dep : *Deps)
1147 Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
1148 }
1149 };
1150
1151 } // end anonymous namespace
1152781
1153782 static void emitMissedWarning(Function *F, Loop *L,
1154783 const LoopVectorizeHints &LH,
22871916 Type *ScalarTy = getMemInstValueType(Instr);
22881917 unsigned InterleaveFactor = Group->getFactor();
22891918 Type *VecTy = VectorType::get(ScalarTy, InterleaveFactor * VF);
2290 Type *PtrTy = VecTy->getPointerTo(getMemInstAddressSpace(Instr));
1919 Type *PtrTy = VecTy->getPointerTo(getLoadStoreAddressSpace(Instr));
22911920
22921921 // Prepare for the new pointers.
22931922 setDebugLocFromInst(Builder, Ptr);
24302059 Type *ScalarDataTy = getMemInstValueType(Instr);
24312060 Type *DataTy = VectorType::get(ScalarDataTy, VF);
24322061 Value *Ptr = getLoadStorePointerOperand(Instr);
2433 unsigned Alignment = getMemInstAlignment(Instr);
2062 unsigned Alignment = getLoadStoreAlignment(Instr);
24342063 // An alignment of 0 means target abi alignment. We need to use the scalar's
24352064 // target abi alignment in such a case.
24362065 const DataLayout &DL = Instr->getModule()->getDataLayout();
24372066 if (!Alignment)
24382067 Alignment = DL.getABITypeAlignment(ScalarDataTy);
2439 unsigned AddressSpace = getMemInstAddressSpace(Instr);
2068 unsigned AddressSpace = getLoadStoreAddressSpace(Instr);
24402069
24412070 // Determine if the pointer operand of the access is either consecutive or
24422071 // reverse consecutive.
46994328 Uniforms[VF].insert(Worklist.begin(), Worklist.end());
47004329 }
47014330
4702 void InterleavedAccessInfo::collectConstStrideAccesses(
4703 MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
4704 const ValueToValueMap &Strides) {
4705 auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
4706
4707 // Since it's desired that the load/store instructions be maintained in
4708 // "program order" for the interleaved access analysis, we have to visit the
4709 // blocks in the loop in reverse postorder (i.e., in a topological order).
4710 // Such an ordering will ensure that any load/store that may be executed
4711 // before a second load/store will precede the second load/store in
4712 // AccessStrideInfo.
4713 LoopBlocksDFS DFS(TheLoop);
4714 DFS.perform(LI);
4715 for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
4716 for (auto &I : *BB) {
4717 auto *LI = dyn_cast<LoadInst>(&I);
4718 auto *SI = dyn_cast<StoreInst>(&I);
4719 if (!LI && !SI)
4720 continue;
4721
4722 Value *Ptr = getLoadStorePointerOperand(&I);
4723 // We don't check wrapping here because we don't know yet if Ptr will be
4724 // part of a full group or a group with gaps. Checking wrapping for all
4725 // pointers (even those that end up in groups with no gaps) will be overly
4726 // conservative. For full groups, wrapping should be ok since if we would
4727 // wrap around the address space we would do a memory access at nullptr
4728 // even without the transformation. The wrapping checks are therefore
4729 // deferred until after we've formed the interleaved groups.
4730 int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
4731 /*Assume=*/true, /*ShouldCheckWrap=*/false);
4732
4733 const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
4734 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4735 uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
4736
4737 // An alignment of 0 means target ABI alignment.
4738 unsigned Align = getMemInstAlignment(&I);
4739 if (!Align)
4740 Align = DL.getABITypeAlignment(PtrTy->getElementType());
4741
4742 AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
4743 }
4744 }
4745
4746 // Analyze interleaved accesses and collect them into interleaved load and
4747 // store groups.
4748 //
4749 // When generating code for an interleaved load group, we effectively hoist all
4750 // loads in the group to the location of the first load in program order. When
4751 // generating code for an interleaved store group, we sink all stores to the
4752 // location of the last store. This code motion can change the order of load
4753 // and store instructions and may break dependences.
4754 //
4755 // The code generation strategy mentioned above ensures that we won't violate
4756 // any write-after-read (WAR) dependences.
4757 //
4758 // E.g., for the WAR dependence: a = A[i]; // (1)
4759 // A[i] = b; // (2)
4760 //
4761 // The store group of (2) is always inserted at or below (2), and the load
4762 // group of (1) is always inserted at or above (1). Thus, the instructions will
4763 // never be reordered. All other dependences are checked to ensure the
4764 // correctness of the instruction reordering.
4765 //
4766 // The algorithm visits all memory accesses in the loop in bottom-up program
4767 // order. Program order is established by traversing the blocks in the loop in
4768 // reverse postorder when collecting the accesses.
4769 //
4770 // We visit the memory accesses in bottom-up order because it can simplify the
4771 // construction of store groups in the presence of write-after-write (WAW)
4772 // dependences.
4773 //
4774 // E.g., for the WAW dependence: A[i] = a; // (1)
4775 // A[i] = b; // (2)
4776 // A[i + 1] = c; // (3)
4777 //
4778 // We will first create a store group with (3) and (2). (1) can't be added to
4779 // this group because it and (2) are dependent. However, (1) can be grouped
4780 // with other accesses that may precede it in program order. Note that a
4781 // bottom-up order does not imply that WAW dependences should not be checked.
4782 void InterleavedAccessInfo::analyzeInterleaving() {
4783 LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
4784 const ValueToValueMap &Strides = LAI->getSymbolicStrides();
4785
4786 // Holds all accesses with a constant stride.
4787 MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
4788 collectConstStrideAccesses(AccessStrideInfo, Strides);
4789
4790 if (AccessStrideInfo.empty())
4791 return;
4792
4793 // Collect the dependences in the loop.
4794 collectDependences();
4795
4796 // Holds all interleaved store groups temporarily.
4797 SmallSetVector<InterleaveGroup *, 4> StoreGroups;
4798 // Holds all interleaved load groups temporarily.
4799 SmallSetVector<InterleaveGroup *, 4> LoadGroups;
4800
4801 // Search in bottom-up program order for pairs of accesses (A and B) that can
4802 // form interleaved load or store groups. In the algorithm below, access A
4803 // precedes access B in program order. We initialize a group for B in the
4804 // outer loop of the algorithm, and then in the inner loop, we attempt to
4805 // insert each A into B's group if:
4806 //
4807 // 1. A and B have the same stride,
4808 // 2. A and B have the same memory object size, and
4809 // 3. A belongs in B's group according to its distance from B.
4810 //
4811 // Special care is taken to ensure group formation will not break any
4812 // dependences.
4813 for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
4814 BI != E; ++BI) {
4815 Instruction *B = BI->first;
4816 StrideDescriptor DesB = BI->second;
4817
4818 // Initialize a group for B if it has an allowable stride. Even if we don't
4819 // create a group for B, we continue with the bottom-up algorithm to ensure
4820 // we don't break any of B's dependences.
4821 InterleaveGroup *Group = nullptr;
4822 if (isStrided(DesB.Stride)) {
4823 Group = getInterleaveGroup(B);
4824 if (!Group) {
4825 LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
4826 << '\n');
4827 Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
4828 }
4829 if (B->mayWriteToMemory())
4830 StoreGroups.insert(Group);
4831 else
4832 LoadGroups.insert(Group);
4833 }
4834
4835 for (auto AI = std::next(BI); AI != E; ++AI) {
4836 Instruction *A = AI->first;
4837 StrideDescriptor DesA = AI->second;
4838
4839 // Our code motion strategy implies that we can't have dependences
4840 // between accesses in an interleaved group and other accesses located
4841 // between the first and last member of the group. Note that this also
4842 // means that a group can't have more than one member at a given offset.
4843 // The accesses in a group can have dependences with other accesses, but
4844 // we must ensure we don't extend the boundaries of the group such that
4845 // we encompass those dependent accesses.
4846 //
4847 // For example, assume we have the sequence of accesses shown below in a
4848 // stride-2 loop:
4849 //
4850 // (1, 2) is a group | A[i] = a; // (1)
4851 // | A[i-1] = b; // (2) |
4852 // A[i-3] = c; // (3)
4853 // A[i] = d; // (4) | (2, 4) is not a group
4854 //
4855 // Because accesses (2) and (3) are dependent, we can group (2) with (1)
4856 // but not with (4). If we did, the dependent access (3) would be within
4857 // the boundaries of the (2, 4) group.
4858 if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
4859 // If a dependence exists and A is already in a group, we know that A
4860 // must be a store since A precedes B and WAR dependences are allowed.
4861 // Thus, A would be sunk below B. We release A's group to prevent this
4862 // illegal code motion. A will then be free to form another group with
4863 // instructions that precede it.
4864 if (isInterleaved(A)) {
4865 InterleaveGroup *StoreGroup = getInterleaveGroup(A);
4866 StoreGroups.remove(StoreGroup);
4867 releaseGroup(StoreGroup);
4868 }
4869
4870 // If a dependence exists and A is not already in a group (or it was
4871 // and we just released it), B might be hoisted above A (if B is a
4872 // load) or another store might be sunk below A (if B is a store). In
4873 // either case, we can't add additional instructions to B's group. B
4874 // will only form a group with instructions that it precedes.
4875 break;
4876 }
4877
4878 // At this point, we've checked for illegal code motion. If either A or B
4879 // isn't strided, there's nothing left to do.
4880 if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
4881 continue;
4882
4883 // Ignore A if it's already in a group or isn't the same kind of memory
4884 // operation as B.
4885 // Note that mayReadFromMemory() isn't mutually exclusive to mayWriteToMemory
4886 // in the case of atomic loads. We shouldn't see those here, canVectorizeMemory()
4887 // should have returned false - except for the case we asked for optimization
4888 // remarks.
4889 if (isInterleaved(A) || (A->mayReadFromMemory() != B->mayReadFromMemory())
4890 || (A->mayWriteToMemory() != B->mayWriteToMemory()))
4891 continue;
4892
4893 // Check rules 1 and 2. Ignore A if its stride or size is different from
4894 // that of B.
4895 if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
4896 continue;
4897
4898 // Ignore A if the memory object of A and B don't belong to the same
4899 // address space
4900 if (getMemInstAddressSpace(A) != getMemInstAddressSpace(B))
4901 continue;
4902
4903 // Calculate the distance from A to B.
4904 const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
4905 PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
4906 if (!DistToB)
4907 continue;
4908 int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
4909
4910 // Check rule 3. Ignore A if its distance to B is not a multiple of the
4911 // size.
4912 if (DistanceToB % static_cast<int64_t>(DesB.Size))
4913 continue;
4914
4915 // Ignore A if either A or B is in a predicated block. Although we
4916 // currently prevent group formation for predicated accesses, we may be
4917 // able to relax this limitation in the future once we handle more
4918 // complicated blocks.
4919 if (isPredicated(A->getParent()) || isPredicated(B->getParent()))
4920 continue;
4921
4922 // The index of A is the index of B plus A's distance to B in multiples
4923 // of the size.
4924 int IndexA =
4925 Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
4926
4927 // Try to insert A into B's group.
4928 if (Group->insertMember(A, IndexA, DesA.Align)) {
4929 LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
4930 << " into the interleave group with" << *B
4931 << '\n');
4932 InterleaveGroupMap[A] = Group;
4933
4934 // Set the first load in program order as the insert position.
4935 if (A->mayReadFromMemory())
4936 Group->setInsertPos(A);
4937 }
4938 } // Iteration over A accesses.
4939 } // Iteration over B accesses.
4940
4941 // Remove interleaved store groups with gaps.
4942 for (InterleaveGroup *Group : StoreGroups)
4943 if (Group->getNumMembers() != Group->getFactor()) {
4944 LLVM_DEBUG(
4945 dbgs() << "LV: Invalidate candidate interleaved store group due "
4946 "to gaps.\n");
4947 releaseGroup(Group);
4948 }
4949 // Remove interleaved groups with gaps (currently only loads) whose memory
4950 // accesses may wrap around. We have to revisit the getPtrStride analysis,
4951 // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
4952 // not check wrapping (see documentation there).
4953 // FORNOW we use Assume=false;
4954 // TODO: Change to Assume=true but making sure we don't exceed the threshold
4955 // of runtime SCEV assumptions checks (thereby potentially failing to
4956 // vectorize altogether).
4957 // Additional optional optimizations:
4958 // TODO: If we are peeling the loop and we know that the first pointer doesn't
4959 // wrap then we can deduce that all pointers in the group don't wrap.
4960 // This means that we can forcefully peel the loop in order to only have to
4961 // check the first pointer for no-wrap. When we'll change to use Assume=true
4962 // we'll only need at most one runtime check per interleaved group.
4963 for (InterleaveGroup *Group : LoadGroups) {
4964 // Case 1: A full group. We can skip the checks; for full groups, if the wide
4965 // load would wrap around the address space we would do a memory access at
4966 // nullptr even without the transformation.
4967 if (Group->getNumMembers() == Group->getFactor())
4968 continue;
4969
4970 // Case 2: If the first and last members of the group don't wrap, this implies
4971 // that all the pointers in the group don't wrap.
4972 // So we check only group member 0 (which is always guaranteed to exist),
4973 // and group member Factor - 1; if the latter doesn't exist we rely on
4974 // peeling (if it is a non-reversed access -- see Case 3).
4975 Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
4976 if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
4977 /*ShouldCheckWrap=*/true)) {
4978 LLVM_DEBUG(
4979 dbgs() << "LV: Invalidate candidate interleaved group due to "
4980 "first group member potentially pointer-wrapping.\n");
4981 releaseGroup(Group);
4982 continue;
4983 }
4984 Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
4985 if (LastMember) {
4986 Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
4987 if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
4988 /*ShouldCheckWrap=*/true)) {
4989 LLVM_DEBUG(
4990 dbgs() << "LV: Invalidate candidate interleaved group due to "
4991 "last group member potentially pointer-wrapping.\n");
4992 releaseGroup(Group);
4993 }
4994 } else {
4995 // Case 3: A non-reversed interleaved load group with gaps: We need
4996 // to execute at least one scalar epilogue iteration. This will ensure
4997 // we don't speculatively access memory out-of-bounds. We only need
4998 // to look for a member at index factor - 1, since every group must have
4999 // a member at index zero.
5000 if (Group->isReverse()) {
5001 LLVM_DEBUG(
5002 dbgs() << "LV: Invalidate candidate interleaved group due to "
5003 "a reverse access with gaps.\n");
5004 releaseGroup(Group);
5005 continue;
5006 }
5007 LLVM_DEBUG(
5008 dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
5009 RequiresScalarEpilogue = true;
5010 }
5011 }
5012 }
5013
50144331 Optional LoopVectorizationCostModel::computeMaxVF(bool OptForSize) {
50154332 if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) {
50164333 // TODO: It may by useful to do since it's still likely to be dynamically
58125129 Type *ValTy = getMemInstValueType(I);
58135130 auto SE = PSE.getSE();
58145131
5815 unsigned Alignment = getMemInstAlignment(I);
5816 unsigned AS = getMemInstAddressSpace(I);
5132 unsigned Alignment = getLoadStoreAlignment(I);
5133 unsigned AS = getLoadStoreAddressSpace(I);
58175134 Value *Ptr = getLoadStorePointerOperand(I);
58185135 Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
58195136
58515168 unsigned VF) {
58525169 Type *ValTy = getMemInstValueType(I);
58535170 Type *VectorTy = ToVectorTy(ValTy, VF);
5854 unsigned Alignment = getMemInstAlignment(I);
5171 unsigned Alignment = getLoadStoreAlignment(I);
58555172 Value *Ptr = getLoadStorePointerOperand(I);
5856 unsigned AS = getMemInstAddressSpace(I);
5173 unsigned AS = getLoadStoreAddressSpace(I);
58575174 int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
58585175
58595176 assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
58875204 unsigned VF) {
58885205 Type *ValTy = getMemInstValueType(I);
58895206 Type *VectorTy = ToVectorTy(ValTy, VF);
5890 unsigned Alignment = getMemInstAlignment(I);
5207 unsigned Alignment = getLoadStoreAlignment(I);
58915208 Value *Ptr = getLoadStorePointerOperand(I);
58925209
58935210 return TTI.getAddressComputationCost(VectorTy) +
58995216 unsigned VF) {
59005217 Type *ValTy = getMemInstValueType(I);
59015218 Type *VectorTy = ToVectorTy(ValTy, VF);
5902 unsigned AS = getMemInstAddressSpace(I);
5219 unsigned AS = getLoadStoreAddressSpace(I);
59035220
59045221 auto Group = getInterleavedAccessGroup(I);
59055222 assert(Group && "Fail to get an interleaved access group.");
59335250 // moment.
59345251 if (VF == 1) {
59355252 Type *ValTy = getMemInstValueType(I);
5936 unsigned Alignment = getMemInstAlignment(I);
5937 unsigned AS = getMemInstAddressSpace(I);
5253 unsigned Alignment = getLoadStoreAlignment(I);
5254 unsigned AS = getLoadStoreAddressSpace(I);
59385255
59395256 return TTI.getAddressComputationCost(ValTy) +
59405257 TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I);
0 ; REQUIRES: asserts
11 ; RUN: opt -mtriple=s390x-unknown-linux -mcpu=z13 -loop-vectorize \
2 ; RUN: -force-vector-width=4 -debug-only=loop-vectorize \
2 ; RUN: -force-vector-width=4 -debug-only=loop-vectorize,vectorutils \
33 ; RUN: -disable-output < %s 2>&1 | FileCheck %s
44 ;
55 ; Check that the loop vectorizer performs memory interleaving with accurate