llvm.org GIT mirror llvm / 89419a6
[PM] Port LoopVectorize to the new PM. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275000 91177308-0d34-0410-b5e6-96231b3b80d8 Sean Silva 4 years ago
5 changed file(s) with 479 addition(s) and 340 deletion(s). Raw diff Collapse all Expand all
0 //===---- LoopVectorize.h ---------------------------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
10 // and generates target-independent LLVM-IR.
11 // The vectorizer uses the TargetTransformInfo analysis to estimate the costs
12 // of instructions in order to estimate the profitability of vectorization.
13 //
14 // The loop vectorizer combines consecutive loop iterations into a single
15 // 'wide' iteration. After this transformation the index is incremented
16 // by the SIMD vector width, and not by one.
17 //
18 // This pass has four parts:
19 // 1. The main loop pass that drives the different parts.
20 // 2. LoopVectorizationLegality - A unit that checks for the legality
21 // of the vectorization.
22 // 3. InnerLoopVectorizer - A unit that performs the actual
23 // widening of instructions.
24 // 4. LoopVectorizationCostModel - A unit that checks for the profitability
25 // of vectorization. It decides on the optimal vector width, which
26 // can be one, if vectorization is not profitable.
27 //
28 //===----------------------------------------------------------------------===//
29 //
30 // The reduction-variable vectorization is based on the paper:
31 // D. Nuzman and R. Henderson. Multi-platform Auto-vectorization.
32 //
33 // Variable uniformity checks are inspired by:
34 // Karrenberg, R. and Hack, S. Whole Function Vectorization.
35 //
36 // The interleaved access vectorization is based on the paper:
37 // Dorit Nuzman, Ira Rosen and Ayal Zaks. Auto-Vectorization of Interleaved
38 // Data for SIMD
39 //
40 // Other ideas/concepts are from:
41 // A. Zaks and D. Nuzman. Autovectorization in GCC-two years later.
42 //
43 // S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua. An Evaluation of
44 // Vectorizing Compilers.
45 //
46 //===----------------------------------------------------------------------===//
47
48 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H
49 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H
50
51 #include "llvm/ADT/MapVector.h"
52 #include "llvm/Analysis/AliasAnalysis.h"
53 #include "llvm/Analysis/AssumptionCache.h"
54 #include "llvm/Analysis/BasicAliasAnalysis.h"
55 #include "llvm/Analysis/BlockFrequencyInfo.h"
56 #include "llvm/Analysis/DemandedBits.h"
57 #include "llvm/Analysis/LoopAccessAnalysis.h"
58 #include "llvm/Analysis/LoopInfo.h"
59 #include "llvm/Analysis/LoopPassManager.h"
60 #include "llvm/Analysis/ScalarEvolution.h"
61 #include "llvm/Analysis/TargetTransformInfo.h"
62 #include "llvm/IR/Function.h"
63 #include "llvm/IR/PassManager.h"
64 #include
65
66 namespace llvm {
67
68 /// The LoopVectorize Pass.
69 struct LoopVectorizePass : public PassInfoMixin {
70 bool DisableUnrolling = false;
71 /// If true, consider all loops for vectorization.
72 /// If false, only loops that explicitly request vectorization are
73 /// considered.
74 bool AlwaysVectorize = true;
75
76 ScalarEvolution *SE;
77 LoopInfo *LI;
78 TargetTransformInfo *TTI;
79 DominatorTree *DT;
80 BlockFrequencyInfo *BFI;
81 TargetLibraryInfo *TLI;
82 DemandedBits *DB;
83 AliasAnalysis *AA;
84 AssumptionCache *AC;
85 std::function *GetLAA;
86
87 BlockFrequency ColdEntryFreq;
88
89 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
90
91 // Shim for old PM.
92 bool runImpl(Function &F, ScalarEvolution &SE_, LoopInfo &LI_,
93 TargetTransformInfo &TTI_, DominatorTree &DT_,
94 BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
95 DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
96 std::function &GetLAA_);
97
98 bool processLoop(Loop *L);
99 };
100 }
101
102 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H
105105 #include "llvm/Transforms/Utils/Mem2Reg.h"
106106 #include "llvm/Transforms/Utils/MemorySSA.h"
107107 #include "llvm/Transforms/Utils/SimplifyInstructions.h"
108 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
108109 #include "llvm/Transforms/Vectorize/SLPVectorizer.h"
109110
110111 #include
147147 FUNCTION_PASS("jump-threading", JumpThreadingPass())
148148 FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass())
149149 FUNCTION_PASS("lcssa", LCSSAPass())
150 FUNCTION_PASS("loop-vectorize", LoopVectorizePass())
150151 FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
151152 FUNCTION_PASS("print", AssumptionPrinterPass(dbgs()))
152153 FUNCTION_PASS("print", BlockFrequencyPrinterPass(dbgs()))
4545 //
4646 //===----------------------------------------------------------------------===//
4747
48 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
4849 #include "llvm/ADT/DenseMap.h"
4950 #include "llvm/ADT/Hashing.h"
5051 #include "llvm/ADT/MapVector.h"
5455 #include "llvm/ADT/SmallVector.h"
5556 #include "llvm/ADT/Statistic.h"
5657 #include "llvm/ADT/StringExtras.h"
57 #include "llvm/Analysis/AliasAnalysis.h"
58 #include "llvm/Analysis/AssumptionCache.h"
59 #include "llvm/Analysis/BasicAliasAnalysis.h"
60 #include "llvm/Analysis/BlockFrequencyInfo.h"
6158 #include "llvm/Analysis/CodeMetrics.h"
62 #include "llvm/Analysis/DemandedBits.h"
6359 #include "llvm/Analysis/GlobalsModRef.h"
64 #include "llvm/Analysis/LoopAccessAnalysis.h"
6560 #include "llvm/Analysis/LoopInfo.h"
6661 #include "llvm/Analysis/LoopIterator.h"
6762 #include "llvm/Analysis/LoopPass.h"
68 #include "llvm/Analysis/ScalarEvolution.h"
6963 #include "llvm/Analysis/ScalarEvolutionExpander.h"
7064 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
71 #include "llvm/Analysis/TargetTransformInfo.h"
7265 #include "llvm/Analysis/ValueTracking.h"
7366 #include "llvm/Analysis/VectorUtils.h"
7467 #include "llvm/IR/Constants.h"
10093 #include "llvm/Transforms/Utils/LoopVersioning.h"
10194 #include "llvm/Transforms/Vectorize.h"
10295 #include
103 #include
10496 #include
10597 #include
10698
13341326 /// induction variable and the different reduction variables.
13351327 class LoopVectorizationLegality {
13361328 public:
1337 LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE,
1338 DominatorTree *DT, TargetLibraryInfo *TLI,
1339 AliasAnalysis *AA, Function *F,
1340 const TargetTransformInfo *TTI,
1341 LoopAccessLegacyAnalysis *LAA, LoopInfo *LI,
1342 LoopVectorizationRequirements *R,
1343 LoopVectorizeHints *H)
1329 LoopVectorizationLegality(
1330 Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
1331 TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F,
1332 const TargetTransformInfo *TTI,
1333 std::function *GetLAA, LoopInfo *LI,
1334 LoopVectorizationRequirements *R, LoopVectorizeHints *H)
13441335 : NumPredStores(0), TheLoop(L), PSE(PSE), TLI(TLI), TheFunction(F),
1345 TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr),
1336 TTI(TTI), DT(DT), GetLAA(GetLAA), LAI(nullptr),
13461337 InterleaveInfo(PSE, L, DT, LI), Induction(nullptr),
13471338 WidestIndTy(nullptr), HasFunNoNaNAttr(false), Requirements(R),
13481339 Hints(H) {}
15351526 /// Dominator Tree.
15361527 DominatorTree *DT;
15371528 // LoopAccess analysis.
1538 LoopAccessLegacyAnalysis *LAA;
1529 std::function *GetLAA;
15391530 // And the loop-accesses info corresponding to this loop. This pointer is
15401531 // null until canVectorizeMemory sets it up.
15411532 const LoopAccessInfo *LAI;
17871778 static char ID;
17881779
17891780 explicit LoopVectorize(bool NoUnrolling = false, bool AlwaysVectorize = true)
1790 : FunctionPass(ID), DisableUnrolling(NoUnrolling),
1791 AlwaysVectorize(AlwaysVectorize) {
1781 : FunctionPass(ID) {
1782 Impl.DisableUnrolling = NoUnrolling;
1783 Impl.AlwaysVectorize = AlwaysVectorize;
17921784 initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
17931785 }
17941786
1795 ScalarEvolution *SE;
1796 LoopInfo *LI;
1797 TargetTransformInfo *TTI;
1798 DominatorTree *DT;
1799 BlockFrequencyInfo *BFI;
1800 TargetLibraryInfo *TLI;
1801 DemandedBits *DB;
1802 AliasAnalysis *AA;
1803 AssumptionCache *AC;
1804 LoopAccessLegacyAnalysis *LAA;
1805 bool DisableUnrolling;
1806 bool AlwaysVectorize;
1807
1808 BlockFrequency ColdEntryFreq;
1787 LoopVectorizePass Impl;
18091788
18101789 bool runOnFunction(Function &F) override {
18111790 if (skipFunction(F))
18121791 return false;
18131792
1814 SE = &getAnalysis().getSE();
1815 LI = &getAnalysis().getLoopInfo();
1816 TTI = &getAnalysis().getTTI(F);
1817 DT = &getAnalysis().getDomTree();
1818 BFI = &getAnalysis().getBFI();
1793 auto *SE = &getAnalysis().getSE();
1794 auto *LI = &getAnalysis().getLoopInfo();
1795 auto *TTI = &getAnalysis().getTTI(F);
1796 auto *DT = &getAnalysis().getDomTree();
1797 auto *BFI = &getAnalysis().getBFI();
18191798 auto *TLIP = getAnalysisIfAvailable();
1820 TLI = TLIP ? &TLIP->getTLI() : nullptr;
1821 AA = &getAnalysis().getAAResults();
1822 AC = &getAnalysis().getAssumptionCache(F);
1823 LAA = &getAnalysis();
1824 DB = &getAnalysis().getDemandedBits();
1825
1826 // Compute some weights outside of the loop over the loops. Compute this
1827 // using a BranchProbability to re-use its scaling math.
1828 const BranchProbability ColdProb(1, 5); // 20%
1829 ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb;
1830
1831 // Don't attempt if
1832 // 1. the target claims to have no vector registers, and
1833 // 2. interleaving won't help ILP.
1834 //
1835 // The second condition is necessary because, even if the target has no
1836 // vector registers, loop vectorization may still enable scalar
1837 // interleaving.
1838 if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2)
1839 return false;
1840
1841 // Build up a worklist of inner-loops to vectorize. This is necessary as
1842 // the act of vectorizing or partially unrolling a loop creates new loops
1843 // and can invalidate iterators across the loops.
1844 SmallVector Worklist;
1845
1846 for (Loop *L : *LI)
1847 addInnerLoop(*L, Worklist);
1848
1849 LoopsAnalyzed += Worklist.size();
1850
1851 // Now walk the identified inner loops.
1852 bool Changed = false;
1853 while (!Worklist.empty())
1854 Changed |= processLoop(Worklist.pop_back_val());
1855
1856 // Process each loop nest in the function.
1857 return Changed;
1858 }
1859
1860 static void AddRuntimeUnrollDisableMetaData(Loop *L) {
1861 SmallVector MDs;
1862 // Reserve first location for self reference to the LoopID metadata node.
1863 MDs.push_back(nullptr);
1864 bool IsUnrollMetadata = false;
1865 MDNode *LoopID = L->getLoopID();
1866 if (LoopID) {
1867 // First find existing loop unrolling disable metadata.
1868 for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
1869 MDNode *MD = dyn_cast(LoopID->getOperand(i));
1870 if (MD) {
1871 const MDString *S = dyn_cast(MD->getOperand(0));
1872 IsUnrollMetadata =
1873 S && S->getString().startswith("llvm.loop.unroll.disable");
1874 }
1875 MDs.push_back(LoopID->getOperand(i));
1876 }
1877 }
1878
1879 if (!IsUnrollMetadata) {
1880 // Add runtime unroll disable metadata.
1881 LLVMContext &Context = L->getHeader()->getContext();
1882 SmallVector DisableOperands;
1883 DisableOperands.push_back(
1884 MDString::get(Context, "llvm.loop.unroll.runtime.disable"));
1885 MDNode *DisableNode = MDNode::get(Context, DisableOperands);
1886 MDs.push_back(DisableNode);
1887 MDNode *NewLoopID = MDNode::get(Context, MDs);
1888 // Set operand 0 to refer to the loop id itself.
1889 NewLoopID->replaceOperandWith(0, NewLoopID);
1890 L->setLoopID(NewLoopID);
1891 }
1892 }
1893
1894 bool processLoop(Loop *L) {
1895 assert(L->empty() && "Only process inner loops.");
1896
1897 #ifndef NDEBUG
1898 const std::string DebugLocStr = getDebugLocString(L);
1899 #endif /* NDEBUG */
1900
1901 DEBUG(dbgs() << "\nLV: Checking a loop in \""
1902 << L->getHeader()->getParent()->getName() << "\" from "
1903 << DebugLocStr << "\n");
1904
1905 LoopVectorizeHints Hints(L, DisableUnrolling);
1906
1907 DEBUG(dbgs() << "LV: Loop hints:"
1908 << " force="
1909 << (Hints.getForce() == LoopVectorizeHints::FK_Disabled
1910 ? "disabled"
1911 : (Hints.getForce() == LoopVectorizeHints::FK_Enabled
1912 ? "enabled"
1913 : "?"))
1914 << " width=" << Hints.getWidth()
1915 << " unroll=" << Hints.getInterleave() << "\n");
1916
1917 // Function containing loop
1918 Function *F = L->getHeader()->getParent();
1919
1920 // Looking at the diagnostic output is the only way to determine if a loop
1921 // was vectorized (other than looking at the IR or machine code), so it
1922 // is important to generate an optimization remark for each loop. Most of
1923 // these messages are generated by emitOptimizationRemarkAnalysis. Remarks
1924 // generated by emitOptimizationRemark and emitOptimizationRemarkMissed are
1925 // less verbose reporting vectorized loops and unvectorized loops that may
1926 // benefit from vectorization, respectively.
1927
1928 if (!Hints.allowVectorization(F, L, AlwaysVectorize)) {
1929 DEBUG(dbgs() << "LV: Loop hints prevent vectorization.\n");
1930 return false;
1931 }
1932
1933 // Check the loop for a trip count threshold:
1934 // do not vectorize loops with a tiny trip count.
1935 const unsigned TC = SE->getSmallConstantTripCount(L);
1936 if (TC > 0u && TC < TinyTripCountVectorThreshold) {
1937 DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
1938 << "This loop is not worth vectorizing.");
1939 if (Hints.getForce() == LoopVectorizeHints::FK_Enabled)
1940 DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
1941 else {
1942 DEBUG(dbgs() << "\n");
1943 emitAnalysisDiag(F, L, Hints, VectorizationReport()
1944 << "vectorization is not beneficial "
1945 "and is not explicitly forced");
1946 return false;
1947 }
1948 }
1949
1950 PredicatedScalarEvolution PSE(*SE, *L);
1951
1952 // Check if it is legal to vectorize the loop.
1953 LoopVectorizationRequirements Requirements;
1954 LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, LAA, LI,
1955 &Requirements, &Hints);
1956 if (!LVL.canVectorize()) {
1957 DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
1958 emitMissedWarning(F, L, Hints);
1959 return false;
1960 }
1961
1962 // Use the cost model.
1963 LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, F,
1964 &Hints);
1965 CM.collectValuesToIgnore();
1966
1967 // Check the function attributes to find out if this function should be
1968 // optimized for size.
1969 bool OptForSize =
1970 Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
1971
1972 // Compute the weighted frequency of this loop being executed and see if it
1973 // is less than 20% of the function entry baseline frequency. Note that we
1974 // always have a canonical loop here because we think we *can* vectorize.
1975 // FIXME: This is hidden behind a flag due to pervasive problems with
1976 // exactly what block frequency models.
1977 if (LoopVectorizeWithBlockFrequency) {
1978 BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader());
1979 if (Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
1980 LoopEntryFreq < ColdEntryFreq)
1981 OptForSize = true;
1982 }
1983
1984 // Check the function attributes to see if implicit floats are allowed.
1985 // FIXME: This check doesn't seem possibly correct -- what if the loop is
1986 // an integer loop and the vector instructions selected are purely integer
1987 // vector instructions?
1988 if (F->hasFnAttribute(Attribute::NoImplicitFloat)) {
1989 DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
1990 "attribute is used.\n");
1991 emitAnalysisDiag(
1992 F, L, Hints,
1993 VectorizationReport()
1994 << "loop not vectorized due to NoImplicitFloat attribute");
1995 emitMissedWarning(F, L, Hints);
1996 return false;
1997 }
1998
1999 // Check if the target supports potentially unsafe FP vectorization.
2000 // FIXME: Add a check for the type of safety issue (denormal, signaling)
2001 // for the target we're vectorizing for, to make sure none of the
2002 // additional fp-math flags can help.
2003 if (Hints.isPotentiallyUnsafe() &&
2004 TTI->isFPVectorizationPotentiallyUnsafe()) {
2005 DEBUG(dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n");
2006 emitAnalysisDiag(F, L, Hints,
2007 VectorizationReport()
2008 << "loop not vectorized due to unsafe FP support.");
2009 emitMissedWarning(F, L, Hints);
2010 return false;
2011 }
2012
2013 // Select the optimal vectorization factor.
2014 const LoopVectorizationCostModel::VectorizationFactor VF =
2015 CM.selectVectorizationFactor(OptForSize);
2016
2017 // Select the interleave count.
2018 unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost);
2019
2020 // Get user interleave count.
2021 unsigned UserIC = Hints.getInterleave();
2022
2023 // Identify the diagnostic messages that should be produced.
2024 std::string VecDiagMsg, IntDiagMsg;
2025 bool VectorizeLoop = true, InterleaveLoop = true;
2026
2027 if (Requirements.doesNotMeet(F, L, Hints)) {
2028 DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization "
2029 "requirements.\n");
2030 emitMissedWarning(F, L, Hints);
2031 return false;
2032 }
2033
2034 if (VF.Width == 1) {
2035 DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
2036 VecDiagMsg =
2037 "the cost-model indicates that vectorization is not beneficial";
2038 VectorizeLoop = false;
2039 }
2040
2041 if (IC == 1 && UserIC <= 1) {
2042 // Tell the user interleaving is not beneficial.
2043 DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n");
2044 IntDiagMsg =
2045 "the cost-model indicates that interleaving is not beneficial";
2046 InterleaveLoop = false;
2047 if (UserIC == 1)
2048 IntDiagMsg +=
2049 " and is explicitly disabled or interleave count is set to 1";
2050 } else if (IC > 1 && UserIC == 1) {
2051 // Tell the user interleaving is beneficial, but it explicitly disabled.
2052 DEBUG(dbgs()
2053 << "LV: Interleaving is beneficial but is explicitly disabled.");
2054 IntDiagMsg = "the cost-model indicates that interleaving is beneficial "
2055 "but is explicitly disabled or interleave count is set to 1";
2056 InterleaveLoop = false;
2057 }
2058
2059 // Override IC if user provided an interleave count.
2060 IC = UserIC > 0 ? UserIC : IC;
2061
2062 // Emit diagnostic messages, if any.
2063 const char *VAPassName = Hints.vectorizeAnalysisPassName();
2064 if (!VectorizeLoop && !InterleaveLoop) {
2065 // Do not vectorize or interleaving the loop.
2066 emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F,
2067 L->getStartLoc(), VecDiagMsg);
2068 emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F,
2069 L->getStartLoc(), IntDiagMsg);
2070 return false;
2071 } else if (!VectorizeLoop && InterleaveLoop) {
2072 DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
2073 emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F,
2074 L->getStartLoc(), VecDiagMsg);
2075 } else if (VectorizeLoop && !InterleaveLoop) {
2076 DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
2077 << DebugLocStr << '\n');
2078 emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F,
2079 L->getStartLoc(), IntDiagMsg);
2080 } else if (VectorizeLoop && InterleaveLoop) {
2081 DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
2082 << DebugLocStr << '\n');
2083 DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
2084 }
2085
2086 if (!VectorizeLoop) {
2087 assert(IC > 1 && "interleave count should not be 1 or 0");
2088 // If we decided that it is not legal to vectorize the loop, then
2089 // interleave it.
2090 InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, IC);
2091 Unroller.vectorize(&LVL, CM.MinBWs, CM.VecValuesToIgnore);
2092
2093 emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(),
2094 Twine("interleaved loop (interleaved count: ") +
2095 Twine(IC) + ")");
2096 } else {
2097 // If we decided that it is *legal* to vectorize the loop, then do it.
2098 InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, VF.Width, IC);
2099 LB.vectorize(&LVL, CM.MinBWs, CM.VecValuesToIgnore);
2100 ++LoopsVectorized;
2101
2102 // Add metadata to disable runtime unrolling a scalar loop when there are
2103 // no runtime checks about strides and memory. A scalar loop that is
2104 // rarely used is not worth unrolling.
2105 if (!LB.areSafetyChecksAdded())
2106 AddRuntimeUnrollDisableMetaData(L);
2107
2108 // Report the vectorization decision.
2109 emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(),
2110 Twine("vectorized loop (vectorization width: ") +
2111 Twine(VF.Width) + ", interleaved count: " +
2112 Twine(IC) + ")");
2113 }
2114
2115 // Mark the loop as already vectorized to avoid vectorizing again.
2116 Hints.setAlreadyVectorized();
2117
2118 DEBUG(verifyFunction(*L->getHeader()->getParent()));
2119 return true;
1799 auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
1800 auto *AA = &getAnalysis().getAAResults();
1801 auto *AC = &getAnalysis().getAssumptionCache(F);
1802 auto *LAA = &getAnalysis();
1803 auto *DB = &getAnalysis().getDemandedBits();
1804
1805 std::function GetLAA =
1806 [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
1807
1808 return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC,
1809 GetLAA);
21201810 }
21211811
21221812 void getAnalysisUsage(AnalysisUsage &AU) const override {
51254815 }
51264816
51274817 bool LoopVectorizationLegality::canVectorizeMemory() {
5128 LAI = &LAA->getInfo(TheLoop);
4818 LAI = &(*GetLAA)(*TheLoop);
51294819 InterleaveInfo.setLAI(LAI);
51304820 auto &OptionalReport = LAI->getReport();
51314821 if (OptionalReport)
66626352 Constant *C = ConstantInt::get(ITy, StartIdx);
66636353 return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction");
66646354 }
6355
6356 static void AddRuntimeUnrollDisableMetaData(Loop *L) {
6357 SmallVector MDs;
6358 // Reserve first location for self reference to the LoopID metadata node.
6359 MDs.push_back(nullptr);
6360 bool IsUnrollMetadata = false;
6361 MDNode *LoopID = L->getLoopID();
6362 if (LoopID) {
6363 // First find existing loop unrolling disable metadata.
6364 for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
6365 MDNode *MD = dyn_cast(LoopID->getOperand(i));
6366 if (MD) {
6367 const MDString *S = dyn_cast(MD->getOperand(0));
6368 IsUnrollMetadata =
6369 S && S->getString().startswith("llvm.loop.unroll.disable");
6370 }
6371 MDs.push_back(LoopID->getOperand(i));
6372 }
6373 }
6374
6375 if (!IsUnrollMetadata) {
6376 // Add runtime unroll disable metadata.
6377 LLVMContext &Context = L->getHeader()->getContext();
6378 SmallVector DisableOperands;
6379 DisableOperands.push_back(
6380 MDString::get(Context, "llvm.loop.unroll.runtime.disable"));
6381 MDNode *DisableNode = MDNode::get(Context, DisableOperands);
6382 MDs.push_back(DisableNode);
6383 MDNode *NewLoopID = MDNode::get(Context, MDs);
6384 // Set operand 0 to refer to the loop id itself.
6385 NewLoopID->replaceOperandWith(0, NewLoopID);
6386 L->setLoopID(NewLoopID);
6387 }
6388 }
6389
6390 bool LoopVectorizePass::processLoop(Loop *L) {
6391 assert(L->empty() && "Only process inner loops.");
6392
6393 #ifndef NDEBUG
6394 const std::string DebugLocStr = getDebugLocString(L);
6395 #endif /* NDEBUG */
6396
6397 DEBUG(dbgs() << "\nLV: Checking a loop in \""
6398 << L->getHeader()->getParent()->getName() << "\" from "
6399 << DebugLocStr << "\n");
6400
6401 LoopVectorizeHints Hints(L, DisableUnrolling);
6402
6403 DEBUG(dbgs() << "LV: Loop hints:"
6404 << " force="
6405 << (Hints.getForce() == LoopVectorizeHints::FK_Disabled
6406 ? "disabled"
6407 : (Hints.getForce() == LoopVectorizeHints::FK_Enabled
6408 ? "enabled"
6409 : "?"))
6410 << " width=" << Hints.getWidth()
6411 << " unroll=" << Hints.getInterleave() << "\n");
6412
6413 // Function containing loop
6414 Function *F = L->getHeader()->getParent();
6415
6416 // Looking at the diagnostic output is the only way to determine if a loop
6417 // was vectorized (other than looking at the IR or machine code), so it
6418 // is important to generate an optimization remark for each loop. Most of
6419 // these messages are generated by emitOptimizationRemarkAnalysis. Remarks
6420 // generated by emitOptimizationRemark and emitOptimizationRemarkMissed are
6421 // less verbose reporting vectorized loops and unvectorized loops that may
6422 // benefit from vectorization, respectively.
6423
6424 if (!Hints.allowVectorization(F, L, AlwaysVectorize)) {
6425 DEBUG(dbgs() << "LV: Loop hints prevent vectorization.\n");
6426 return false;
6427 }
6428
6429 // Check the loop for a trip count threshold:
6430 // do not vectorize loops with a tiny trip count.
6431 const unsigned TC = SE->getSmallConstantTripCount(L);
6432 if (TC > 0u && TC < TinyTripCountVectorThreshold) {
6433 DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
6434 << "This loop is not worth vectorizing.");
6435 if (Hints.getForce() == LoopVectorizeHints::FK_Enabled)
6436 DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
6437 else {
6438 DEBUG(dbgs() << "\n");
6439 emitAnalysisDiag(F, L, Hints, VectorizationReport()
6440 << "vectorization is not beneficial "
6441 "and is not explicitly forced");
6442 return false;
6443 }
6444 }
6445
6446 PredicatedScalarEvolution PSE(*SE, *L);
6447
6448 // Check if it is legal to vectorize the loop.
6449 LoopVectorizationRequirements Requirements;
6450 LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, GetLAA, LI,
6451 &Requirements, &Hints);
6452 if (!LVL.canVectorize()) {
6453 DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
6454 emitMissedWarning(F, L, Hints);
6455 return false;
6456 }
6457
6458 // Use the cost model.
6459 LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, F,
6460 &Hints);
6461 CM.collectValuesToIgnore();
6462
6463 // Check the function attributes to find out if this function should be
6464 // optimized for size.
6465 bool OptForSize =
6466 Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
6467
6468 // Compute the weighted frequency of this loop being executed and see if it
6469 // is less than 20% of the function entry baseline frequency. Note that we
6470 // always have a canonical loop here because we think we *can* vectorize.
6471 // FIXME: This is hidden behind a flag due to pervasive problems with
6472 // exactly what block frequency models.
6473 if (LoopVectorizeWithBlockFrequency) {
6474 BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader());
6475 if (Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
6476 LoopEntryFreq < ColdEntryFreq)
6477 OptForSize = true;
6478 }
6479
6480 // Check the function attributes to see if implicit floats are allowed.
6481 // FIXME: This check doesn't seem possibly correct -- what if the loop is
6482 // an integer loop and the vector instructions selected are purely integer
6483 // vector instructions?
6484 if (F->hasFnAttribute(Attribute::NoImplicitFloat)) {
6485 DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
6486 "attribute is used.\n");
6487 emitAnalysisDiag(
6488 F, L, Hints,
6489 VectorizationReport()
6490 << "loop not vectorized due to NoImplicitFloat attribute");
6491 emitMissedWarning(F, L, Hints);
6492 return false;
6493 }
6494
6495 // Check if the target supports potentially unsafe FP vectorization.
6496 // FIXME: Add a check for the type of safety issue (denormal, signaling)
6497 // for the target we're vectorizing for, to make sure none of the
6498 // additional fp-math flags can help.
6499 if (Hints.isPotentiallyUnsafe() &&
6500 TTI->isFPVectorizationPotentiallyUnsafe()) {
6501 DEBUG(dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n");
6502 emitAnalysisDiag(F, L, Hints,
6503 VectorizationReport()
6504 << "loop not vectorized due to unsafe FP support.");
6505 emitMissedWarning(F, L, Hints);
6506 return false;
6507 }
6508
6509 // Select the optimal vectorization factor.
6510 const LoopVectorizationCostModel::VectorizationFactor VF =
6511 CM.selectVectorizationFactor(OptForSize);
6512
6513 // Select the interleave count.
6514 unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost);
6515
6516 // Get user interleave count.
6517 unsigned UserIC = Hints.getInterleave();
6518
6519 // Identify the diagnostic messages that should be produced.
6520 std::string VecDiagMsg, IntDiagMsg;
6521 bool VectorizeLoop = true, InterleaveLoop = true;
6522
6523 if (Requirements.doesNotMeet(F, L, Hints)) {
6524 DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization "
6525 "requirements.\n");
6526 emitMissedWarning(F, L, Hints);
6527 return false;
6528 }
6529
6530 if (VF.Width == 1) {
6531 DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
6532 VecDiagMsg =
6533 "the cost-model indicates that vectorization is not beneficial";
6534 VectorizeLoop = false;
6535 }
6536
6537 if (IC == 1 && UserIC <= 1) {
6538 // Tell the user interleaving is not beneficial.
6539 DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n");
6540 IntDiagMsg =
6541 "the cost-model indicates that interleaving is not beneficial";
6542 InterleaveLoop = false;
6543 if (UserIC == 1)
6544 IntDiagMsg +=
6545 " and is explicitly disabled or interleave count is set to 1";
6546 } else if (IC > 1 && UserIC == 1) {
6547 // Tell the user interleaving is beneficial, but it explicitly disabled.
6548 DEBUG(dbgs()
6549 << "LV: Interleaving is beneficial but is explicitly disabled.");
6550 IntDiagMsg = "the cost-model indicates that interleaving is beneficial "
6551 "but is explicitly disabled or interleave count is set to 1";
6552 InterleaveLoop = false;
6553 }
6554
6555 // Override IC if user provided an interleave count.
6556 IC = UserIC > 0 ? UserIC : IC;
6557
6558 // Emit diagnostic messages, if any.
6559 const char *VAPassName = Hints.vectorizeAnalysisPassName();
6560 if (!VectorizeLoop && !InterleaveLoop) {
6561 // Do not vectorize or interleaving the loop.
6562 emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F,
6563 L->getStartLoc(), VecDiagMsg);
6564 emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F,
6565 L->getStartLoc(), IntDiagMsg);
6566 return false;
6567 } else if (!VectorizeLoop && InterleaveLoop) {
6568 DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
6569 emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F,
6570 L->getStartLoc(), VecDiagMsg);
6571 } else if (VectorizeLoop && !InterleaveLoop) {
6572 DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
6573 << DebugLocStr << '\n');
6574 emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F,
6575 L->getStartLoc(), IntDiagMsg);
6576 } else if (VectorizeLoop && InterleaveLoop) {
6577 DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
6578 << DebugLocStr << '\n');
6579 DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
6580 }
6581
6582 if (!VectorizeLoop) {
6583 assert(IC > 1 && "interleave count should not be 1 or 0");
6584 // If we decided that it is not legal to vectorize the loop, then
6585 // interleave it.
6586 InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, IC);
6587 Unroller.vectorize(&LVL, CM.MinBWs, CM.VecValuesToIgnore);
6588
6589 emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(),
6590 Twine("interleaved loop (interleaved count: ") +
6591 Twine(IC) + ")");
6592 } else {
6593 // If we decided that it is *legal* to vectorize the loop, then do it.
6594 InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, VF.Width, IC);
6595 LB.vectorize(&LVL, CM.MinBWs, CM.VecValuesToIgnore);
6596 ++LoopsVectorized;
6597
6598 // Add metadata to disable runtime unrolling a scalar loop when there are
6599 // no runtime checks about strides and memory. A scalar loop that is
6600 // rarely used is not worth unrolling.
6601 if (!LB.areSafetyChecksAdded())
6602 AddRuntimeUnrollDisableMetaData(L);
6603
6604 // Report the vectorization decision.
6605 emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(),
6606 Twine("vectorized loop (vectorization width: ") +
6607 Twine(VF.Width) + ", interleaved count: " +
6608 Twine(IC) + ")");
6609 }
6610
6611 // Mark the loop as already vectorized to avoid vectorizing again.
6612 Hints.setAlreadyVectorized();
6613
6614 DEBUG(verifyFunction(*L->getHeader()->getParent()));
6615 return true;
6616 }
6617
6618 bool LoopVectorizePass::runImpl(
6619 Function &F, ScalarEvolution &SE_, LoopInfo &LI_, TargetTransformInfo &TTI_,
6620 DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
6621 DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
6622 std::function &GetLAA_) {
6623
6624 SE = &SE_;
6625 LI = &LI_;
6626 TTI = &TTI_;
6627 DT = &DT_;
6628 BFI = &BFI_;
6629 TLI = TLI_;
6630 AA = &AA_;
6631 AC = &AC_;
6632 GetLAA = &GetLAA_;
6633 DB = &DB_;
6634
6635 // Compute some weights outside of the loop over the loops. Compute this
6636 // using a BranchProbability to re-use its scaling math.
6637 const BranchProbability ColdProb(1, 5); // 20%
6638 ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb;
6639
6640 // Don't attempt if
6641 // 1. the target claims to have no vector registers, and
6642 // 2. interleaving won't help ILP.
6643 //
6644 // The second condition is necessary because, even if the target has no
6645 // vector registers, loop vectorization may still enable scalar
6646 // interleaving.
6647 if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2)
6648 return false;
6649
6650 // Build up a worklist of inner-loops to vectorize. This is necessary as
6651 // the act of vectorizing or partially unrolling a loop creates new loops
6652 // and can invalidate iterators across the loops.
6653 SmallVector Worklist;
6654
6655 for (Loop *L : *LI)
6656 addInnerLoop(*L, Worklist);
6657
6658 LoopsAnalyzed += Worklist.size();
6659
6660 // Now walk the identified inner loops.
6661 bool Changed = false;
6662 while (!Worklist.empty())
6663 Changed |= processLoop(Worklist.pop_back_val());
6664
6665 // Process each loop nest in the function.
6666 return Changed;
6667
6668 }
6669
6670
// New pass manager entry point: fetches the analyses runImpl() needs from
// the function analysis manager, runs the vectorizer on F, and reports which
// analyses remain valid afterwards.
// NOTE(review): the explicit template arguments of getResult,
// getCachedResult, preserve and std::function appear to have been stripped
// from this listing, so the analysis types involved are not visible here --
// confirm against the original commit.
6671 PreservedAnalyses LoopVectorizePass::run(Function &F,
6672 FunctionAnalysisManager &AM) {
6673 auto &SE = AM.getResult(F);
6674 auto &LI = AM.getResult(F);
6675 auto &TTI = AM.getResult(F);
6676 auto &DT = AM.getResult(F);
6677 auto &BFI = AM.getResult(F);
// TLI is the only analysis obtained through getCachedResult and held as a
// pointer; presumably it is null when no cached result exists -- TODO confirm.
6678 auto *TLI = AM.getCachedResult(F);
6679 auto &AA = AM.getResult(F);
6680 auto &AC = AM.getResult(F);
6681 auto &DB = AM.getResult(F);
6682
// Loop-level results are looked up on demand through the manager taken from
// this proxy result; the callback wraps that lookup for runImpl().
6683 auto &LAM = AM.getResult(F).getManager();
6684 std::function GetLAA =
6685 [&](Loop &L) -> const LoopAccessInfo & {
6686 return LAM.getResult(L);
6687 };
6688 bool Changed = runImpl(F, SE, LI, TTI, DT, BFI, TLI, DB, AA, AC, GetLAA);
// Nothing was transformed, so every analysis is still valid.
6689 if (!Changed)
6690 return PreservedAnalyses::all();
// Otherwise only the four analyses marked below are reported as preserved.
6691 PreservedAnalyses PA;
6692 PA.preserve();
6693 PA.preserve();
6694 PA.preserve();
6695 PA.preserve();
6696 return PA;
6697 }
0 ; RUN: opt < %s -loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
1 ; RUN: opt < %s -passes=loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
12
23 ; CHECK: remark: source.c:2:8: loop not vectorized: store that is conditionally executed prevents vectorization
34