llvm.org GIT mirror llvm / 87c977a
Rename getMaximumUnrollFactor -> getMaxInterleaveFactor; also rename option names controlling this variable. "Unroll" is not the appropriate name for this variable. Clang already uses the term "interleave" in pragmas and metadata for this. Differential Revision: http://reviews.llvm.org/D5066 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217528 91177308-0d34-0410-b5e6-96231b3b80d8 Sanjay Patel 6 years ago
92 changed file(s) with 150 addition(s) and 150 deletion(s). Raw diff Collapse all Expand all
345345 /// \return The width of the largest scalar or vector register type.
346346 virtual unsigned getRegisterBitWidth(bool Vector) const;
347347
348 /// \return The maximum unroll factor that the vectorizer should try to
348 /// \return The maximum interleave factor that any transform should try to
349349 /// perform for this target. This number depends on the level of parallelism
350350 /// and the number of execution units in the CPU.
351 virtual unsigned getMaximumUnrollFactor() const;
351 virtual unsigned getMaxInterleaveFactor() const;
352352
353353 /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
354354 virtual unsigned
166166 return PrevTTI->getRegisterBitWidth(Vector);
167167 }
168168
169 unsigned TargetTransformInfo::getMaximumUnrollFactor() const {
170 return PrevTTI->getMaximumUnrollFactor();
169 unsigned TargetTransformInfo::getMaxInterleaveFactor() const {
170 return PrevTTI->getMaxInterleaveFactor();
171171 }
172172
173173 unsigned TargetTransformInfo::getArithmeticInstrCost(
564564 return 32;
565565 }
566566
567 unsigned getMaximumUnrollFactor() const override {
567 unsigned getMaxInterleaveFactor() const override {
568568 return 1;
569569 }
570570
100100 /// @{
101101
102102 unsigned getNumberOfRegisters(bool Vector) const override;
103 unsigned getMaximumUnrollFactor() const override;
103 unsigned getMaxInterleaveFactor() const override;
104104 unsigned getRegisterBitWidth(bool Vector) const override;
105105 unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
106106 OperandValueKind, OperandValueProperties,
283283 return 32;
284284 }
285285
286 unsigned BasicTTI::getMaximumUnrollFactor() const {
286 unsigned BasicTTI::getMaxInterleaveFactor() const {
287287 return 1;
288288 }
289289
103103 return 64;
104104 }
105105
106 unsigned getMaximumUnrollFactor() const override;
106 unsigned getMaxInterleaveFactor() const override;
107107
108108 unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const
109109 override;
515515 return Cost;
516516 }
517517
518 unsigned AArch64TTI::getMaximumUnrollFactor() const {
518 unsigned AArch64TTI::getMaxInterleaveFactor() const {
519519 if (ST->isCortexA57() || ST->isCyclone())
520520 return 4;
521521 return 2;
103103 return 32;
104104 }
105105
106 unsigned getMaximumUnrollFactor() const override {
106 unsigned getMaxInterleaveFactor() const override {
107107 // These are out of order CPUs:
108108 if (ST->isCortexA15() || ST->isSwift())
109109 return 2;
8989
9090 unsigned getNumberOfRegisters(bool Vector) const override;
9191 unsigned getRegisterBitWidth(bool Vector) const override;
92 unsigned getMaximumUnrollFactor() const override;
92 unsigned getMaxInterleaveFactor() const override;
9393 unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
9494 OperandValueKind, OperandValueProperties,
9595 OperandValueProperties) const override;
294294
295295 }
296296
297 unsigned PPCTTI::getMaximumUnrollFactor() const {
297 unsigned PPCTTI::getMaxInterleaveFactor() const {
298298 unsigned Directive = ST->getDarwinDirective();
299299 // The 440 has no SIMD support, but floating-point instructions
300300 // have a 5-cycle latency, so unroll by 5x for latency hiding.
8080
8181 unsigned getNumberOfRegisters(bool Vector) const override;
8282 unsigned getRegisterBitWidth(bool Vector) const override;
83 unsigned getMaximumUnrollFactor() const override;
83 unsigned getMaxInterleaveFactor() const override;
8484
8585 /// @}
8686 };
152152 return 32;
153153 }
154154
155 unsigned AMDGPUTTI::getMaximumUnrollFactor() const {
155 unsigned AMDGPUTTI::getMaxInterleaveFactor() const {
156156 // Semi-arbitrary large amount.
157157 return 64;
158158 }
8181
8282 unsigned getNumberOfRegisters(bool Vector) const override;
8383 unsigned getRegisterBitWidth(bool Vector) const override;
84 unsigned getMaximumUnrollFactor() const override;
84 unsigned getMaxInterleaveFactor() const override;
8585 unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
8686 OperandValueKind, OperandValueProperties,
8787 OperandValueProperties) const override;
166166
167167 }
168168
169 unsigned X86TTI::getMaximumUnrollFactor() const {
169 unsigned X86TTI::getMaxInterleaveFactor() const {
170170 if (ST->isAtom())
171171 return 1;
172172
107107 cl::desc("Sets the SIMD width. Zero is autoselect."));
108108
109109 static cl::opt
110 VectorizationUnroll("force-vector-unroll", cl::init(0), cl::Hidden,
111 cl::desc("Sets the vectorization unroll count. "
110 VectorizationInterleave("force-vector-interleave", cl::init(0), cl::Hidden,
111 cl::desc("Sets the vectorization interleave count. "
112112 "Zero is autoselect."));
113113
114114 static cl::opt
156156 "force-target-num-vector-regs", cl::init(0), cl::Hidden,
157157 cl::desc("A flag that overrides the target's number of vector registers."));
158158
159 /// Maximum vectorization unroll count.
160 static const unsigned MaxUnrollFactor = 16;
161
162 static cl::opt ForceTargetMaxScalarUnrollFactor(
163 "force-target-max-scalar-unroll", cl::init(0), cl::Hidden,
164 cl::desc("A flag that overrides the target's max unroll factor for scalar "
165 "loops."));
166
167 static cl::opt ForceTargetMaxVectorUnrollFactor(
168 "force-target-max-vector-unroll", cl::init(0), cl::Hidden,
169 cl::desc("A flag that overrides the target's max unroll factor for "
159 /// Maximum vectorization interleave count.
160 static const unsigned MaxInterleaveFactor = 16;
161
162 static cl::opt ForceTargetMaxScalarInterleaveFactor(
163 "force-target-max-scalar-interleave", cl::init(0), cl::Hidden,
164 cl::desc("A flag that overrides the target's max interleave factor for "
165 "scalar loops."));
166
167 static cl::opt ForceTargetMaxVectorInterleaveFactor(
168 "force-target-max-vector-interleave", cl::init(0), cl::Hidden,
169 cl::desc("A flag that overrides the target's max interleave factor for "
170170 "vectorized loops."));
171171
172172 static cl::opt ForceTargetInstructionCost(
10021002 case HK_WIDTH:
10031003 return isPowerOf2_32(Val) && Val <= MaxVectorWidth;
10041004 case HK_UNROLL:
1005 return isPowerOf2_32(Val) && Val <= MaxUnrollFactor;
1005 return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
10061006 case HK_FORCE:
10071007 return (Val <= 1);
10081008 }
10121012
10131013 /// Vectorization width.
10141014 Hint Width;
1015 /// Vectorization unroll factor.
1016 Hint Unroll;
1015 /// Vectorization interleave factor.
1016 Hint Interleave;
10171017 /// Vectorization forced
10181018 Hint Force;
10191019 /// Array to help iterating through all hints.
10291029 FK_Enabled = 1, ///< Forcing enabled.
10301030 };
10311031
1032 LoopVectorizeHints(const Loop *L, bool DisableUnrolling)
1032 LoopVectorizeHints(const Loop *L, bool DisableInterleaving)
10331033 : Width("vectorize.width", VectorizationFactor, HK_WIDTH),
1034 Unroll("interleave.count", DisableUnrolling, HK_UNROLL),
1034 Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
10351035 Force("vectorize.enable", FK_Undefined, HK_FORCE),
10361036 TheLoop(L) {
10371037 // FIXME: Move this up initialisation when MSVC requirement is 2013+
10381038 Hints[0] = &Width;
1039 Hints[1] = &Unroll;
1039 Hints[1] = &Interleave;
10401040 Hints[2] = &Force;
10411041
10421042 // Populate values with existing loop metadata.
10431043 getHintsFromMetadata();
10441044
1045 // force-vector-unroll overrides DisableUnrolling.
1046 if (VectorizationUnroll.getNumOccurrences() > 0)
1047 Unroll.Value = VectorizationUnroll;
1048
1049 DEBUG(if (DisableUnrolling && Unroll.Value == 1) dbgs()
1050 << "LV: Unrolling disabled by the pass manager\n");
1045 // force-vector-interleave overrides DisableInterleaving.
1046 if (VectorizationInterleave.getNumOccurrences() > 0)
1047 Interleave.Value = VectorizationInterleave;
1048
1049 DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
1050 << "LV: Interleaving disabled by the pass manager\n");
10511051 }
10521052
10531053 /// Mark the loop L as already vectorized by setting the width to 1.
10541054 void setAlreadyVectorized() {
1055 Width.Value = Unroll.Value = 1;
1055 Width.Value = Interleave.Value = 1;
10561056 // FIXME: Change all lines below for this when we can use MSVC 2013+
10571057 //writeHintsToMetadata({ Width, Unroll });
10581058 std::vector hints;
10591059 hints.reserve(2);
10601060 hints.emplace_back(Width);
1061 hints.emplace_back(Unroll);
1061 hints.emplace_back(Interleave);
10621062 writeHintsToMetadata(std::move(hints));
10631063 }
10641064
10731073 R << " (Force=true";
10741074 if (Width.Value != 0)
10751075 R << ", Vector Width=" << Width.Value;
1076 if (Unroll.Value != 0)
1077 R << ", Interleave Count=" << Unroll.Value;
1076 if (Interleave.Value != 0)
1077 R << ", Interleave Count=" << Interleave.Value;
10781078 R << ")";
10791079 }
10801080 }
10831083 }
10841084
10851085 unsigned getWidth() const { return Width.Value; }
1086 unsigned getUnroll() const { return Unroll.Value; }
1086 unsigned getInterleave() const { return Interleave.Value; }
10871087 enum ForceKind getForce() const { return (ForceKind)Force.Value; }
10881088
10891089 private:
12151215 emitLoopVectorizeWarning(
12161216 F->getContext(), *F, L->getStartLoc(),
12171217 "failed explicitly specified loop vectorization");
1218 else if (LH.getUnroll() != 1)
1218 else if (LH.getInterleave() != 1)
12191219 emitLoopInterleaveWarning(
12201220 F->getContext(), *F, L->getStartLoc(),
12211221 "failed explicitly specified loop interleaving");
13211321 : (Hints.getForce() == LoopVectorizeHints::FK_Enabled
13221322 ? "enabled"
13231323 : "?")) << " width=" << Hints.getWidth()
1324 << " unroll=" << Hints.getUnroll() << "\n");
1324 << " unroll=" << Hints.getInterleave() << "\n");
13251325
13261326 // Function containing loop
13271327 Function *F = L->getHeader()->getParent();
13481348 return false;
13491349 }
13501350
1351 if (Hints.getWidth() == 1 && Hints.getUnroll() == 1) {
1351 if (Hints.getWidth() == 1 && Hints.getInterleave() == 1) {
13521352 DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
13531353 emitOptimizationRemarkAnalysis(
13541354 F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
46274627
46284628 // Bail out early if passed-in parameters make vectorization not feasible.
46294629 unsigned ForcedFactor = VectorizationFactor ? VectorizationFactor : 1;
4630 unsigned ForcedUnroll = VectorizationUnroll ? VectorizationUnroll : 1;
4630 unsigned ForcedUnroll = VectorizationInterleave ? VectorizationInterleave : 1;
46314631
46324632 // The distance must be bigger than the size needed for a vectorized version
46334633 // of the operation and the size of the vectorized operation must not be
55045504 // to the increased register pressure.
55055505
55065506 // Use the user preference, unless 'auto' is selected.
5507 int UserUF = Hints->getUnroll();
5507 int UserUF = Hints->getInterleave();
55085508 if (UserUF != 0)
55095509 return UserUF;
55105510
55575557 std::max(1U, (R.MaxLocalUsers - 1)));
55585558
55595559 // Clamp the unroll factor ranges to reasonable factors.
5560 unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor();
5560 unsigned MaxInterleaveSize = TTI.getMaxInterleaveFactor();
55615561
55625562 // Check if the user has overridden the unroll max.
55635563 if (VF == 1) {
5564 if (ForceTargetMaxScalarUnrollFactor.getNumOccurrences() > 0)
5565 MaxUnrollSize = ForceTargetMaxScalarUnrollFactor;
5564 if (ForceTargetMaxScalarInterleaveFactor.getNumOccurrences() > 0)
5565 MaxInterleaveSize = ForceTargetMaxScalarInterleaveFactor;
55665566 } else {
5567 if (ForceTargetMaxVectorUnrollFactor.getNumOccurrences() > 0)
5568 MaxUnrollSize = ForceTargetMaxVectorUnrollFactor;
5567 if (ForceTargetMaxVectorInterleaveFactor.getNumOccurrences() > 0)
5568 MaxInterleaveSize = ForceTargetMaxVectorInterleaveFactor;
55695569 }
55705570
55715571 // If we did not calculate the cost for VF (because the user selected the VF)
55755575
55765576 // Clamp the calculated UF to be between the 1 and the max unroll factor
55775577 // that the target allows.
5578 if (UF > MaxUnrollSize)
5579 UF = MaxUnrollSize;
5578 if (UF > MaxInterleaveSize)
5579 UF = MaxInterleaveSize;
55805580 else if (UF < 1)
55815581 UF = 1;
55825582
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce
11
22 ; Check that we don't fall into an infinite loop.
33 define void @test() nounwind {
None ; RUN: opt < %s -loop-vectorize -dce -force-vector-unroll=1 -force-vector-width=4
0 ; RUN: opt < %s -loop-vectorize -dce -force-vector-interleave=1 -force-vector-width=4
11
22 ; Check that we don't crash.
33
0 ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -S | FileCheck %s
1 ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -force-vector-unroll=0 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
1 ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -force-vector-interleave=0 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
22
33 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
44 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-unknown-linux-gnu"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -vectorizer-min-trip-count=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -vectorizer-min-trip-count=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-unknown-linux-gnu"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-unknown-linux-gnu"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-unknown-linux-gnu"
None ; RUN: opt < %s -basicaa -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -loop-vectorize-with-block-frequency -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -loop-vectorize-with-block-frequency -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -mcpu=prescott < %s | FileCheck %s
0 ; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -mcpu=prescott < %s | FileCheck %s
11
22 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
33 target triple = "i386-unknown-freebsd11.0"
None ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S \
0 ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-interleave=0 -dce -S \
11 ; RUN: | FileCheck %s --check-prefix=CHECK-VECTOR
2 ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=1 -force-vector-unroll=0 -dce -S \
2 ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=1 -force-vector-interleave=0 -dce -S \
33 ; RUN: | FileCheck %s --check-prefix=CHECK-SCALAR
44
55 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
None ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-interleave=0 -dce -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
0 ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=VECTORIZED %s
1 ; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s
2 ; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s
1 ; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s
2 ; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s
33
44 ; This code has all the !dbg annotations needed to track source line information,
55 ; but is missing the llvm.dbg.cu annotation. This prevents code generation from
None ; RUN: opt -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -S < %s | FileCheck %s
0 ; RUN: opt -O3 -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S < %s | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.7.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -S -mtriple=xcore | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S -mtriple=xcore | FileCheck %s
11
22 target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32"
33 target triple = "xcore"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=2 < %s | FileCheck %s
0 ; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-interleave=2 < %s | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -basicaa -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.9.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 ; rdar://problem/12848162
33
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine | FileCheck %s
0 ; RUN: opt < %s -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine | FileCheck %s
11 ; Make sure we vectorize with debugging turned on.
22
33 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
None ; RUN: opt -S < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 | FileCheck %s
0 ; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s
0 ; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33
None ; RUN: opt < %s -basicaa -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -dce -instcombine -S | FileCheck %s
1 ; RUN: opt < %s -basicaa -loop-vectorize -force-vector-width=4 -force-vector-unroll=4 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
0 ; RUN: opt < %s -basicaa -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s
1 ; RUN: opt < %s -basicaa -loop-vectorize -force-vector-width=4 -force-vector-interleave=4 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
22
33 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
44 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -O1 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -O1 -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
33
None ; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
0 ; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S < %s | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.9.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.9.0"
None ; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-unroll=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL
1 ; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-unroll=1 -loop-vectorize -enable-cond-stores-vec < %s | FileCheck %s --check-prefix=VEC
0 ; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL
1 ; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec < %s | FileCheck %s --check-prefix=VEC
22 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.9.0"
44
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33
2727 ret void
2828 }
2929
30 ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
30 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
3131
3232 ; Make sure we remove unneeded vectorization of induction variables.
3333 ; In order for instcombine to cleanup the vectorized induction variables that we
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-unknown-linux-gnu"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-unknown-linux-gnu"
None ; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s
0 ; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33
None ; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S | FileCheck %s
1 ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -S | FileCheck %s -check-prefix=WIDTH
0 ; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S | FileCheck %s
1 ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s -check-prefix=WIDTH
22
33 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
44
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-unknown-linux-gnu"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
11 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
22 target triple = "x86_64-unknown-linux-gnu"
33
None ; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s
0 ; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33
None ; RUN: opt -indvars -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
0 ; RUN: opt -indvars -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S < %s | FileCheck %s
11
22 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.9.0"
None ; RUN: opt < %s -basicaa -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 ; From a simple program with two address spaces:
33 ; char Y[4*10000] __attribute__((address_space(1)));
None ; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
0 ; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S < %s | FileCheck %s
11 @a = common global [128 x i32] zeroinitializer, align 16
22
33 ;; Must not vectorize division reduction. Division is lossy.
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 ; int __attribute__((noinline)) sum_array(int *A, int n) {
33 ; return std::accumulate(A, A + n, 0);
None ; RUN: opt -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -pass-remarks-analysis=loop-vectorize < %s 2>&1 | FileCheck %s
0 ; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -pass-remarks-analysis=loop-vectorize < %s 2>&1 | FileCheck %s
11
22 ; CHECK: remark: {{.*}}: loop not vectorized: value could not be identified as an induction or reduction variable
33 ; CHECK: remark: {{.*}}: loop not vectorized: use of induction value outside of the loop is not handled by vectorizer
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 ; Make sure that we don't vectorize functions with 'noimplicitfloat' attributes.
33
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4
11
22 target datalayout =
33 "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
None ; RUN: opt -S -O3 -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck --check-prefix=LOOPVEC %s
1 ; RUN: opt -S -O3 -disable-loop-vectorization -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck --check-prefix=NOLOOPVEC %s
0 ; RUN: opt -S -O3 -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck --check-prefix=LOOPVEC %s
1 ; RUN: opt -S -O3 -disable-loop-vectorization -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck --check-prefix=NOLOOPVEC %s
22
33 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
44 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -basicaa -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
0 ; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=2 -force-vector-width=4 -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
0 ; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
11
22 ; Check vectorization that would ordinarily require a runtime bounds
33 ; check on the pointers when mixing address spaces. For now we cannot
None ; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
0 ; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
11
22 ; Artificial datalayout
33 target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.9.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 < %s | FileCheck %s
0 ; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s
11 target datalayout = "e-p:32:32:32-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f16:16:16-f32:32:32-f64:32:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
22
33
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.9.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=2 -force-vector-width=8 -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=8 -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx"
None ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt -S -basicaa -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
0 ; RUN: opt -S -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.9.0"
None ; RUN: opt < %s -tbaa -basicaa -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s
1 ; RUN: opt < %s -basicaa -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s --check-prefix=CHECK-NOTBAA
0 ; RUN: opt < %s -tbaa -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s
1 ; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s --check-prefix=CHECK-NOTBAA
22 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
33 target triple = "x86_64-unknown-linux-gnu"
44
None ; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 < %s | FileCheck %s
0 ; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33
None ; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-target-num-scalar-regs=16 -force-target-max-scalar-unroll=8 -force-target-instruction-cost=1 -small-loop-cost=40 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-target-num-scalar-regs=16 -force-target-max-scalar-interleave=8 -force-target-instruction-cost=1 -small-loop-cost=40 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -dce -instcombine < %s | FileCheck %s
0 ; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine < %s | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33
None ; RUN: opt < %s -O2 -force-vector-unroll=2 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s
0 ; RUN: opt < %s -O2 -force-vector-interleave=2 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s
11 ; REQUIRES: asserts
22
33 ; Loop from "rotated"
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=4 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s
11 ; REQUIRES: asserts
22
33 ;
6161
6262 for.end:
6363 ret void
64 }
64 }
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -simplifycfg | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -simplifycfg | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"
None ; RUN: opt -basicaa -loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-unroll=1 < %s -S | FileCheck %s
0 ; RUN: opt -basicaa -loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-interleave=1 < %s -S | FileCheck %s
11
22 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
33
None ; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
0 ; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
33 target triple = "x86_64-apple-macosx10.8.0"