llvm / 0dd30f8
Add a late IR expansion pass for the experimental reduction intrinsics. This pass uses a new target hook to decide whether or not to expand a particular intrinsic to the shufflevector sequence. Differential Revision: https://reviews.llvm.org/D32245 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302631 91177308-0d34-0410-b5e6-96231b3b80d8 Amara Emerson
16 changed file(s) with 441 addition(s) and 5 deletion(s).
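As a sketch of how a backend might use the new hook (the class name MyTargetTTIImpl is hypothetical; only the hook itself comes from this patch), a target with native lowering for some reductions could keep those intrinsics intact and let the pass expand the rest:

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// Hypothetical target TTI implementation (the usual BasicTTIImplBase
// boilerplate is omitted); only the new hook is shown. Returning false
// keeps the intrinsic for native instruction selection, returning true
// asks the pass to expand it into a shuffle sequence.
struct MyTargetTTIImpl /* : public BasicTTIImplBase<MyTargetTTIImpl> */ {
  bool shouldExpandReduction(const IntrinsicInst *II) const {
    // Keep integer add reductions as intrinsics; expand everything else.
    return II->getIntrinsicID() != Intrinsic::experimental_vector_reduce_add;
  }
};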
752752 bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
753753 ReductionFlags Flags) const;
754754
755 /// \returns True if the target wants to expand the given reduction intrinsic
756 /// into a shuffle sequence.
757 bool shouldExpandReduction(const IntrinsicInst *II) const;
755758 /// @}
756759
757760 private:
909912 VectorType *VecTy) const = 0;
910913 virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
911914 ReductionFlags) const = 0;
915 virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
912916 };
913917
914918 template <typename T>
12171221 bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
12181222 ReductionFlags Flags) const override {
12191223 return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
1224 }
1225 bool shouldExpandReduction(const IntrinsicInst *II) const override {
1226 return Impl.shouldExpandReduction(II);
12201227 }
12211228 };
12221229
459459 bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
460460 TTI::ReductionFlags Flags) const {
461461 return false;
462 }
463
464 bool shouldExpandReduction(const IntrinsicInst *II) const {
465 return true;
462466 }
463467
464468 protected:
0 //===----- ExpandReductions.h - Expand experimental reduction intrinsics --===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef LLVM_CODEGEN_EXPANDREDUCTIONS_H
10 #define LLVM_CODEGEN_EXPANDREDUCTIONS_H
11
12 #include "llvm/IR/PassManager.h"
13
14 namespace llvm {
15
16 class ExpandReductionsPass
17 : public PassInfoMixin<ExpandReductionsPass> {
18 public:
19 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
20 };
21 } // end namespace llvm
22
23 #endif // LLVM_CODEGEN_EXPANDREDUCTIONS_H
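The new-PM wrapper declared here can be scheduled like any other function pass. A minimal sketch (the runExpansion helper and its setup are illustrative, not part of the patch; it assumes the FunctionAnalysisManager already has the standard analyses registered, including TargetIRAnalysis, which the pass queries):

#include "llvm/CodeGen/ExpandReductions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

// Illustrative driver: run the expansion over every function definition.
static void runExpansion(Module &M, FunctionAnalysisManager &FAM) {
  FunctionPassManager FPM;
  FPM.addPass(ExpandReductionsPass());
  for (Function &F : M)
    if (!F.isDeclaration())
      FPM.run(F, FAM);
}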
403403 /// This pass performs outlining on machine instructions directly before
404404 /// printing assembly.
405405 ModulePass *createMachineOutlinerPass();
406
407 /// This pass expands the experimental reduction intrinsics into sequences of
408 /// shuffles.
409 FunctionPass *createExpandReductionsPass();
406410
407411 } // End llvm namespace
408412
129129 void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&);
130130 void initializeExpandISelPseudosPass(PassRegistry&);
131131 void initializeExpandPostRAPass(PassRegistry&);
132 void initializeExpandReductionsPass(PassRegistry&);
132133 void initializeExternalAAWrapperPassPass(PassRegistry&);
133134 void initializeFEntryInserterPass(PassRegistry&);
134135 void initializeFinalizeMachineBundlesPass(PassRegistry&);
490490 LoopSafetyInfo *SafetyInfo,
491491 OptimizationRemarkEmitter *ORE = nullptr);
492492
493 /// Generates a vector reduction using shufflevectors to reduce the value.
494 Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
495 RecurrenceDescriptor::MinMaxRecurrenceKind
496 MinMaxKind = RecurrenceDescriptor::MRK_Invalid,
497 ArrayRef<Value *> RedOps = ArrayRef<Value *>());
498
493499 /// Create a target reduction of the given vector. The reduction operation
494500 /// is described by the \p Opcode parameter. min/max reductions require
495501 /// additional information supplied in \p Flags.
504504 return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
505505 }
506506
507 bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
508 return TTIImpl->shouldExpandReduction(II);
509 }
507510
508511 TargetTransformInfo::Concept::~Concept() {}
509512
2222 ExecutionDepsFix.cpp
2323 ExpandISelPseudos.cpp
2424 ExpandPostRAPseudos.cpp
25 ExpandReductions.cpp
2526 FaultMaps.cpp
2627 FEntryInserter.cpp
2728 FuncletLayout.cpp
0 //===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass implements IR expansion for reduction intrinsics, allowing targets
10 // to enable the experimental intrinsics until just before codegen.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/Analysis/TargetTransformInfo.h"
15 #include "llvm/CodeGen/ExpandReductions.h"
16 #include "llvm/CodeGen/Passes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/InstIterator.h"
20 #include "llvm/IR/Intrinsics.h"
21 #include "llvm/IR/IntrinsicInst.h"
22 #include "llvm/IR/Module.h"
23 #include "llvm/Transforms/Utils/LoopUtils.h"
24 #include "llvm/Pass.h"
25
26 using namespace llvm;
27
28 namespace {
29
30 unsigned getOpcode(Intrinsic::ID ID) {
31 switch (ID) {
32 case Intrinsic::experimental_vector_reduce_fadd:
33 return Instruction::FAdd;
34 case Intrinsic::experimental_vector_reduce_fmul:
35 return Instruction::FMul;
36 case Intrinsic::experimental_vector_reduce_add:
37 return Instruction::Add;
38 case Intrinsic::experimental_vector_reduce_mul:
39 return Instruction::Mul;
40 case Intrinsic::experimental_vector_reduce_and:
41 return Instruction::And;
42 case Intrinsic::experimental_vector_reduce_or:
43 return Instruction::Or;
44 case Intrinsic::experimental_vector_reduce_xor:
45 return Instruction::Xor;
46 case Intrinsic::experimental_vector_reduce_smax:
47 case Intrinsic::experimental_vector_reduce_smin:
48 case Intrinsic::experimental_vector_reduce_umax:
49 case Intrinsic::experimental_vector_reduce_umin:
50 return Instruction::ICmp;
51 case Intrinsic::experimental_vector_reduce_fmax:
52 case Intrinsic::experimental_vector_reduce_fmin:
53 return Instruction::FCmp;
54 default:
55 llvm_unreachable("Unexpected ID");
56 }
57 }
58
59 RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
60 switch (ID) {
61 case Intrinsic::experimental_vector_reduce_smax:
62 return RecurrenceDescriptor::MRK_SIntMax;
63 case Intrinsic::experimental_vector_reduce_smin:
64 return RecurrenceDescriptor::MRK_SIntMin;
65 case Intrinsic::experimental_vector_reduce_umax:
66 return RecurrenceDescriptor::MRK_UIntMax;
67 case Intrinsic::experimental_vector_reduce_umin:
68 return RecurrenceDescriptor::MRK_UIntMin;
69 case Intrinsic::experimental_vector_reduce_fmax:
70 return RecurrenceDescriptor::MRK_FloatMax;
71 case Intrinsic::experimental_vector_reduce_fmin:
72 return RecurrenceDescriptor::MRK_FloatMin;
73 default:
74 return RecurrenceDescriptor::MRK_Invalid;
75 }
76 }
77
78 bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
79 bool Changed = false;
80 SmallVector<IntrinsicInst *, 4> Worklist;
81 for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
82 if (auto II = dyn_cast<IntrinsicInst>(&*I))
83 Worklist.push_back(II);
84
85 for (auto *II : Worklist) {
86 IRBuilder<> Builder(II);
87 Value *Vec = nullptr;
88 auto ID = II->getIntrinsicID();
89 auto MRK = RecurrenceDescriptor::MRK_Invalid;
90 switch (ID) {
91 case Intrinsic::experimental_vector_reduce_fadd:
92 case Intrinsic::experimental_vector_reduce_fmul:
93 // FMFs must be attached to the call, otherwise it's an ordered reduction
94 // and it can't be handled by generating this shuffle sequence.
95 // TODO: Implement scalarization of ordered reductions here for targets
96 // without native support.
97 if (!II->getFastMathFlags().unsafeAlgebra())
98 continue;
99 Vec = II->getArgOperand(1);
100 break;
101 case Intrinsic::experimental_vector_reduce_add:
102 case Intrinsic::experimental_vector_reduce_mul:
103 case Intrinsic::experimental_vector_reduce_and:
104 case Intrinsic::experimental_vector_reduce_or:
105 case Intrinsic::experimental_vector_reduce_xor:
106 case Intrinsic::experimental_vector_reduce_smax:
107 case Intrinsic::experimental_vector_reduce_smin:
108 case Intrinsic::experimental_vector_reduce_umax:
109 case Intrinsic::experimental_vector_reduce_umin:
110 case Intrinsic::experimental_vector_reduce_fmax:
111 case Intrinsic::experimental_vector_reduce_fmin:
112 Vec = II->getArgOperand(0);
113 MRK = getMRK(ID);
114 break;
115 default:
116 continue;
117 }
118 if (!TTI->shouldExpandReduction(II))
119 continue;
120 auto Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
121 II->replaceAllUsesWith(Rdx);
122 II->eraseFromParent();
123 Changed = true;
124 }
125 return Changed;
126 }
127
128 class ExpandReductions : public FunctionPass {
129 public:
130 static char ID;
131 ExpandReductions() : FunctionPass(ID) {
132 initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
133 }
134
135 bool runOnFunction(Function &F) override {
136 const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
137 return expandReductions(F, TTI);
138 }
139
140 void getAnalysisUsage(AnalysisUsage &AU) const override {
141 AU.addRequired<TargetTransformInfoWrapperPass>();
142 AU.setPreservesCFG();
143 }
144 };
145 }
146
147 char ExpandReductions::ID;
148 INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
149 "Expand reduction intrinsics", false, false)
150 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
151 INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
152 "Expand reduction intrinsics", false, false)
153
154 FunctionPass *llvm::createExpandReductionsPass() {
155 return new ExpandReductions();
156 }
157
158 PreservedAnalyses ExpandReductionsPass::run(Function &F,
159 FunctionAnalysisManager &AM) {
160 const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
161 if (!expandReductions(F, &TTI))
162 return PreservedAnalyses::all();
163 PreservedAnalyses PA;
164 PA.preserveSet<CFGAnalyses>();
165 return PA;
166 }
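For the legacy pass manager, createExpandReductionsPass() is normally scheduled by TargetPassConfig (next hunk), but it can also be added to a hand-built pipeline; a brief sketch (the addExpansion helper is illustrative only):

#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/LegacyPassManager.h"

// Illustrative only: adding the legacy pass to a hand-built pipeline.
static void addExpansion(llvm::legacy::PassManagerBase &PM) {
  PM.add(llvm::createExpandReductionsPass());
}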
486486
487487 // Insert calls to mcount-like functions.
488488 addPass(createCountingFunctionInserterPass());
489
490 // Expand reduction intrinsics into shuffle sequences if the target wants to.
491 addPass(createExpandReductionsPass());
489492 }
490493
491494 /// Turn exception handling constructs into something the code generators can
136136 unsigned getMinPrefetchStride();
137137
138138 unsigned getMaxPrefetchIterationsAhead();
139
140 bool shouldExpandReduction(const IntrinsicInst *II) const {
141 return false;
142 }
139143 /// @}
140144 };
141145
11241124 }
11251125
11261126 // Helper to generate a log2 shuffle reduction.
1127 static Value *
1128 getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
1129 RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind =
1130 RecurrenceDescriptor::MRK_Invalid,
1131 ArrayRef<Value *> RedOps = ArrayRef<Value *>()) {
1127 Value *
1128 llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
1129 RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
1130 ArrayRef<Value *> RedOps) {
11321131 unsigned VF = Src->getType()->getVectorNumElements();
11331132 // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
11341133 // and vector ops, reducing the set of values being computed by half each
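To make the comment above concrete, here is a standalone scalar simulation of the strategy (plain C++, not LLVM code): each round folds the upper half of the live lanes into the lower half, so a power-of-2 VF is reduced after log2(VF) "shuffle + vector op" rounds, with the result left in lane 0.

#include <cassert>
#include <cstddef>
#include <vector>

// Simulates the log2 shuffle reduction for an add reduction.
double shuffleReduceAdd(std::vector<double> Lanes) {
  assert(!Lanes.empty() && (Lanes.size() & (Lanes.size() - 1)) == 0 &&
         "VF must be a power of 2");
  for (std::size_t Half = Lanes.size() / 2; Half >= 1; Half /= 2)
    for (std::size_t I = 0; I != Half; ++I)
      Lanes[I] += Lanes[I + Half]; // one shuffle + vector add round
  return Lanes[0];
}

For VF = 4 this performs two rounds, matching the two shufflevector/fadd pairs in the fadd_f32 test below.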
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -expand-reductions -S | FileCheck %s
2 ; Tests without a target which should expand all reductions
3 declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>)
4 declare i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>)
5 declare i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>)
6 declare i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>)
7 declare i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>)
8
9 declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
10 declare float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
11
12 declare i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>)
13 declare i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>)
14 declare i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>)
15 declare i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>)
16
17 declare double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double>)
18 declare double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double>)
19
20
21 define i64 @add_i64(<2 x i64> %vec) {
22 ; CHECK-LABEL: @add_i64(
23 ; CHECK-NEXT: entry:
24 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
25 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[VEC]], [[RDX_SHUF]]
26 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
27 ; CHECK-NEXT: ret i64 [[TMP0]]
28 ;
29 entry:
30 %r = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %vec)
31 ret i64 %r
32 }
33
34 define i64 @mul_i64(<2 x i64> %vec) {
35 ; CHECK-LABEL: @mul_i64(
36 ; CHECK-NEXT: entry:
37 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
38 ; CHECK-NEXT: [[BIN_RDX:%.*]] = mul <2 x i64> [[VEC]], [[RDX_SHUF]]
39 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
40 ; CHECK-NEXT: ret i64 [[TMP0]]
41 ;
42 entry:
43 %r = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> %vec)
44 ret i64 %r
45 }
46
47 define i64 @and_i64(<2 x i64> %vec) {
48 ; CHECK-LABEL: @and_i64(
49 ; CHECK-NEXT: entry:
50 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
51 ; CHECK-NEXT: [[BIN_RDX:%.*]] = and <2 x i64> [[VEC]], [[RDX_SHUF]]
52 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
53 ; CHECK-NEXT: ret i64 [[TMP0]]
54 ;
55 entry:
56 %r = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> %vec)
57 ret i64 %r
58 }
59
60 define i64 @or_i64(<2 x i64> %vec) {
61 ; CHECK-LABEL: @or_i64(
62 ; CHECK-NEXT: entry:
63 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
64 ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <2 x i64> [[VEC]], [[RDX_SHUF]]
65 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
66 ; CHECK-NEXT: ret i64 [[TMP0]]
67 ;
68 entry:
69 %r = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> %vec)
70 ret i64 %r
71 }
72
73 define i64 @xor_i64(<2 x i64> %vec) {
74 ; CHECK-LABEL: @xor_i64(
75 ; CHECK-NEXT: entry:
76 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
77 ; CHECK-NEXT: [[BIN_RDX:%.*]] = xor <2 x i64> [[VEC]], [[RDX_SHUF]]
78 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
79 ; CHECK-NEXT: ret i64 [[TMP0]]
80 ;
81 entry:
82 %r = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> %vec)
83 ret i64 %r
84 }
85
86 define float @fadd_f32(<4 x float> %vec) {
87 ; CHECK-LABEL: @fadd_f32(
88 ; CHECK-NEXT: entry:
89 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
90 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
91 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
92 ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
93 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
94 ; CHECK-NEXT: ret float [[TMP0]]
95 ;
96 entry:
97 %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
98 ret float %r
99 }
100
101 define float @fadd_f32_strict(<4 x float> %vec) {
102 ; CHECK-LABEL: @fadd_f32_strict(
103 ; CHECK-NEXT: entry:
104 ; CHECK-NEXT: [[R:%.*]] = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float undef, <4 x float> [[VEC:%.*]])
105 ; CHECK-NEXT: ret float [[R]]
106 ;
107 entry:
108 %r = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
109 ret float %r
110 }
111
112 define float @fmul_f32(<4 x float> %vec) {
113 ; CHECK-LABEL: @fmul_f32(
114 ; CHECK-NEXT: entry:
115 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
116 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
117 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
118 ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
119 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
120 ; CHECK-NEXT: ret float [[TMP0]]
121 ;
122 entry:
123 %r = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %vec)
124 ret float %r
125 }
126
127 define i64 @smax_i64(<2 x i64> %vec) {
128 ; CHECK-LABEL: @smax_i64(
129 ; CHECK-NEXT: entry:
130 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
131 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <2 x i64> [[VEC]], [[RDX_SHUF]]
132 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
133 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
134 ; CHECK-NEXT: ret i64 [[TMP0]]
135 ;
136 entry:
137 %r = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> %vec)
138 ret i64 %r
139 }
140
141 define i64 @smin_i64(<2 x i64> %vec) {
142 ; CHECK-LABEL: @smin_i64(
143 ; CHECK-NEXT: entry:
144 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
145 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <2 x i64> [[VEC]], [[RDX_SHUF]]
146 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
147 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
148 ; CHECK-NEXT: ret i64 [[TMP0]]
149 ;
150 entry:
151 %r = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> %vec)
152 ret i64 %r
153 }
154
155 define i64 @umax_i64(<2 x i64> %vec) {
156 ; CHECK-LABEL: @umax_i64(
157 ; CHECK-NEXT: entry:
158 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
159 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ugt <2 x i64> [[VEC]], [[RDX_SHUF]]
160 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
161 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
162 ; CHECK-NEXT: ret i64 [[TMP0]]
163 ;
164 entry:
165 %r = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> %vec)
166 ret i64 %r
167 }
168
169 define i64 @umin_i64(<2 x i64> %vec) {
170 ; CHECK-LABEL: @umin_i64(
171 ; CHECK-NEXT: entry:
172 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
173 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <2 x i64> [[VEC]], [[RDX_SHUF]]
174 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
175 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
176 ; CHECK-NEXT: ret i64 [[TMP0]]
177 ;
178 entry:
179 %r = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> %vec)
180 ret i64 %r
181 }
182
183 define double @fmax_f64(<2 x double> %vec) {
184 ; CHECK-LABEL: @fmax_f64(
185 ; CHECK-NEXT: entry:
186 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
187 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <2 x double> [[VEC]], [[RDX_SHUF]]
188 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
189 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX_SELECT]], i32 0
190 ; CHECK-NEXT: ret double [[TMP0]]
191 ;
192 entry:
193 %r = call double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double> %vec)
194 ret double %r
195 }
196
197 define double @fmin_f64(<2 x double> %vec) {
198 ; CHECK-LABEL: @fmin_f64(
199 ; CHECK-NEXT: entry:
200 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
201 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <2 x double> [[VEC]], [[RDX_SHUF]]
202 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
203 ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX_SELECT]], i32 0
204 ; CHECK-NEXT: ret double [[TMP0]]
205 ;
206 entry:
207 %r = call double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double> %vec)
208 ret double %r
209 }
2222 ; CHECK-NEXT: Shadow Stack GC Lowering
2323 ; CHECK-NEXT: Remove unreachable blocks from the CFG
2424 ; CHECK-NEXT: Inserts calls to mcount-like functions
25 ; CHECK-NEXT: Expand reduction intrinsics
2526 ; CHECK-NEXT: Rewrite Symbols
2627 ; CHECK-NEXT: FunctionPass Manager
2728 ; CHECK-NEXT: Dominator Tree Construction
300300 initializeConstantHoistingLegacyPassPass(*Registry);
301301 initializeScalarOpts(*Registry);
302302 initializeVectorization(*Registry);
303 initializeExpandReductionsPass(*Registry);
303304
304305 // Register the target printer for --version.
305306 cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
396396 initializeInterleavedAccessPass(Registry);
397397 initializeCountingFunctionInserterPass(Registry);
398398 initializeUnreachableBlockElimLegacyPassPass(Registry);
399 initializeExpandReductionsPass(Registry);
399400
400401 #ifdef LINK_POLLY_INTO_TOOLS
401402 polly::initializePollyPasses(Registry);