llvm.org GIT mirror llvm / a4fc021
[SLP] Fix for PR31847: Assertion failed: (isLoopInvariant(Operands[i], L) && "SCEVAddRecExpr operand is not loop-invariant!") Initially, the SLP vectorizer replaced all going-to-be-vectorized instructions with Undef values. This may break ScalarEvolution and may cause a crash. Reworked the SLP vectorizer so that it no longer replaces vectorized instructions with UndefValue. Instead, vectorized instructions are marked for deletion inside the BoUpSLP class and deleted upon class destruction. Reviewers: mzolotukhin, mkuper, hfinkel, RKSimon, davide, spatel Subscribers: RKSimon, Gerolf, anemet, hans, majnemer, llvm-commits, sanjoy Differential Revision: https://reviews.llvm.org/D29641 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@373166 91177308-0d34-0410-b5e6-96231b3b80d8 Alexey Bataev 1 year, 2 months ago
23 changed file(s) with 919 addition(s) and 1336 deletion(s). Raw diff Collapse all Expand all
2323 #include "llvm/ADT/SmallVector.h"
2424 #include "llvm/Analysis/AliasAnalysis.h"
2525 #include "llvm/IR/PassManager.h"
26 #include "llvm/IR/ValueHandle.h"
2726
2827 namespace llvm {
2928
5958 struct SLPVectorizerPass : public PassInfoMixin {
6059 using StoreList = SmallVector;
6160 using StoreListMap = MapVector;
62 using WeakTrackingVHList = SmallVector;
63 using WeakTrackingVHListMap = MapVector>;
61 using GEPList = SmallVector>;
62 using GEPListMap = MapVector;
6463
6564 ScalarEvolution *SE = nullptr;
6665 TargetTransformInfo *TTI = nullptr;
130129
131130 /// Tries to vectorize constructs started from CmpInst, InsertValueInst or
132131 /// InsertElementInst instructions.
133 bool vectorizeSimpleInstructions(SmallVectorImpl<WeakVH> &Instructions,
132 bool vectorizeSimpleInstructions(SmallVectorImpl<Instruction *> &Instructions,
134133 BasicBlock *BB, slpvectorizer::BoUpSLP &R);
135134
136135 /// Scan the basic block and look for patterns that are likely to start
146145 StoreListMap Stores;
147146
148147 /// The getelementptr instructions in a basic block organized by base pointer.
149 WeakTrackingVHListMap GEPs;
148 GEPListMap GEPs;
150149 };
151150
152151 } // end namespace llvm
11201120 #endif
11211121 };
11221122
1123 /// Checks if the instruction is marked for deletion.
1124 bool isDeleted(Instruction *I) const { return DeletedInstructions.count(I); }
1125
1126 /// Marks the instructions in \p AV for later deletion, requesting that their uses be replaced with Undefs.
1127 void eraseInstructions(ArrayRef AV);
1128
1129 ~BoUpSLP();
1130
11231131 private:
11241132 /// Checks if all users of \p I are the part of the vectorization tree.
11251133 bool areAllUsersVectorized(Instruction *I) const;
14891497 /// This is required to ensure that there are no incorrect collisions in the
14901498 /// AliasCache, which can happen if a new instruction is allocated at the
14911499 /// same address as a previously deleted instruction.
1492 void eraseInstruction(Instruction *I) {
1493 I->removeFromParent();
1494 I->dropAllReferences();
1495 DeletedInstructions.emplace_back(I);
1500 void eraseInstruction(Instruction *I, bool ReplaceOpsWithUndef = false) {
1501 auto It = DeletedInstructions.try_emplace(I, ReplaceOpsWithUndef).first;
1502 It->getSecond() = It->getSecond() && ReplaceOpsWithUndef;
14961503 }
14971504
14981505 /// Temporary store for deleted instructions. Instructions will be deleted
14991506 /// eventually when the BoUpSLP is destructed.
1500 SmallVector> DeletedInstructions;
1507 DenseMap> DeletedInstructions;
15011508
15021509 /// A list of values that need to be extracted out of the tree.
15031510 /// This list holds pairs of (Internal Scalar : External User). External User
20532060 };
20542061
20552062 } // end namespace llvm
2063
2064 BoUpSLP::~BoUpSLP() {
2065 for (const auto &Pair : DeletedInstructions) {
2066 // Replace operands of ignored instructions with Undefs if they were
2067 // marked for deletion.
2068 if (Pair.getSecond()) {
2069 Value *Undef = UndefValue::get(Pair.getFirst()->getType());
2070 Pair.getFirst()->replaceAllUsesWith(Undef);
2071 }
2072 Pair.getFirst()->dropAllReferences();
2073 }
2074 for (const auto &Pair : DeletedInstructions) {
2075 assert(Pair.getFirst()->use_empty() &&
2076 "trying to erase instruction with users.");
2077 Pair.getFirst()->eraseFromParent();
2078 }
2079 }
2080
2081 void BoUpSLP::eraseInstructions(ArrayRef AV) {
2082 for (auto *V : AV) {
2083 if (auto *I = dyn_cast(V))
2084 eraseInstruction(I, /*ReplaceWithUndef=*/true);
2085 };
2086 }
20562087
20572088 void BoUpSLP::buildTree(ArrayRef Roots,
20582089 ArrayRef UserIgnoreLst) {
35403571 // Generate the 'InsertElement' instruction.
35413572 for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
35423573 Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
3543 if (Instruction *Insrt = dyn_cast>(Vec)) {
3574 if (auto *Insrt = dyn_cast>(Vec)) {
35443575 GatherSeq.insert(Insrt);
35453576 CSEBlocks.insert(Insrt->getParent());
35463577
42894320 for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
42904321 Value *Scalar = Entry->Scalars[Lane];
42914322
4323 #ifndef NDEBUG
42924324 Type *Ty = Scalar->getType();
42934325 if (!Ty->isVoidTy()) {
4294 #ifndef NDEBUG
42954326 for (User *U : Scalar->users()) {
42964327 LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
42974328
4298 // It is legal to replace users in the ignorelist by undef.
4329 // It is legal to delete users in the ignorelist.
42994330 assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) &&
4300 "Replacing out-of-tree value with undef");
4331 "Deleting out-of-tree value");
43014332 }
4333 }
43024334 #endif
4303 Value *Undef = UndefValue::get(Ty);
4304 Scalar->replaceAllUsesWith(Undef);
4305 }
43064335 LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
43074336 eraseInstruction(cast(Scalar));
43084337 }
43184347 << " gather sequences instructions.\n");
43194348 // LICM InsertElementInst sequences.
43204349 for (Instruction *I : GatherSeq) {
4321 if (!isa(I) && !isa(I))
4350 if (isDeleted(I))
43224351 continue;
43234352
43244353 // Check if this block is inside a loop.
43724401 // For all instructions in blocks containing gather sequences:
43734402 for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
43744403 Instruction *In = &*it++;
4404 if (isDeleted(In))
4405 continue;
43754406 if (!isa(In) && !isa(In))
43764407 continue;
43774408
52545285 return Changed;
52555286 }
52565287
5257 /// Check that the Values in the slice in VL array are still existent in
5258 /// the WeakTrackingVH array.
5259 /// Vectorization of part of the VL array may cause later values in the VL array
5260 /// to become invalid. We track when this has happened in the WeakTrackingVH
5261 /// array.
5262 static bool hasValueBeenRAUWed(ArrayRef VL,
5263 ArrayRef VH, unsigned SliceBegin,
5264 unsigned SliceSize) {
5265 VL = VL.slice(SliceBegin, SliceSize);
5266 VH = VH.slice(SliceBegin, SliceSize);
5267 return !std::equal(VL.begin(), VL.end(), VH.begin());
5268 }
5269
52705288 bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R,
52715289 unsigned VecRegSize) {
52725290 const unsigned ChainLen = Chain.size();
52785296 if (!isPowerOf2_32(Sz) || VF < 2)
52795297 return false;
52805298
5281 // Keep track of values that were deleted by vectorizing in the loop below.
5282 const SmallVector TrackValues(Chain.begin(), Chain.end());
5283
52845299 bool Changed = false;
52855300 // Look for profitable vectorizable trees at all offsets, starting at zero.
52865301 for (unsigned i = 0, e = ChainLen; i + VF <= e; ++i) {
52875302
5303 ArrayRef Operands = Chain.slice(i, VF);
52885304 // Check that a previous iteration of this loop did not delete the Value.
5289 if (hasValueBeenRAUWed(Chain, TrackValues, i, VF))
5305 if (llvm::any_of(Operands, [&R](Value *V) {
5306 auto *I = dyn_cast(V);
5307 return I && R.isDeleted(I);
5308 }))
52905309 continue;
52915310
52925311 LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i
52935312 << "\n");
5294 ArrayRef Operands = Chain.slice(i, VF);
52955313
52965314 R.buildTree(Operands);
52975315 if (R.isTreeTinyAndNotFullyVectorizable())
54835501 bool CandidateFound = false;
54845502 int MinCost = SLPCostThreshold;
54855503
5486 // Keep track of values that were deleted by vectorizing in the loop below.
5487 SmallVector TrackValues(VL.begin(), VL.end());
5488
54895504 unsigned NextInst = 0, MaxInst = VL.size();
54905505 for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) {
54915506 // No actual vectorization should happen, if number of parts is the same as
55055520 if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
55065521 break;
55075522
5523 ArrayRef Ops = VL.slice(I, OpsWidth);
55085524 // Check that a previous iteration of this loop did not delete the Value.
5509 if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth))
5525 if (llvm::any_of(Ops, [&R](Value *V) {
5526 auto *I = dyn_cast(V);
5527 return I && R.isDeleted(I);
5528 }))
55105529 continue;
55115530
55125531 LLVM_DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
55135532 << "\n");
5514 ArrayRef Ops = VL.slice(I, OpsWidth);
55155533
55165534 R.buildTree(Ops);
55175535 Optional> Order = R.bestOrder();
57325750 case RK_Min:
57335751 Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSLT(LHS, RHS)
57345752 : Builder.CreateFCmpOLT(LHS, RHS);
5735 break;
5753 return Builder.CreateSelect(Cmp, LHS, RHS, Name);
57365754 case RK_Max:
57375755 Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSGT(LHS, RHS)
57385756 : Builder.CreateFCmpOGT(LHS, RHS);
5739 break;
5757 return Builder.CreateSelect(Cmp, LHS, RHS, Name);
57405758 case RK_UMin:
57415759 assert(Opcode == Instruction::ICmp && "Expected integer types.");
57425760 Cmp = Builder.CreateICmpULT(LHS, RHS);
5743 break;
5761 return Builder.CreateSelect(Cmp, LHS, RHS, Name);
57445762 case RK_UMax:
57455763 assert(Opcode == Instruction::ICmp && "Expected integer types.");
57465764 Cmp = Builder.CreateICmpUGT(LHS, RHS);
5765 return Builder.CreateSelect(Cmp, LHS, RHS, Name);
5766 case RK_None:
57475767 break;
5748 case RK_None:
5749 llvm_unreachable("Unknown reduction operation.");
5750 }
5751 return Builder.CreateSelect(Cmp, LHS, RHS, Name);
5768 }
5769 llvm_unreachable("Unknown reduction operation.");
57525770 }
57535771
57545772 public:
64286446 }
64296447 // Update users.
64306448 ReductionRoot->replaceAllUsesWith(VectorizedTree);
6449 // Mark all scalar reduction ops for deletion, they are replaced by the
6450 // vector reductions.
6451 V.eraseInstructions(IgnoreList);
64316452 }
64326453 return VectorizedTree != nullptr;
64336454 }
66826703 // horizontal reduction.
66836704 // Interrupt the process if the Root instruction itself was vectorized or all
66846705 // sub-trees not higher than RecursionMaxDepth were analyzed/vectorized.
6685 SmallVectorWeakTrackingVH, unsigned>, 8> Stack(1, {Root, 0});
6706 SmallVectorInstruction *, unsigned>, 8> Stack(1, {Root, 0});
66866707 SmallPtrSet VisitedInstrs;
66876708 bool Res = false;
66886709 while (!Stack.empty()) {
6689 Value *V;
6710 Instruction *Inst;
66906711 unsigned Level;
6691 std::tie(V, Level) = Stack.pop_back_val();
6692 if (!V)
6693 continue;
6694 auto *Inst = dyn_cast(V);
6695 if (!Inst)
6696 continue;
6712 std::tie(Inst, Level) = Stack.pop_back_val();
66976713 auto *BI = dyn_cast(Inst);
66986714 auto *SI = dyn_cast(Inst);
66996715 if (BI || SI) {
67346750 for (auto *Op : Inst->operand_values())
67356751 if (VisitedInstrs.insert(Op).second)
67366752 if (auto *I = dyn_cast(Op))
6737 if (!isa(I) && I->getParent() == BB)
6738 Stack.emplace_back(Op, Level);
6753 if (!isa(I) && !R.isDeleted(I) && I->getParent() == BB)
6754 Stack.emplace_back(I, Level);
67396755 }
67406756 return Res;
67416757 }
68046820 }
68056821
68066822 bool SLPVectorizerPass::vectorizeSimpleInstructions(
6807 SmallVectorImpl<WeakVH> &Instructions, BasicBlock *BB, BoUpSLP &R) {
6823 SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP &R) {
68086824 bool OpsChanged = false;
6809 for (auto &VH : reverse(Instructions)) {
6810 auto *I = dyn_cast_or_null(VH);
6811 if (!I)
6825 for (auto *I : reverse(Instructions)) {
6826 if (R.isDeleted(I))
68126827 continue;
68136828 if (auto *LastInsertValue = dyn_cast(I))
68146829 OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
68376852 if (!P)
68386853 break;
68396854
6840 if (!VisitedInstrs.count(P))
6855 if (!VisitedInstrs.count(P) && !R.isDeleted(P))
68416856 Incoming.push_back(P);
68426857 }
68436858
68816896
68826897 VisitedInstrs.clear();
68836898
6884 SmallVector<WeakVH, 8> PostProcessInstructions;
6899 SmallVector<Instruction *, 8> PostProcessInstructions;
68856900 SmallDenseSet KeyNodes;
68866901 for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
6902 // Skip instructions marked for deletion.
6903 if (R.isDeleted(&*it))
6904 continue;
68876905 // We may go through BB multiple times so skip the one we have checked.
68886906 if (!VisitedInstrs.insert(&*it).second) {
68896907 if (it->use_empty() && KeyNodes.count(&*it) > 0 &&
69766994 SetVector Candidates(GEPList.begin(), GEPList.end());
69776995
69786996 // Some of the candidates may have already been vectorized after we
6979 // initially collected them. If so, the WeakTrackingVHs will have
6980 // nullified the
6981 // values, so remove them from the set of candidates.
6982 Candidates.remove(nullptr);
6997 // initially collected them. If so, they are marked as deleted, so remove
6998 // them from the set of candidates.
6999 Candidates.remove_if(
7000 [&R](Value *I) { return R.isDeleted(cast(I)); });
69837001
69847002 // Remove from the set of candidates all pairs of getelementptrs with
69857003 // constant differences. Such getelementptrs are likely not good
69877005 // computed from the other. We also ensure all candidate getelementptr
69887006 // indices are unique.
69897007 for (int I = 0, E = GEPList.size(); I < E && Candidates.size() > 1; ++I) {
6990 auto *GEPI = cast(GEPList[I]);
7008 auto *GEPI = GEPList[I];
69917009 if (!Candidates.count(GEPI))
69927010 continue;
69937011 auto *SCEVI = SE->getSCEV(GEPList[I]);
69947012 for (int J = I + 1; J < E && Candidates.size() > 1; ++J) {
6995 auto *GEPJ = cast(GEPList[J]);
7013 auto *GEPJ = GEPList[J];
69967014 auto *SCEVJ = SE->getSCEV(GEPList[J]);
69977015 if (isa(SE->getMinusSCEV(SCEVI, SCEVJ))) {
6998 Candidates.remove(GEPList[I]);
6999 Candidates.remove(GEPList[J]);
7016 Candidates.remove(GEPI);
7017 Candidates.remove(GEPJ);
70007018 } else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) {
7001 Candidates.remove(GEPList[J]);
7019 Candidates.remove(GEPJ);
70027020 }
70037021 }
70047022 }
1616 ; DEFAULT: for.body:
1717 ; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
1818 ; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32>
19 ; DEFAULT-NEXT: [[P20:%.*]] = add i32 [[P17]], undef
20 ; DEFAULT-NEXT: [[P22:%.*]] = add i32 [[P20]], undef
21 ; DEFAULT-NEXT: [[P24:%.*]] = add i32 [[P22]], undef
22 ; DEFAULT-NEXT: [[P26:%.*]] = add i32 [[P24]], undef
23 ; DEFAULT-NEXT: [[P28:%.*]] = add i32 [[P26]], undef
24 ; DEFAULT-NEXT: [[P30:%.*]] = add i32 [[P28]], undef
25 ; DEFAULT-NEXT: [[P32:%.*]] = add i32 [[P30]], undef
2619 ; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
2720 ; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[P17]]
28 ; DEFAULT-NEXT: [[P34:%.*]] = add i32 [[P32]], undef
2921 ; DEFAULT-NEXT: br label [[FOR_BODY]]
3022 ;
3123 ; GATHER-LABEL: @PR28330(
3527 ; GATHER-NEXT: br label [[FOR_BODY:%.*]]
3628 ; GATHER: for.body:
3729 ; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
38 ; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
39 ; GATHER-NEXT: [[TMP3:%.*]] = insertelement <8 x i1> undef, i1 [[TMP2]], i32 0
40 ; GATHER-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
41 ; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i1> [[TMP3]], i1 [[TMP4]], i32 1
42 ; GATHER-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
43 ; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i1> [[TMP5]], i1 [[TMP6]], i32 2
44 ; GATHER-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
45 ; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i1> [[TMP7]], i1 [[TMP8]], i32 3
46 ; GATHER-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
47 ; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i1> [[TMP9]], i1 [[TMP10]], i32 4
48 ; GATHER-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
49 ; GATHER-NEXT: [[TMP13:%.*]] = insertelement <8 x i1> [[TMP11]], i1 [[TMP12]], i32 5
50 ; GATHER-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
51 ; GATHER-NEXT: [[TMP15:%.*]] = insertelement <8 x i1> [[TMP13]], i1 [[TMP14]], i32 6
52 ; GATHER-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
53 ; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1 [[TMP16]], i32 7
30 ; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
31 ; GATHER-NEXT: [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
32 ; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i1> undef, i1 [[TMP3]], i32 0
33 ; GATHER-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
34 ; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i1> [[TMP4]], i1 [[TMP5]], i32 1
35 ; GATHER-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
36 ; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i1> [[TMP6]], i1 [[TMP7]], i32 2
37 ; GATHER-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
38 ; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i1> [[TMP8]], i1 [[TMP9]], i32 3
39 ; GATHER-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
40 ; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i1> [[TMP10]], i1 [[TMP11]], i32 4
41 ; GATHER-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
42 ; GATHER-NEXT: [[TMP14:%.*]] = insertelement <8 x i1> [[TMP12]], i1 [[TMP13]], i32 5
43 ; GATHER-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
44 ; GATHER-NEXT: [[TMP16:%.*]] = insertelement <8 x i1> [[TMP14]], i1 [[TMP15]], i32 6
45 ; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP16]], i1 [[TMP2]], i32 7
5446 ; GATHER-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32> , <8 x i32>
5547 ; GATHER-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]], i32 0
56 ; GATHER-NEXT: [[P20:%.*]] = add i32 [[P17]], [[TMP19]]
5748 ; GATHER-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]], i32 1
58 ; GATHER-NEXT: [[P22:%.*]] = add i32 [[P20]], [[TMP20]]
5949 ; GATHER-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]], i32 2
60 ; GATHER-NEXT: [[P24:%.*]] = add i32 [[P22]], [[TMP21]]
6150 ; GATHER-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]], i32 3
62 ; GATHER-NEXT: [[P26:%.*]] = add i32 [[P24]], [[TMP22]]
6351 ; GATHER-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32 4
64 ; GATHER-NEXT: [[P28:%.*]] = add i32 [[P26]], [[TMP23]]
6552 ; GATHER-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32 5
66 ; GATHER-NEXT: [[P30:%.*]] = add i32 [[P28]], [[TMP24]]
6753 ; GATHER-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32 6
68 ; GATHER-NEXT: [[P32:%.*]] = add i32 [[P30]], [[TMP25]]
6954 ; GATHER-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0
7055 ; GATHER-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP20]], i32 1
7156 ; GATHER-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP21]], i32 2
7762 ; GATHER-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7
7863 ; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP34]])
7964 ; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], [[P17]]
80 ; GATHER-NEXT: [[P34:%.*]] = add i32 [[P32]], [[TMP33]]
8165 ; GATHER-NEXT: br label [[FOR_BODY]]
8266 ;
8367 ; MAX-COST-LABEL: @PR28330(
168152 ; DEFAULT: for.body:
169153 ; DEFAULT-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
170154 ; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32>
171 ; DEFAULT-NEXT: [[P20:%.*]] = add i32 -5, undef
172 ; DEFAULT-NEXT: [[P22:%.*]] = add i32 [[P20]], undef
173 ; DEFAULT-NEXT: [[P24:%.*]] = add i32 [[P22]], undef
174 ; DEFAULT-NEXT: [[P26:%.*]] = add i32 [[P24]], undef
175 ; DEFAULT-NEXT: [[P28:%.*]] = add i32 [[P26]], undef
176 ; DEFAULT-NEXT: [[P30:%.*]] = add i32 [[P28]], undef
177 ; DEFAULT-NEXT: [[P32:%.*]] = add i32 [[P30]], undef
178155 ; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
179156 ; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], -5
180 ; DEFAULT-NEXT: [[P34:%.*]] = add i32 [[P32]], undef
181157 ; DEFAULT-NEXT: br label [[FOR_BODY]]
182158 ;
183159 ; GATHER-LABEL: @PR32038(
187163 ; GATHER-NEXT: br label [[FOR_BODY:%.*]]
188164 ; GATHER: for.body:
189165 ; GATHER-NEXT: [[P17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
190 ; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
191 ; GATHER-NEXT: [[TMP3:%.*]] = insertelement <8 x i1> undef, i1 [[TMP2]], i32 0
192 ; GATHER-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
193 ; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i1> [[TMP3]], i1 [[TMP4]], i32 1
194 ; GATHER-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
195 ; GATHER-NEXT: [[TMP7:%.*]] = insertelement <8 x i1> [[TMP5]], i1 [[TMP6]], i32 2
196 ; GATHER-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
197 ; GATHER-NEXT: [[TMP9:%.*]] = insertelement <8 x i1> [[TMP7]], i1 [[TMP8]], i32 3
198 ; GATHER-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
199 ; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i1> [[TMP9]], i1 [[TMP10]], i32 4
200 ; GATHER-NEXT: [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
201 ; GATHER-NEXT: [[TMP13:%.*]] = insertelement <8 x i1> [[TMP11]], i1 [[TMP12]], i32 5
202 ; GATHER-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
203 ; GATHER-NEXT: [[TMP15:%.*]] = insertelement <8 x i1> [[TMP13]], i1 [[TMP14]], i32 6
204 ; GATHER-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
205 ; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1 [[TMP16]], i32 7
166 ; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
167 ; GATHER-NEXT: [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
168 ; GATHER-NEXT: [[TMP4:%.*]] = insertelement <8 x i1> undef, i1 [[TMP3]], i32 0
169 ; GATHER-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
170 ; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i1> [[TMP4]], i1 [[TMP5]], i32 1
171 ; GATHER-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
172 ; GATHER-NEXT: [[TMP8:%.*]] = insertelement <8 x i1> [[TMP6]], i1 [[TMP7]], i32 2
173 ; GATHER-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
174 ; GATHER-NEXT: [[TMP10:%.*]] = insertelement <8 x i1> [[TMP8]], i1 [[TMP9]], i32 3
175 ; GATHER-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
176 ; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i1> [[TMP10]], i1 [[TMP11]], i32 4
177 ; GATHER-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
178 ; GATHER-NEXT: [[TMP14:%.*]] = insertelement <8 x i1> [[TMP12]], i1 [[TMP13]], i32 5
179 ; GATHER-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
180 ; GATHER-NEXT: [[TMP16:%.*]] = insertelement <8 x i1> [[TMP14]], i1 [[TMP15]], i32 6
181 ; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP16]], i1 [[TMP2]], i32 7
206182 ; GATHER-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32> , <8 x i32>
207183 ; GATHER-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]], i32 0
208 ; GATHER-NEXT: [[P20:%.*]] = add i32 -5, [[TMP19]]
209184 ; GATHER-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]], i32 1
210 ; GATHER-NEXT: [[P22:%.*]] = add i32 [[P20]], [[TMP20]]
211185 ; GATHER-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]], i32 2
212 ; GATHER-NEXT: [[P24:%.*]] = add i32 [[P22]], [[TMP21]]
213186 ; GATHER-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]], i32 3
214 ; GATHER-NEXT: [[P26:%.*]] = add i32 [[P24]], [[TMP22]]
215187 ; GATHER-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32 4
216 ; GATHER-NEXT: [[P28:%.*]] = add i32 [[P26]], [[TMP23]]
217188 ; GATHER-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32 5
218 ; GATHER-NEXT: [[P30:%.*]] = add i32 [[P28]], [[TMP24]]
219189 ; GATHER-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32 6
220 ; GATHER-NEXT: [[P32:%.*]] = add i32 [[P30]], [[TMP25]]
221190 ; GATHER-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0
222191 ; GATHER-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP20]], i32 1
223192 ; GATHER-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP21]], i32 2
229198 ; GATHER-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7
230199 ; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP34]])
231200 ; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], -5
232 ; GATHER-NEXT: [[P34:%.*]] = add i32 [[P32]], [[TMP33]]
233201 ; GATHER-NEXT: br label [[FOR_BODY]]
234202 ;
235203 ; MAX-COST-LABEL: @PR32038(
258226 ; MAX-COST-NEXT: [[TMP6:%.*]] = insertelement <4 x i1> [[TMP5]], i1 [[P5]], i32 2
259227 ; MAX-COST-NEXT: [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[P7]], i32 3
260228 ; MAX-COST-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> , <4 x i32>
261 ; MAX-COST-NEXT: [[P20:%.*]] = add i32 -5, undef
262 ; MAX-COST-NEXT: [[P22:%.*]] = add i32 [[P20]], undef
263 ; MAX-COST-NEXT: [[P24:%.*]] = add i32 [[P22]], undef
264 ; MAX-COST-NEXT: [[P26:%.*]] = add i32 [[P24]], undef
265229 ; MAX-COST-NEXT: [[P27:%.*]] = select i1 [[P9]], i32 -720, i32 -80
266 ; MAX-COST-NEXT: [[P28:%.*]] = add i32 [[P26]], [[P27]]
267230 ; MAX-COST-NEXT: [[P29:%.*]] = select i1 [[P11]], i32 -720, i32 -80
268231 ; MAX-COST-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]])
269232 ; MAX-COST-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[P27]]
270233 ; MAX-COST-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[P29]]
271234 ; MAX-COST-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP11]], -5
272 ; MAX-COST-NEXT: [[P30:%.*]] = add i32 [[P28]], [[P29]]
273235 ; MAX-COST-NEXT: [[P31:%.*]] = select i1 [[P13]], i32 -720, i32 -80
274236 ; MAX-COST-NEXT: [[P32:%.*]] = add i32 [[OP_EXTRA]], [[P31]]
275237 ; MAX-COST-NEXT: [[P33:%.*]] = select i1 [[P15]], i32 -720, i32 -80
4545 ; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], zeroinitializer
4646 ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP4]]
4747 ; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]]
48 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, [[S_026]]
49 ; CHECK-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD]], undef
50 ; CHECK-NEXT: [[ADD19:%.*]] = add nsw i32 [[ADD11]], undef
5148 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
5249 ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP8]], [[S_026]]
53 ; CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD19]], undef
5450 ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i32, i32* [[P1_023]], i64 [[IDX_EXT]]
5551 ; CHECK-NEXT: [[ADD_PTR29]] = getelementptr inbounds i32, i32* [[P2_024]], i64 [[IDX_EXT]]
5652 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[J_025]], 1
172168 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[P2_018]] to <4 x i32>*
173169 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
174170 ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], [[TMP1]]
175 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, [[S_020]]
176 ; CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], undef
177 ; CHECK-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD5]], undef
178171 ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
179172 ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP5]], [[S_020]]
180 ; CHECK-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD9]], undef
181173 ; CHECK-NEXT: [[CMP14:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]]
182174 ; CHECK-NEXT: br i1 [[CMP14]], label [[IF_END]], label [[FOR_END_LOOPEXIT:%.*]]
183175 ; CHECK: if.end:
292284 ; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <8 x i32> [[TMP6]], zeroinitializer
293285 ; CHECK-NEXT: [[TMP8:%.*]] = sub nsw <8 x i32> zeroinitializer, [[TMP6]]
294286 ; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP7]], <8 x i32> [[TMP8]], <8 x i32> [[TMP6]]
295 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, [[S_047]]
296 ; CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD]], undef
297 ; CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 [[ADD16]], undef
298 ; CHECK-NEXT: [[ADD38:%.*]] = add nsw i32 [[ADD27]], undef
299 ; CHECK-NEXT: [[ADD49:%.*]] = add nsw i32 [[ADD38]], undef
300 ; CHECK-NEXT: [[ADD60:%.*]] = add nsw i32 [[ADD49]], undef
301 ; CHECK-NEXT: [[ADD71:%.*]] = add nsw i32 [[ADD60]], undef
302287 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
303288 ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP10]], [[S_047]]
304 ; CHECK-NEXT: [[ADD82:%.*]] = add nsw i32 [[ADD71]], undef
305289 ; CHECK-NEXT: [[CMP83:%.*]] = icmp slt i32 [[OP_EXTRA]], [[LIM:%.*]]
306290 ; CHECK-NEXT: br i1 [[CMP83]], label [[IF_END_86]], label [[FOR_END_LOOPEXIT:%.*]]
307291 ; CHECK: if.end.86:
1212 ; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[I:%.*]], metadata !19, metadata !DIExpression()), !dbg !24
1313 ; CHECK-NEXT: call void @llvm.dbg.value(metadata %struct.S* [[P:%.*]], metadata !20, metadata !DIExpression()), !dbg !25
1414 ; CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[P]], i64 [[N]], i32 0, !dbg !26
15 ; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 undef, metadata !21, metadata !DIExpression()), !dbg !27
15 ; CHECK-NEXT: call void @llvm.dbg.value(metadata !2, metadata !21, metadata !DIExpression()), !dbg !27
1616 ; CHECK-NEXT: [[Y3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[N]], i32 1, !dbg !28
1717 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[X1]] to <2 x i64>*, !dbg !26
1818 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8, !dbg !26, !tbaa !29
19 ; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 undef, metadata !22, metadata !DIExpression()), !dbg !33
19 ; CHECK-NEXT: call void @llvm.dbg.value(metadata !2, metadata !22, metadata !DIExpression()), !dbg !33
2020 ; CHECK-NEXT: [[X5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 0, !dbg !34
2121 ; CHECK-NEXT: [[Y7:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 1, !dbg !35
2222 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[X5]] to <2 x i64>*, !dbg !36
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt -slp-vectorizer -S -o - -mtriple=i386 -mcpu=haswell < %s | FileCheck %s
2 target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
3
4 @shift = common local_unnamed_addr global [10 x i32] zeroinitializer, align 4
5 @data = common local_unnamed_addr global [10 x i8*] zeroinitializer, align 4
6
7 define void @flat(i32 %intensity) {
8 ; CHECK-LABEL: @flat(
9 ; CHECK-NEXT: entry:
10 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4
11 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4
12 ; CHECK-NEXT: [[TMP2:%.*]] = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4
13 ; CHECK-NEXT: [[TMP3:%.*]] = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4
14 ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 1, [[TMP0]]
15 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 [[SHR]]
16 ; CHECK-NEXT: [[SHR1:%.*]] = lshr i32 1, [[TMP1]]
17 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i32 [[SHR1]]
18 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
19 ; CHECK: for.cond.cleanup:
20 ; CHECK-NEXT: ret void
21 ; CHECK: for.body:
22 ; CHECK-NEXT: [[D1_DATA_046:%.*]] = phi i8* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ADD_PTR23_1:%.*]], [[FOR_BODY]] ]
23 ; CHECK-NEXT: [[Y_045:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
24 ; CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
25 ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP4]] to i32
26 ; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[CONV]], -128
27 ; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1
28 ; CHECK-NEXT: [[CONV3:%.*]] = zext i8 [[TMP5]] to i32
29 ; CHECK-NEXT: [[SUB4:%.*]] = add nsw i32 [[CONV3]], -128
30 ; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[SUB]], -1
31 ; CHECK-NEXT: [[SUB7:%.*]] = sub nsw i32 128, [[CONV]]
32 ; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP5]], i32 [[SUB]], i32 [[SUB7]]
33 ; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[SUB4]], -1
34 ; CHECK-NEXT: [[SUB12:%.*]] = sub nsw i32 128, [[CONV3]]
35 ; CHECK-NEXT: [[COND14:%.*]] = select i1 [[CMP8]], i32 [[SUB4]], i32 [[SUB12]]
36 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[COND14]], [[COND]]
37 ; CHECK-NEXT: [[IDX_NEG:%.*]] = sub nsw i32 0, [[ADD]]
38 ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[IDX_NEG]]
39 ; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[ADD_PTR]], align 1
40 ; CHECK-NEXT: [[CONV15:%.*]] = zext i8 [[TMP6]] to i32
41 ; CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], [[INTENSITY:%.*]]
42 ; CHECK-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8
43 ; CHECK-NEXT: store i8 [[CONV17]], i8* [[ADD_PTR]], align 1
44 ; CHECK-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[ADD]]
45 ; CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* [[ADD_PTR18]], align 1
46 ; CHECK-NEXT: [[NOT_TOBOOL:%.*]] = icmp eq i8 [[TMP7]], 0
47 ; CHECK-NEXT: [[CONV21:%.*]] = zext i1 [[NOT_TOBOOL]] to i8
48 ; CHECK-NEXT: store i8 [[CONV21]], i8* [[ADD_PTR18]], align 1
49 ; CHECK-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[TMP1]]
50 ; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
51 ; CHECK-NEXT: [[CONV_1:%.*]] = zext i8 [[TMP8]] to i32
52 ; CHECK-NEXT: [[SUB_1:%.*]] = add nsw i32 [[CONV_1]], -128
53 ; CHECK-NEXT: [[TMP9:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1
54 ; CHECK-NEXT: [[CONV3_1:%.*]] = zext i8 [[TMP9]] to i32
55 ; CHECK-NEXT: [[SUB4_1:%.*]] = add nsw i32 [[CONV3_1]], -128
56 ; CHECK-NEXT: [[CMP5_1:%.*]] = icmp sgt i32 [[SUB_1]], -1
57 ; CHECK-NEXT: [[SUB7_1:%.*]] = sub nsw i32 128, [[CONV_1]]
58 ; CHECK-NEXT: [[COND_1:%.*]] = select i1 [[CMP5_1]], i32 [[SUB_1]], i32 [[SUB7_1]]
59 ; CHECK-NEXT: [[CMP8_1:%.*]] = icmp sgt i32 [[SUB4_1]], -1
60 ; CHECK-NEXT: [[SUB12_1:%.*]] = sub nsw i32 128, [[CONV3_1]]
61 ; CHECK-NEXT: [[COND14_1:%.*]] = select i1 [[CMP8_1]], i32 [[SUB4_1]], i32 [[SUB12_1]]
62 ; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[COND14_1]], [[COND_1]]
63 ; CHECK-NEXT: [[IDX_NEG_1:%.*]] = sub nsw i32 0, [[ADD_1]]
64 ; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[IDX_NEG_1]]
65 ; CHECK-NEXT: [[TMP10:%.*]] = load i8, i8* [[ADD_PTR_1]], align 1
66 ; CHECK-NEXT: [[CONV15_1:%.*]] = zext i8 [[TMP10]] to i32
67 ; CHECK-NEXT: [[ADD16_1:%.*]] = add nsw i32 [[CONV15_1]], [[INTENSITY]]
68 ; CHECK-NEXT: [[CONV17_1:%.*]] = trunc i32 [[ADD16_1]] to i8
69 ; CHECK-NEXT: store i8 [[CONV17_1]], i8* [[ADD_PTR_1]], align 1
70 ; CHECK-NEXT: [[ADD_PTR18_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[ADD_1]]
71 ; CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[ADD_PTR18_1]], align 1
72 ; CHECK-NEXT: [[NOT_TOBOOL_1:%.*]] = icmp eq i8 [[TMP11]], 0
73 ; CHECK-NEXT: [[CONV21_1:%.*]] = zext i1 [[NOT_TOBOOL_1]] to i8
74 ; CHECK-NEXT: store i8 [[CONV21_1]], i8* [[ADD_PTR18_1]], align 1
75 ; CHECK-NEXT: [[ADD_PTR23_1]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[TMP1]]
76 ; CHECK-NEXT: [[INC_1]] = add nsw i32 [[Y_045]], 2
77 ; CHECK-NEXT: [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], 128
78 ; CHECK-NEXT: br i1 [[EXITCOND_1]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
79 ;
80 entry:
81 %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4
82 %1 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4
83 %2 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4
84 %3 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4
85 %shr = lshr i32 1, %0
86 %arrayidx = getelementptr inbounds i8, i8* %2, i32 %shr
87 %shr1 = lshr i32 1, %1
88 %arrayidx2 = getelementptr inbounds i8, i8* %3, i32 %shr1
89 br label %for.body
90
91 for.cond.cleanup: ; preds = %for.body
92 ret void
93
94 for.body: ; preds = %for.body, %entry
95 %d1_data.046 = phi i8* [ %3, %entry ], [ %add.ptr23.1, %for.body ]
96 %y.045 = phi i32 [ 0, %entry ], [ %inc.1, %for.body ]
97 %4 = load i8, i8* %arrayidx, align 1
98 %conv = zext i8 %4 to i32
99 %sub = add nsw i32 %conv, -128
100 %5 = load i8, i8* %arrayidx2, align 1
101 %conv3 = zext i8 %5 to i32
102 %sub4 = add nsw i32 %conv3, -128
103 %cmp5 = icmp sgt i32 %sub, -1
104 %sub7 = sub nsw i32 128, %conv
105 %cond = select i1 %cmp5, i32 %sub, i32 %sub7
106 %cmp8 = icmp sgt i32 %sub4, -1
107 %sub12 = sub nsw i32 128, %conv3
108 %cond14 = select i1 %cmp8, i32 %sub4, i32 %sub12
109 %add = add nsw i32 %cond14, %cond
110 %idx.neg = sub nsw i32 0, %add
111 %add.ptr = getelementptr inbounds i8, i8* %d1_data.046, i32 %idx.neg
112 %6 = load i8, i8* %add.ptr, align 1
113 %conv15 = zext i8 %6 to i32
114 %add16 = add nsw i32 %conv15, %intensity
115 %conv17 = trunc i32 %add16 to i8
116 store i8 %conv17, i8* %add.ptr, align 1
117 %add.ptr18 = getelementptr inbounds i8, i8* %d1_data.046, i32 %add
118 %7 = load i8, i8* %add.ptr18, align 1
119 %not.tobool = icmp eq i8 %7, 0
120 %conv21 = zext i1 %not.tobool to i8
121 store i8 %conv21, i8* %add.ptr18, align 1
122 %add.ptr23 = getelementptr inbounds i8, i8* %d1_data.046, i32 %1
123 %8 = load i8, i8* %arrayidx, align 1
124 %conv.1 = zext i8 %8 to i32
125 %sub.1 = add nsw i32 %conv.1, -128
126 %9 = load i8, i8* %arrayidx2, align 1
127 %conv3.1 = zext i8 %9 to i32
128 %sub4.1 = add nsw i32 %conv3.1, -128
129 %cmp5.1 = icmp sgt i32 %sub.1, -1
130 %sub7.1 = sub nsw i32 128, %conv.1
131 %cond.1 = select i1 %cmp5.1, i32 %sub.1, i32 %sub7.1
132 %cmp8.1 = icmp sgt i32 %sub4.1, -1
133 %sub12.1 = sub nsw i32 128, %conv3.1
134 %cond14.1 = select i1 %cmp8.1, i32 %sub4.1, i32 %sub12.1
135 %add.1 = add nsw i32 %cond14.1, %cond.1
136 %idx.neg.1 = sub nsw i32 0, %add.1
137 %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %idx.neg.1
138 %10 = load i8, i8* %add.ptr.1, align 1
139 %conv15.1 = zext i8 %10 to i32
140 %add16.1 = add nsw i32 %conv15.1, %intensity
141 %conv17.1 = trunc i32 %add16.1 to i8
142 store i8 %conv17.1, i8* %add.ptr.1, align 1
143 %add.ptr18.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %add.1
144 %11 = load i8, i8* %add.ptr18.1, align 1
145 %not.tobool.1 = icmp eq i8 %11, 0
146 %conv21.1 = zext i1 %not.tobool.1 to i8
147 store i8 %conv21.1, i8* %add.ptr18.1, align 1
148 %add.ptr23.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %1
149 %inc.1 = add nsw i32 %y.045, 2
150 %exitcond.1 = icmp eq i32 %inc.1, 128
151 br i1 %exitcond.1, label %for.cond.cleanup, label %for.body
152 }
1717 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
1818 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
1919 ; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[TMP4]], [[TMP4]]
20 ; CHECK-NEXT: [[TMP9:%.*]] = add i32 1, undef
21 ; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP7]]
22 ; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], undef
23 ; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], [[TMP6]]
24 ; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], undef
25 ; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP6]] to i64
26 ; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP13]], [[TMP5]]
20 ; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP6]] to i64
2721 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> undef, <4 x i32>
2822 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP8]], [[RDX_SHUF]]
2923 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32>
3024 ; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
31 ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
32 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP16]], 1
25 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
26 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP10]], 1
3327 ; CHECK-NEXT: [[OP_EXTRA3:%.*]] = add i32 [[OP_EXTRA]], [[TMP7]]
3428 ; CHECK-NEXT: [[OP_EXTRA4:%.*]] = add i32 [[OP_EXTRA3]], [[TMP6]]
3529 ; CHECK-NEXT: [[OP_EXTRA5]] = add i32 [[OP_EXTRA4]], [[TMP5]]
36 ; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP15]], undef
3730 ; CHECK-NEXT: br label [[LOOP]]
3831 ; CHECK: bail_out:
3932 ; CHECK-NEXT: ret void
1919 ; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32
2020 ; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> , [[TMP5]]
2121 ; CHECK-NEXT: [[TMP9:%.*]] = ashr exact <4 x i64> [[TMP8]],
22 ; CHECK-NEXT: [[SUM1:%.*]] = add i64 undef, undef
23 ; CHECK-NEXT: [[SUM2:%.*]] = add i64 [[SUM1]], undef
24 ; CHECK-NEXT: [[ZSUM:%.*]] = add i64 [[SUM2]], 0
25 ; CHECK-NEXT: [[JOIN:%.*]] = add i64 [[TMP6]], [[ZSUM]]
2622 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> undef, <4 x i32>
2723 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i64> [[TMP9]], [[RDX_SHUF]]
2824 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32>
3026 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[BIN_RDX2]], i32 0
3127 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = add i64 [[TMP10]], 0
3228 ; CHECK-NEXT: [[OP_EXTRA3]] = add i64 [[OP_EXTRA]], [[TMP6]]
33 ; CHECK-NEXT: [[LAST:%.*]] = add i64 [[JOIN]], undef
3429 ; CHECK-NEXT: br label [[LOOP]]
3530 ;
3631 entry:
1010 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32>
1111 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1
1212 ; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[SHUFFLE]],
13 ; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef
14 ; CHECK-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
15 ; CHECK-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
16 ; CHECK-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
17 ; CHECK-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
18 ; CHECK-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
19 ; CHECK-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
20 ; CHECK-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
21 ; CHECK-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
22 ; CHECK-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
23 ; CHECK-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
24 ; CHECK-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
25 ; CHECK-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
26 ; CHECK-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
27 ; CHECK-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
28 ; CHECK-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
29 ; CHECK-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], undef
30 ; CHECK-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
31 ; CHECK-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
32 ; CHECK-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
33 ; CHECK-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
34 ; CHECK-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
35 ; CHECK-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
36 ; CHECK-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
37 ; CHECK-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
38 ; CHECK-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
39 ; CHECK-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
40 ; CHECK-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
41 ; CHECK-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
42 ; CHECK-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], undef
43 ; CHECK-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
44 ; CHECK-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
45 ; CHECK-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
46 ; CHECK-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], undef
4713 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32>
4814 ; CHECK-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP3]], [[RDX_SHUF]]
4915 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
5117 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
5218 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
5319 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
54 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0]]
20 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP4]], [[TMP0:%.*]]
5521 ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
5622 ; CHECK-NEXT: [[OP_EXTRA6:%.*]] = and i32 [[OP_EXTRA5]], [[TMP0]]
5723 ; CHECK-NEXT: [[OP_EXTRA7:%.*]] = and i32 [[OP_EXTRA6]], [[TMP0]]
7844 ; CHECK-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
7945 ; CHECK-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP0]]
8046 ; CHECK-NEXT: [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]]
81 ; CHECK-NEXT: [[VAL_42:%.*]] = and i32 [[VAL_40]], undef
8247 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[OP_EXTRA30]], i32 0
8348 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 14910, i32 1
8449 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
10065 ; FORCE_REDUCTION-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32>
10166 ; FORCE_REDUCTION-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1
10267 ; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]],
103 ; FORCE_REDUCTION-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef
104 ; FORCE_REDUCTION-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
105 ; FORCE_REDUCTION-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
106 ; FORCE_REDUCTION-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
107 ; FORCE_REDUCTION-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
108 ; FORCE_REDUCTION-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], undef
109 ; FORCE_REDUCTION-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
110 ; FORCE_REDUCTION-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
111 ; FORCE_REDUCTION-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
112 ; FORCE_REDUCTION-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], undef
113 ; FORCE_REDUCTION-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
114 ; FORCE_REDUCTION-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
115 ; FORCE_REDUCTION-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
116 ; FORCE_REDUCTION-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
117 ; FORCE_REDUCTION-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
118 ; FORCE_REDUCTION-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], undef
11968 ; FORCE_REDUCTION-NEXT: [[VAL_20:%.*]] = add i32 [[TMP2]], 1496
120 ; FORCE_REDUCTION-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], [[VAL_20]]
121 ; FORCE_REDUCTION-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
122 ; FORCE_REDUCTION-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
123 ; FORCE_REDUCTION-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
124 ; FORCE_REDUCTION-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
125 ; FORCE_REDUCTION-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
126 ; FORCE_REDUCTION-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
127 ; FORCE_REDUCTION-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
128 ; FORCE_REDUCTION-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
129 ; FORCE_REDUCTION-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
130 ; FORCE_REDUCTION-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
131 ; FORCE_REDUCTION-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
132 ; FORCE_REDUCTION-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
13369 ; FORCE_REDUCTION-NEXT: [[VAL_34:%.*]] = add i32 [[TMP2]], 8555
134 ; FORCE_REDUCTION-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], [[VAL_34]]
135 ; FORCE_REDUCTION-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
136 ; FORCE_REDUCTION-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
13770 ; FORCE_REDUCTION-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32>
13871 ; FORCE_REDUCTION-NEXT: [[BIN_RDX:%.*]] = and <4 x i32> [[TMP3]], [[RDX_SHUF]]
13972 ; FORCE_REDUCTION-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32>
14174 ; FORCE_REDUCTION-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
14275 ; FORCE_REDUCTION-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], [[VAL_20]]
14376 ; FORCE_REDUCTION-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], [[VAL_34]]
144 ; FORCE_REDUCTION-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0]]
77 ; FORCE_REDUCTION-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP6]], [[TMP0:%.*]]
14578 ; FORCE_REDUCTION-NEXT: [[OP_EXTRA3:%.*]] = and i32 [[OP_EXTRA]], [[TMP0]]
14679 ; FORCE_REDUCTION-NEXT: [[OP_EXTRA4:%.*]] = and i32 [[OP_EXTRA3]], [[TMP0]]
14780 ; FORCE_REDUCTION-NEXT: [[OP_EXTRA5:%.*]] = and i32 [[OP_EXTRA4]], [[TMP0]]
169102 ; FORCE_REDUCTION-NEXT: [[OP_EXTRA27:%.*]] = and i32 [[OP_EXTRA26]], [[TMP0]]
170103 ; FORCE_REDUCTION-NEXT: [[OP_EXTRA28:%.*]] = and i32 [[OP_EXTRA27]], [[TMP0]]
171104 ; FORCE_REDUCTION-NEXT: [[OP_EXTRA29:%.*]] = and i32 [[OP_EXTRA28]], [[TMP2]]
172 ; FORCE_REDUCTION-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
173105 ; FORCE_REDUCTION-NEXT: [[VAL_39:%.*]] = add i32 [[TMP2]], 12529
174106 ; FORCE_REDUCTION-NEXT: [[VAL_40:%.*]] = and i32 [[OP_EXTRA29]], [[VAL_39]]
175107 ; FORCE_REDUCTION-NEXT: [[VAL_41:%.*]] = add i32 [[TMP2]], 13685
1212 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15
1313 ; CHECK-NEXT: store atomic i32 [[TMP3]], i32* [[VALS:%.*]] unordered, align 4
1414 ; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i32> [[SHUFFLE]],
15 ; CHECK-NEXT: [[V14:%.*]] = and i32 [[TMP2]], undef
16 ; CHECK-NEXT: [[V16:%.*]] = and i32 undef, [[V14]]
17 ; CHECK-NEXT: [[V18:%.*]] = and i32 undef, [[V16]]
18 ; CHECK-NEXT: [[V20:%.*]] = and i32 undef, [[V18]]
19 ; CHECK-NEXT: [[V22:%.*]] = and i32 undef, [[V20]]
20 ; CHECK-NEXT: [[V24:%.*]] = and i32 undef, [[V22]]
21 ; CHECK-NEXT: [[V26:%.*]] = and i32 undef, [[V24]]
22 ; CHECK-NEXT: [[V28:%.*]] = and i32 undef, [[V26]]
23 ; CHECK-NEXT: [[V30:%.*]] = and i32 undef, [[V28]]
24 ; CHECK-NEXT: [[V32:%.*]] = and i32 undef, [[V30]]
25 ; CHECK-NEXT: [[V34:%.*]] = and i32 undef, [[V32]]
26 ; CHECK-NEXT: [[V36:%.*]] = and i32 undef, [[V34]]
27 ; CHECK-NEXT: [[V38:%.*]] = and i32 undef, [[V36]]
28 ; CHECK-NEXT: [[V40:%.*]] = and i32 undef, [[V38]]
29 ; CHECK-NEXT: [[V42:%.*]] = and i32 undef, [[V40]]
3015 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> undef, <16 x i32>
3116 ; CHECK-NEXT: [[BIN_RDX:%.*]] = and <16 x i32> [[TMP4]], [[RDX_SHUF]]
3217 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32>
3722 ; CHECK-NEXT: [[BIN_RDX6:%.*]] = and <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]]
3823 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0
3924 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = and i32 [[TMP5]], [[TMP2]]
40 ; CHECK-NEXT: [[V43:%.*]] = and i32 undef, [[V42]]
4125 ; CHECK-NEXT: [[V44:%.*]] = add i32 [[TMP2]], 16
4226 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> undef, i32 [[V44]], i32 0
4327 ; CHECK-NEXT: [[TMP7]] = insertelement <2 x i32> [[TMP6]], i32 [[OP_EXTRA]], i32 1
2929 ; CHECK-NEXT: [[SH4:%.*]] = shl nuw nsw i64 [[Z4]], 24
3030 ; CHECK-NEXT: [[SH5:%.*]] = shl nuw nsw i64 [[Z5]], 16
3131 ; CHECK-NEXT: [[SH6:%.*]] = shl nuw nsw i64 [[Z6]], 8
32 ; CHECK-NEXT: [[OR01:%.*]] = or i64 undef, undef
33 ; CHECK-NEXT: [[OR012:%.*]] = or i64 [[OR01]], undef
34 ; CHECK-NEXT: [[OR0123:%.*]] = or i64 [[OR012]], undef
35 ; CHECK-NEXT: [[OR01234:%.*]] = or i64 [[OR0123]], [[SH4]]
36 ; CHECK-NEXT: [[OR012345:%.*]] = or i64 [[OR01234]], [[SH5]]
37 ; CHECK-NEXT: [[OR0123456:%.*]] = or i64 [[OR012345]], [[SH6]]
3832 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32>
3933 ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]]
4034 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32>
4438 ; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[SH5]]
4539 ; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], [[SH6]]
4640 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z7]]
47 ; CHECK-NEXT: [[OR01234567:%.*]] = or i64 [[OR0123456]], [[Z7]]
4841 ; CHECK-NEXT: ret i64 [[OP_EXTRA]]
4942 ;
5043 %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0
107100 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
108101 ; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64>
109102 ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]],
110 ; CHECK-NEXT: [[OR01:%.*]] = or i64 undef, undef
111 ; CHECK-NEXT: [[OR012:%.*]] = or i64 [[OR01]], undef
112 ; CHECK-NEXT: [[OR0123:%.*]] = or i64 [[OR012]], undef
113 ; CHECK-NEXT: [[OR01234:%.*]] = or i64 [[OR0123]], undef
114 ; CHECK-NEXT: [[OR012345:%.*]] = or i64 [[OR01234]], undef
115 ; CHECK-NEXT: [[OR0123456:%.*]] = or i64 [[OR012345]], undef
116103 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> undef, <8 x i32>
117104 ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]]
118105 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i64> [[BIN_RDX]], <8 x i64> undef, <8 x i32>
120107 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i64> [[BIN_RDX2]], <8 x i64> undef, <8 x i32>
121108 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]], [[RDX_SHUF3]]
122109 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]], i32 0
123 ; CHECK-NEXT: [[OR01234567:%.*]] = or i64 [[OR0123456]], undef
124110 ; CHECK-NEXT: ret i64 [[TMP5]]
125111 ;
126112 %g0 = getelementptr inbounds %v8i8, %v8i8* %p, i64 0, i32 0
195181 ; CHECK-NEXT: [[S5:%.*]] = shl nuw nsw i64 [[Z5]], 40
196182 ; CHECK-NEXT: [[S6:%.*]] = shl nuw nsw i64 [[Z6]], 48
197183 ; CHECK-NEXT: [[S7:%.*]] = shl nuw i64 [[Z7]], 56
198 ; CHECK-NEXT: [[O1:%.*]] = or i64 undef, [[Z0]]
199 ; CHECK-NEXT: [[O2:%.*]] = or i64 [[O1]], undef
200 ; CHECK-NEXT: [[O3:%.*]] = or i64 [[O2]], undef
201 ; CHECK-NEXT: [[O4:%.*]] = or i64 [[O3]], undef
202 ; CHECK-NEXT: [[O5:%.*]] = or i64 [[O4]], [[S5]]
203 ; CHECK-NEXT: [[O6:%.*]] = or i64 [[O5]], [[S6]]
204184 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> undef, <4 x i32>
205185 ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i64> [[TMP4]], [[RDX_SHUF]]
206186 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i64> [[BIN_RDX]], <4 x i64> undef, <4 x i32>
210190 ; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[S6]]
211191 ; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP7]], [[S7]]
212192 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = or i64 [[TMP8]], [[Z0]]
213 ; CHECK-NEXT: [[O7:%.*]] = or i64 [[O6]], [[S7]]
214193 ; CHECK-NEXT: ret i64 [[OP_EXTRA]]
215194 ;
216195 %g1 = getelementptr inbounds i8, i8* %arg, i64 1
271250 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
272251 ; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64>
273252 ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw <8 x i64> [[TMP3]],
274 ; CHECK-NEXT: [[O1:%.*]] = or i64 undef, undef
275 ; CHECK-NEXT: [[O2:%.*]] = or i64 [[O1]], undef
276 ; CHECK-NEXT: [[O3:%.*]] = or i64 [[O2]], undef
277 ; CHECK-NEXT: [[O4:%.*]] = or i64 [[O3]], undef
278 ; CHECK-NEXT: [[O5:%.*]] = or i64 [[O4]], undef
279 ; CHECK-NEXT: [[O6:%.*]] = or i64 [[O5]], undef
280253 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> undef, <8 x i32>
281254 ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i64> [[TMP4]], [[RDX_SHUF]]
282255 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i64> [[BIN_RDX]], <8 x i64> undef, <8 x i32>
284257 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i64> [[BIN_RDX2]], <8 x i64> undef, <8 x i32>
285258 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i64> [[BIN_RDX2]], [[RDX_SHUF3]]
286259 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[BIN_RDX4]], i32 0
287 ; CHECK-NEXT: [[O7:%.*]] = or i64 [[O6]], undef
288260 ; CHECK-NEXT: ret i64 [[TMP5]]
289261 ;
290262 %g1 = getelementptr inbounds i8, i8* %arg, i64 1
9999 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16
100100 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
101101 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
102 ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]]
103 ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
104 ; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
105 ; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
106102 ; CHECK-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
107103 ; CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
108 ; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]
109 ; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float undef, [[ADD7]]
110 ; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]]
111 ; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]]
112104 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32>
113105 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]]
114106 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32>
118110 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
119111 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
120112 ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]]
121 ; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]]
122113 ; CHECK-NEXT: store float [[OP_EXTRA5]], float* @res, align 4
123114 ; CHECK-NEXT: ret float [[OP_EXTRA5]]
124115 ;
130121 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16
131122 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16
132123 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]]
133 ; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]]
134 ; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
135 ; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
136 ; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
137124 ; THRESHOLD-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
138125 ; THRESHOLD-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
139 ; THRESHOLD-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]
140 ; THRESHOLD-NEXT: [[ADD19:%.*]] = fadd fast float undef, [[ADD7]]
141 ; THRESHOLD-NEXT: [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]]
142 ; THRESHOLD-NEXT: [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]]
143126 ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32>
144127 ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]]
145128 ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32>
149132 ; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
150133 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]]
151134 ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]]
152 ; THRESHOLD-NEXT: [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]]
153135 ; THRESHOLD-NEXT: store float [[OP_EXTRA5]], float* @res, align 4
154136 ; THRESHOLD-NEXT: ret float [[OP_EXTRA5]]
155137 ;
204186 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
205187 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
206188 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
207 ; CHECK-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef
208 ; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
209189 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32>
210190 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
211191 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32>
212192 ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
213 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
214 ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
215 ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
216 ; CHECK-NEXT: store float [[TMP8]], float* @res, align 4
217 ; CHECK-NEXT: ret float [[TMP8]]
193 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
194 ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
195 ; CHECK-NEXT: store float [[TMP5]], float* @res, align 4
196 ; CHECK-NEXT: ret float [[TMP5]]
218197 ;
219198 ; THRESHOLD-LABEL: @bazzz(
220199 ; THRESHOLD-NEXT: entry:
223202 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
224203 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
225204 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
226 ; THRESHOLD-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef
227 ; THRESHOLD-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
228205 ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32>
229206 ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
230207 ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32>
231208 ; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
232 ; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
233 ; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
234 ; THRESHOLD-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
235 ; THRESHOLD-NEXT: store float [[TMP8]], float* @res, align 4
236 ; THRESHOLD-NEXT: ret float [[TMP8]]
209 ; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
210 ; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
211 ; THRESHOLD-NEXT: store float [[TMP5]], float* @res, align 4
212 ; THRESHOLD-NEXT: ret float [[TMP5]]
237213 ;
238214 entry:
239215 %0 = load i32, i32* @n, align 4
266242 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
267243 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
268244 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
269 ; CHECK-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef
270 ; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
271245 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32>
272246 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
273247 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32>
274248 ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
275 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
276 ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
277 ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
278 ; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP8]] to i32
249 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
250 ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
251 ; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
279252 ; CHECK-NEXT: store i32 [[CONV4]], i32* @n, align 4
280253 ; CHECK-NEXT: ret i32 [[CONV4]]
281254 ;
286259 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
287260 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
288261 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]]
289 ; THRESHOLD-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef
290 ; THRESHOLD-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]]
291262 ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32>
292263 ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
293264 ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32>
294265 ; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
295 ; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
296 ; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]]
297 ; THRESHOLD-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]]
298 ; THRESHOLD-NEXT: [[CONV4:%.*]] = fptosi float [[TMP8]] to i32
266 ; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
267 ; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]]
268 ; THRESHOLD-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32
299269 ; THRESHOLD-NEXT: store i32 [[CONV4]], i32* @n, align 4
300270 ; THRESHOLD-NEXT: ret i32 [[CONV4]]
301271 ;
329299 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
330300 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
331301 ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
332 ; CHECK-NEXT: [[CMP4:%.*]] = fcmp fast ogt float undef, undef
333 ; CHECK-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef, float undef
334 ; CHECK-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], undef
335 ; CHECK-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float undef
336 ; CHECK-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], undef
337302 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32>
338303 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP2]], [[RDX_SHUF]]
339304 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]]
341306 ; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
342307 ; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]]
343308 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0
344 ; CHECK-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float undef
345309 ; CHECK-NEXT: store float [[TMP3]], float* @res, align 4
346310 ; CHECK-NEXT: ret float [[TMP3]]
347311 ;
350314 ; THRESHOLD-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16
351315 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16
352316 ; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]]
353 ; THRESHOLD-NEXT: [[CMP4:%.*]] = fcmp fast ogt float undef, undef
354 ; THRESHOLD-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef, float undef
355 ; THRESHOLD-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], undef
356 ; THRESHOLD-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float undef
357 ; THRESHOLD-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], undef
358317 ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32>
359318 ; THRESHOLD-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP2]], [[RDX_SHUF]]
360319 ; THRESHOLD-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]]
362321 ; THRESHOLD-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
363322 ; THRESHOLD-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]]
364323 ; THRESHOLD-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0
365 ; THRESHOLD-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float undef
366324 ; THRESHOLD-NEXT: store float [[TMP3]], float* @res, align 4
367325 ; THRESHOLD-NEXT: ret float [[TMP3]]
368326 ;
409367 ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
410368 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>*
411369 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4
412 ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, undef
413 ; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
414 ; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
415 ; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
416 ; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
417 ; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
418 ; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
419 ; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
420 ; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
421 ; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
422 ; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
423 ; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
424 ; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
425 ; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
426 ; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
427370 ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
428371 ; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
429372 ; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
458401 ; CHECK-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47
459402 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>*
460403 ; CHECK-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4
461 ; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
462 ; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
463 ; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
464 ; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
465 ; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
466 ; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
467 ; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
468 ; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
469 ; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
470 ; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
471 ; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
472 ; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
473 ; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
474 ; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
475 ; CHECK-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
476 ; CHECK-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
477 ; CHECK-NEXT: [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]]
478 ; CHECK-NEXT: [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]]
479 ; CHECK-NEXT: [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]]
480 ; CHECK-NEXT: [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]]
481 ; CHECK-NEXT: [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]]
482 ; CHECK-NEXT: [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]]
483 ; CHECK-NEXT: [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]]
484 ; CHECK-NEXT: [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]]
485 ; CHECK-NEXT: [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]]
486 ; CHECK-NEXT: [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]]
487 ; CHECK-NEXT: [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]]
488 ; CHECK-NEXT: [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]]
489 ; CHECK-NEXT: [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]]
490 ; CHECK-NEXT: [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]]
491 ; CHECK-NEXT: [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]]
492404 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]], <32 x float> undef, <32 x i32>
493405 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]], [[RDX_SHUF]]
494406 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32>
510422 ; CHECK-NEXT: [[BIN_RDX16:%.*]] = fadd fast <16 x float> [[BIN_RDX14]], [[RDX_SHUF15]]
511423 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x float> [[BIN_RDX16]], i32 0
512424 ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
513 ; CHECK-NEXT: [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]]
514425 ; CHECK-NEXT: ret float [[OP_RDX]]
515426 ;
516427 ; THRESHOLD-LABEL: @f(
532443 ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
533444 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>*
534445 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4
535 ; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, undef
536 ; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
537 ; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
538 ; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
539 ; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
540 ; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
541 ; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
542 ; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
543 ; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
544 ; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
545 ; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
546 ; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
547 ; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
548 ; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
549 ; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
550446 ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
551447 ; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
552448 ; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18
581477 ; THRESHOLD-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47
582478 ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>*
583479 ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4
584 ; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
585 ; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
586 ; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
587 ; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
588 ; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
589 ; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
590 ; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
591 ; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
592 ; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
593 ; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
594 ; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
595 ; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
596 ; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
597 ; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
598 ; THRESHOLD-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
599 ; THRESHOLD-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
600 ; THRESHOLD-NEXT: [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]]
601 ; THRESHOLD-NEXT: [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]]
602 ; THRESHOLD-NEXT: [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]]
603 ; THRESHOLD-NEXT: [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]]
604 ; THRESHOLD-NEXT: [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]]
605 ; THRESHOLD-NEXT: [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]]
606 ; THRESHOLD-NEXT: [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]]
607 ; THRESHOLD-NEXT: [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]]
608 ; THRESHOLD-NEXT: [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]]
609 ; THRESHOLD-NEXT: [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]]
610 ; THRESHOLD-NEXT: [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]]
611 ; THRESHOLD-NEXT: [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]]
612 ; THRESHOLD-NEXT: [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]]
613 ; THRESHOLD-NEXT: [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]]
614 ; THRESHOLD-NEXT: [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]]
615480 ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]], <32 x float> undef, <32 x i32>
616481 ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]], [[RDX_SHUF]]
617482 ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32>
633498 ; THRESHOLD-NEXT: [[BIN_RDX16:%.*]] = fadd fast <16 x float> [[BIN_RDX14]], [[RDX_SHUF15]]
634499 ; THRESHOLD-NEXT: [[TMP5:%.*]] = extractelement <16 x float> [[BIN_RDX16]], i32 0
635500 ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
636 ; THRESHOLD-NEXT: [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]]
637501 ; THRESHOLD-NEXT: ret float [[OP_RDX]]
638502 ;
639503 entry:
820684 ; CHECK-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31
821685 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>*
822686 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
823 ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]]
824 ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
825 ; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
826 ; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
827 ; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
828 ; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
829 ; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
830 ; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
831 ; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
832 ; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
833 ; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
834 ; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
835 ; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
836 ; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
837 ; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
838 ; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
839 ; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
840 ; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
841 ; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
842 ; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
843 ; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
844 ; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
845 ; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
846 ; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
847 ; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
848 ; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
849 ; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
850 ; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
851 ; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
852 ; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
853 ; CHECK-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
854687 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32>
855688 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]]
856689 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32>
863696 ; CHECK-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]]
864697 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0
865698 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]]
866 ; CHECK-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
867699 ; CHECK-NEXT: ret float [[OP_EXTRA]]
868700 ;
869701 ; THRESHOLD-LABEL: @f1(
903735 ; THRESHOLD-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31
904736 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>*
905737 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4
906 ; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]]
907 ; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
908 ; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
909 ; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
910 ; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
911 ; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
912 ; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
913 ; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
914 ; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
915 ; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
916 ; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
917 ; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
918 ; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
919 ; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
920 ; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
921 ; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
922 ; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
923 ; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
924 ; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
925 ; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
926 ; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
927 ; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
928 ; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
929 ; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
930 ; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
931 ; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
932 ; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
933 ; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
934 ; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
935 ; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
936 ; THRESHOLD-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]]
937738 ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32>
938739 ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]]
939740 ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32>
946747 ; THRESHOLD-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]]
947748 ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0
948749 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]]
949 ; THRESHOLD-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]]
950750 ; THRESHOLD-NEXT: ret float [[OP_EXTRA]]
951751 ;
952752 entry:
1057857 ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
1058858 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
1059859 ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4
1060 ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
1061860 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
1062861 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
1063862 ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
1064863 ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
1065864 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>*
1066865 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
1067 ; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
1068 ; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
1069 ; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
1070 ; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
1071866 ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
1072867 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
1073868 ; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
1078873 ; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
1079874 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>*
1080875 ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
1081 ; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
1082 ; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
1083 ; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
1084 ; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
1085 ; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
1086 ; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
1087 ; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
1088 ; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
1089876 ; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
1090877 ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
1091878 ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
1104891 ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
1105892 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>*
1106893 ; CHECK-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4
1107 ; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
1108 ; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
1109 ; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
1110 ; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
1111 ; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
1112 ; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
1113 ; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
1114 ; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
1115 ; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
1116 ; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
1117 ; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
1118 ; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
1119 ; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
1120 ; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
1121 ; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
1122894 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> undef, <16 x i32>
1123895 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]], [[RDX_SHUF]]
1124896 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32>
1144916 ; CHECK-NEXT: [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]]
1145917 ; CHECK-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]]
1146918 ; CHECK-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]]
1147 ; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
1148919 ; CHECK-NEXT: ret float [[TMP12]]
1149920 ;
1150921 ; THRESHOLD-LABEL: @loadadd31(
1153924 ; THRESHOLD-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
1154925 ; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
1155926 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4
1156 ; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
1157927 ; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
1158928 ; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4
1159929 ; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5
1160930 ; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6
1161931 ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>*
1162932 ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
1163 ; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
1164 ; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
1165 ; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
1166 ; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
1167933 ; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
1168934 ; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8
1169935 ; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9
1174940 ; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14
1175941 ; THRESHOLD-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>*
1176942 ; THRESHOLD-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
1177 ; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
1178 ; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
1179 ; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
1180 ; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
1181 ; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
1182 ; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
1183 ; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
1184 ; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
1185943 ; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15
1186944 ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16
1187945 ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17
1200958 ; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30
1201959 ; THRESHOLD-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>*
1202960 ; THRESHOLD-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4
1203 ; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
1204 ; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]]
1205 ; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]]
1206 ; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]]
1207 ; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]]
1208 ; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]]
1209 ; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]]
1210 ; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]]
1211 ; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]]
1212 ; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]]
1213 ; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]]
1214 ; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]]
1215 ; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]]
1216 ; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]]
1217 ; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]]
1218961 ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> undef, <16 x i32>
1219962 ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]], [[RDX_SHUF]]
1220963 ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32>
1240983 ; THRESHOLD-NEXT: [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]]
1241984 ; THRESHOLD-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]]
1242985 ; THRESHOLD-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]]
1243 ; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]]
1244986 ; THRESHOLD-NEXT: ret float [[TMP12]]
1245987 ;
1246988 entry:
13511093 ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
13521094 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
13531095 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
1354 ; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
1355 ; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
1356 ; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]]
1357 ; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
1358 ; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
1359 ; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
1360 ; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
1361 ; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
13621096 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32>
13631097 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
13641098 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32>
13681102 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
13691103 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
13701104 ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
1371 ; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
13721105 ; CHECK-NEXT: ret float [[OP_EXTRA5]]
13731106 ;
13741107 ; THRESHOLD-LABEL: @extra_args(
13851118 ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
13861119 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
13871120 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
1388 ; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
1389 ; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
1390 ; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]]
1391 ; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
1392 ; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
1393 ; THRESHOLD-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
1394 ; THRESHOLD-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
1395 ; THRESHOLD-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
13961121 ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32>
13971122 ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
13981123 ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32>
14021127 ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
14031128 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
14041129 ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
1405 ; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
14061130 ; THRESHOLD-NEXT: ret float [[OP_EXTRA5]]
14071131 ;
14081132 entry:
14511175 ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
14521176 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
14531177 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
1454 ; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
1455 ; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
1456 ; CHECK-NEXT: [[ADD41:%.*]] = fadd fast float [[ADD4]], 5.000000e+00
1457 ; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]]
1458 ; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
1459 ; CHECK-NEXT: [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]], 5.000000e+00
1460 ; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]]
1461 ; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
1462 ; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
1463 ; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
14641178 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32>
14651179 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
14661180 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32>
14721186 ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00
14731187 ; CHECK-NEXT: [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]], 5.000000e+00
14741188 ; CHECK-NEXT: [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]], [[CONV]]
1475 ; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
14761189 ; CHECK-NEXT: ret float [[OP_EXTRA7]]
14771190 ;
14781191 ; THRESHOLD-LABEL: @extra_args_same_several_times(
14891202 ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
14901203 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
14911204 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
1492 ; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
1493 ; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
1494 ; THRESHOLD-NEXT: [[ADD41:%.*]] = fadd fast float [[ADD4]], 5.000000e+00
1495 ; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]]
1496 ; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]]
1497 ; THRESHOLD-NEXT: [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]], 5.000000e+00
1498 ; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]]
1499 ; THRESHOLD-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
1500 ; THRESHOLD-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]]
1501 ; THRESHOLD-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
15021205 ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32>
15031206 ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
15041207 ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32>
15101213 ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00
15111214 ; THRESHOLD-NEXT: [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]], 5.000000e+00
15121215 ; THRESHOLD-NEXT: [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]], [[CONV]]
1513 ; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
15141216 ; THRESHOLD-NEXT: ret float [[OP_EXTRA7]]
15151217 ;
15161218 entry:
15631265 ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
15641266 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
15651267 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
1566 ; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
1567 ; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
1568 ; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]]
1569 ; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
1570 ; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
1571 ; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]]
1572 ; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]]
1573 ; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
15741268 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32>
15751269 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
15761270 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32>
15801274 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
15811275 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
15821276 ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
1583 ; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
15841277 ; CHECK-NEXT: ret float [[OP_EXTRA5]]
15851278 ;
15861279 ; THRESHOLD-LABEL: @extra_args_no_replace(
15991292 ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7
16001293 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>*
16011294 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
1602 ; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]]
1603 ; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]]
1604 ; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]]
1605 ; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]]
1606 ; THRESHOLD-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]]
1607 ; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]]
1608 ; THRESHOLD-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]]
1609 ; THRESHOLD-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]]
16101295 ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32>
16111296 ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]]
16121297 ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32>
16161301 ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
16171302 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]]
16181303 ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]]
1619 ; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]]
16201304 ; THRESHOLD-NEXT: ret float [[OP_EXTRA5]]
16211305 ;
16221306 entry:
16671351 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
16681352 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer
16691353 ; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
1670 ; CHECK-NEXT: [[R1:%.*]] = add nuw i32 [[ARG]], undef
1671 ; CHECK-NEXT: [[R2:%.*]] = add nsw i32 [[R1]], undef
1672 ; CHECK-NEXT: [[R3:%.*]] = add nsw i32 [[R2]], undef
1673 ; CHECK-NEXT: [[R4:%.*]] = add nsw i32 [[R3]], undef
16741354 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32>
16751355 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]]
16761356 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32>
16781358 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
16791359 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
16801360 ; CHECK-NEXT: [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]]
1681 ; CHECK-NEXT: [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]]
16821361 ; CHECK-NEXT: ret i32 [[OP_EXTRA3]]
16831362 ;
16841363 ; THRESHOLD-LABEL: @wobble(
16951374 ; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
16961375 ; THRESHOLD-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer
16971376 ; THRESHOLD-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
1698 ; THRESHOLD-NEXT: [[R1:%.*]] = add nuw i32 [[ARG]], undef
1699 ; THRESHOLD-NEXT: [[R2:%.*]] = add nsw i32 [[R1]], undef
1700 ; THRESHOLD-NEXT: [[R3:%.*]] = add nsw i32 [[R2]], undef
1701 ; THRESHOLD-NEXT: [[R4:%.*]] = add nsw i32 [[R3]], undef
17021377 ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32>
17031378 ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]]
17041379 ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32>
17061381 ; THRESHOLD-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
17071382 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]]
17081383 ; THRESHOLD-NEXT: [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]]
1709 ; THRESHOLD-NEXT: [[R5:%.*]] = add nsw i32 [[R4]], [[TMP9]]
17101384 ; THRESHOLD-NEXT: ret i32 [[OP_EXTRA3]]
17111385 ;
17121386 bb:
1111 define i32 @maxi8(i32) {
1212 ; CHECK-LABEL: @maxi8(
1313 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16
14 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef
15 ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef
16 ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef
17 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef
18 ; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef
19 ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef
20 ; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
21 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
22 ; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
23 ; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
24 ; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
25 ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
26 ; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
2714 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32>
2815 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]]
2916 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]]
3320 ; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32>
3421 ; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
3522 ; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]]
36 ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0
37 ; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef
38 ; CHECK-NEXT: ret i32 [[TMP16]]
23 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0
24 ; CHECK-NEXT: ret i32 [[TMP3]]
3925 ;
4026 %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
4127 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
6551 define i32 @maxi16(i32) {
6652 ; CHECK-LABEL: @maxi16(
6753 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16
68 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef
69 ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef
70 ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef
71 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef
72 ; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef
73 ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef
74 ; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
75 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
76 ; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
77 ; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
78 ; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
79 ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
80 ; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
81 ; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef
82 ; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef
83 ; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef
84 ; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef
85 ; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef
86 ; CHECK-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
87 ; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef
88 ; CHECK-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef
89 ; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef
90 ; CHECK-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef
91 ; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef
92 ; CHECK-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef
93 ; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef
94 ; CHECK-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef
95 ; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef
96 ; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef
9754 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32>
9855 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]]
9956 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]]
10663 ; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32>
10764 ; CHECK-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]]
10865 ; CHECK-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> [[RDX_SHUF7]]
109 ; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0
110 ; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef
111 ; CHECK-NEXT: ret i32 [[TMP32]]
66 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0
67 ; CHECK-NEXT: ret i32 [[TMP3]]
11268 ;
11369 %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
11470 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
162118 define i32 @maxi32(i32) {
163119 ; CHECK-LABEL: @maxi32(
164120 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16
165 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef
166 ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef
167 ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef
168 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef
169 ; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef
170 ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef
171 ; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
172 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
173 ; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
174 ; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
175 ; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
176 ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
177 ; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
178 ; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef
179 ; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef
180 ; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef
181 ; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef
182 ; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef
183 ; CHECK-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
184 ; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef
185 ; CHECK-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef
186 ; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef
187 ; CHECK-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef
188 ; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef
189 ; CHECK-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef
190 ; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef
191 ; CHECK-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef
192 ; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef
193 ; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef
194 ; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef
195 ; CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], undef
196 ; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 undef
197 ; CHECK-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP34]], undef
198 ; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32 undef
199 ; CHECK-NEXT: [[TMP37:%.*]] = icmp sgt i32 [[TMP36]], undef
200 ; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32 undef
201 ; CHECK-NEXT: [[TMP39:%.*]] = icmp sgt i32 [[TMP38]], undef
202 ; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[TMP38]], i32 undef
203 ; CHECK-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP40]], undef
204 ; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP40]], i32 undef
205 ; CHECK-NEXT: [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], undef
206 ; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP42]], i32 undef
207 ; CHECK-NEXT: [[TMP45:%.*]] = icmp sgt i32 [[TMP44]], undef
208 ; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[TMP44]], i32 undef
209 ; CHECK-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP46]], undef
210 ; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP46]], i32 undef
211 ; CHECK-NEXT: [[TMP49:%.*]] = icmp sgt i32 [[TMP48]], undef
212 ; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[TMP48]], i32 undef
213 ; CHECK-NEXT: [[TMP51:%.*]] = icmp sgt i32 [[TMP50]], undef
214 ; CHECK-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[TMP50]], i32 undef
215 ; CHECK-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP52]], undef
216 ; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP52]], i32 undef
217 ; CHECK-NEXT: [[TMP55:%.*]] = icmp sgt i32 [[TMP54]], undef
218 ; CHECK-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], i32 [[TMP54]], i32 undef
219 ; CHECK-NEXT: [[TMP57:%.*]] = icmp sgt i32 [[TMP56]], undef
220 ; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 undef
221 ; CHECK-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP58]], undef
222 ; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP58]], i32 undef
223 ; CHECK-NEXT: [[TMP61:%.*]] = icmp sgt i32 [[TMP60]], undef
224 ; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[TMP60]], i32 undef
225 ; CHECK-NEXT: [[TMP63:%.*]] = icmp sgt i32 [[TMP62]], undef
226121 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32>
227122 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]]
228123 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]]
238133 ; CHECK-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32>
239134 ; CHECK-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]]
240135 ; CHECK-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]]
241 ; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0
242 ; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], i32 [[TMP62]], i32 undef
243 ; CHECK-NEXT: ret i32 [[TMP64]]
136 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0
137 ; CHECK-NEXT: ret i32 [[TMP3]]
244138 ;
245139 %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
246140 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
342236 define float @maxf8(float) {
343237 ; CHECK-LABEL: @maxf8(
344238 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16
345 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef
346 ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef
347 ; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
348 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef
349 ; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
350 ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef
351 ; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
352 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef
353 ; CHECK-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
354 ; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef
355 ; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
356 ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef
357 ; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
358239 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32>
359240 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]]
360241 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]]
364245 ; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32>
365246 ; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
366247 ; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]]
367 ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0
368 ; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef
369 ; CHECK-NEXT: ret float [[TMP16]]
248 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0
249 ; CHECK-NEXT: ret float [[TMP3]]
370250 ;
371251 %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
372252 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
396276 define float @maxf16(float) {
397277 ; CHECK-LABEL: @maxf16(
398278 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16
399 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef
400 ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef
401 ; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
402 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef
403 ; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
404 ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef
405 ; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
406 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef
407 ; CHECK-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
408 ; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef
409 ; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
410 ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef
411 ; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
412 ; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef
413 ; CHECK-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef
414 ; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef
415 ; CHECK-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef
416 ; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef
417 ; CHECK-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef
418 ; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef
419 ; CHECK-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef
420 ; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef
421 ; CHECK-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef
422 ; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef
423 ; CHECK-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef
424 ; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef
425 ; CHECK-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef
426 ; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef
427 ; CHECK-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef
428279 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32>
429280 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]]
430281 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]]
437288 ; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32>
438289 ; CHECK-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]]
439290 ; CHECK-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]]
440 ; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0
441 ; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef
442 ; CHECK-NEXT: ret float [[TMP32]]
291 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0
292 ; CHECK-NEXT: ret float [[TMP3]]
443293 ;
444294 %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
445295 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
493343 define float @maxf32(float) {
494344 ; CHECK-LABEL: @maxf32(
495345 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast ([32 x float]* @arr1 to <32 x float>*), align 16
496 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt float undef, undef
497 ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], float undef, float undef
498 ; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP4]], undef
499 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float [[TMP4]], float undef
500 ; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP6]], undef
501 ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP6]], float undef
502 ; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt float [[TMP8]], undef
503 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP8]], float undef
504 ; CHECK-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP10]], undef
505 ; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP10]], float undef
506 ; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], undef
507 ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP12]], float undef
508 ; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP14]], undef
509 ; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float [[TMP14]], float undef
510 ; CHECK-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP16]], undef
511 ; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP16]], float undef
512 ; CHECK-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], undef
513 ; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP18]], float undef
514 ; CHECK-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], undef
515 ; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], float [[TMP20]], float undef
516 ; CHECK-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], undef
517 ; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP22]], float undef
518 ; CHECK-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], undef
519 ; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP24]], float undef
520 ; CHECK-NEXT: [[TMP27:%.*]] = fcmp fast ogt float [[TMP26]], undef
521 ; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], float [[TMP26]], float undef
522 ; CHECK-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP28]], undef
523 ; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP28]], float undef
524 ; CHECK-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP30]], undef
525 ; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP30]], float undef
526 ; CHECK-NEXT: [[TMP33:%.*]] = fcmp fast ogt float [[TMP32]], undef
527 ; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], float [[TMP32]], float undef
528 ; CHECK-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP34]], undef
529 ; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP34]], float undef
530 ; CHECK-NEXT: [[TMP37:%.*]] = fcmp fast ogt float [[TMP36]], undef
531 ; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP36]], float undef
532 ; CHECK-NEXT: [[TMP39:%.*]] = fcmp fast ogt float [[TMP38]], undef
533 ; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], float [[TMP38]], float undef
534 ; CHECK-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP40]], undef
535 ; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP40]], float undef
536 ; CHECK-NEXT: [[TMP43:%.*]] = fcmp fast ogt float [[TMP42]], undef
537 ; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP42]], float undef
538 ; CHECK-NEXT: [[TMP45:%.*]] = fcmp fast ogt float [[TMP44]], undef
539 ; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], float [[TMP44]], float undef
540 ; CHECK-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP46]], undef
541 ; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP46]], float undef
542 ; CHECK-NEXT: [[TMP49:%.*]] = fcmp fast ogt float [[TMP48]], undef
543 ; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], float [[TMP48]], float undef
544 ; CHECK-NEXT: [[TMP51:%.*]] = fcmp fast ogt float [[TMP50]], undef
545 ; CHECK-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], float [[TMP50]], float undef
546 ; CHECK-NEXT: [[TMP53:%.*]] = fcmp fast ogt float [[TMP52]], undef
547 ; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP52]], float undef
548 ; CHECK-NEXT: [[TMP55:%.*]] = fcmp fast ogt float [[TMP54]], undef
549 ; CHECK-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], float [[TMP54]], float undef
550 ; CHECK-NEXT: [[TMP57:%.*]] = fcmp fast ogt float [[TMP56]], undef
551 ; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], float [[TMP56]], float undef
552 ; CHECK-NEXT: [[TMP59:%.*]] = fcmp fast ogt float [[TMP58]], undef
553 ; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP58]], float undef
554 ; CHECK-NEXT: [[TMP61:%.*]] = fcmp fast ogt float [[TMP60]], undef
555 ; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], float [[TMP60]], float undef
556 ; CHECK-NEXT: [[TMP63:%.*]] = fcmp fast ogt float [[TMP62]], undef
557346 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]], <32 x float> undef, <32 x i32>
558347 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <32 x float> [[TMP2]], [[RDX_SHUF]]
559348 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]]
569358 ; CHECK-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> undef, <32 x i32>
570359 ; CHECK-NEXT: [[RDX_MINMAX_CMP11:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]]
571360 ; CHECK-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> [[RDX_SHUF10]]
572 ; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0
573 ; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], float [[TMP62]], float undef
574 ; CHECK-NEXT: ret float [[TMP64]]
361 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0
362 ; CHECK-NEXT: ret float [[TMP3]]
575363 ;
576364 %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
577365 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
677465 ; SSE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
678466 ; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
679467 ; SSE-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
680 ; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
681 ; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
682 ; SSE-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
683 ; SSE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
684 ; SSE-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
685 ; SSE-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
686 ; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
687 ; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
688 ; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
689 ; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
468 ; SSE-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
690469 ; SSE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32>
691470 ; SSE-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]]
692471 ; SSE-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
693472 ; SSE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32>
694473 ; SSE-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
695474 ; SSE-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
696 ; SSE-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
697 ; SSE-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]]
698 ; SSE-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32 [[TMP15]]
699 ; SSE-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]]
700 ; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32 [[TMP5]]
701 ; SSE-NEXT: [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
702 ; SSE-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
703 ; SSE-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]]
704 ; SSE-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]], i32 [[TMP22]]
705 ; SSE-NEXT: [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4
706 ; SSE-NEXT: store i32 [[TMP25]], i32* @var, align 8
707 ; SSE-NEXT: ret i32 [[TMP24]]
475 ; SSE-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
476 ; SSE-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
477 ; SSE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]]
478 ; SSE-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
479 ; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP5]]
480 ; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
481 ; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
482 ; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32 [[TMP12]]
483 ; SSE-NEXT: [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
484 ; SSE-NEXT: store i32 [[TMP15]], i32* @var, align 8
485 ; SSE-NEXT: ret i32 [[TMP14]]
708486 ;
709487 ; AVX-LABEL: @maxi8_mutiple_uses(
710488 ; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
712490 ; AVX-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
713491 ; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
714492 ; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
715 ; AVX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
716 ; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
717 ; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
718 ; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
719 ; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
720 ; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
721 ; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
722 ; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
723 ; AVX-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
724 ; AVX-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
493 ; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
725494 ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32>
726495 ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]]
727496 ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
728497 ; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32>
729498 ; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
730499 ; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
731 ; AVX-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
732 ; AVX-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]]
733 ; AVX-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32 [[TMP15]]
734 ; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]]
735 ; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32 [[TMP5]]
736 ; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
737 ; AVX-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
738 ; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]]
739 ; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]], i32 [[TMP22]]
740 ; AVX-NEXT: [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4
741 ; AVX-NEXT: store i32 [[TMP25]], i32* @var, align 8
742 ; AVX-NEXT: ret i32 [[TMP24]]
500 ; AVX-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
501 ; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
502 ; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]]
503 ; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
504 ; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP5]]
505 ; AVX-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
506 ; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
507 ; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32 [[TMP12]]
508 ; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
509 ; AVX-NEXT: store i32 [[TMP15]], i32* @var, align 8
510 ; AVX-NEXT: ret i32 [[TMP14]]
743511 ;
744512 ; AVX2-LABEL: @maxi8_mutiple_uses(
745513 ; AVX2-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
747515 ; AVX2-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
748516 ; AVX2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
749517 ; AVX2-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
750 ; AVX2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
751 ; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
752 ; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
753 ; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
754 ; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
755 ; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
756 ; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
757 ; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
758 ; AVX2-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
759 ; AVX2-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
518 ; AVX2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
760519 ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32>
761520 ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]]
762521 ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
763522 ; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32>
764523 ; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
765524 ; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
766 ; AVX2-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
767 ; AVX2-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]]
768 ; AVX2-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32 [[TMP15]]
769 ; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]]
770 ; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32 [[TMP5]]
771 ; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
772 ; AVX2-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
773 ; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]]
774 ; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]], i32 [[TMP22]]
775 ; AVX2-NEXT: [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4
776 ; AVX2-NEXT: store i32 [[TMP25]], i32* @var, align 8
777 ; AVX2-NEXT: ret i32 [[TMP24]]
525 ; AVX2-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
526 ; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], [[TMP7]]
527 ; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[TMP7]]
528 ; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP5]]
529 ; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP5]]
530 ; AVX2-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
531 ; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP12]]
532 ; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[OP_EXTRA]], i32 [[TMP12]]
533 ; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP4]], i32 3, i32 4
534 ; AVX2-NEXT: store i32 [[TMP15]], i32* @var, align 8
535 ; AVX2-NEXT: ret i32 [[TMP14]]
778536 ;
779537 ; SKX-LABEL: @maxi8_mutiple_uses(
780538 ; SKX-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* bitcast ([32 x i32]* @arr to <2 x i32>*), align 16
796554 ; SKX-NEXT: [[TMP12:%.*]] = icmp sgt <2 x i32> [[TMP9]], [[TMP11]]
797555 ; SKX-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP12]], <2 x i32> [[TMP9]], <2 x i32> [[TMP11]]
798556 ; SKX-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP13]], i32 1
799 ; SKX-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef
800 ; SKX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef
801 ; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef
802 ; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef
803 ; SKX-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef
804 ; SKX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef
805 ; SKX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
806 ; SKX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef
807 ; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP6]]
808 ; SKX-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
809 ; SKX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP14]]
810 ; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP14]]
811 ; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP6]]
812 ; SKX-NEXT: [[TMP27:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
813 ; SKX-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP27]]
814 ; SKX-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[OP_EXTRA]], i32 [[TMP27]]
815 ; SKX-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
816 ; SKX-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 3, i32 4
817 ; SKX-NEXT: store i32 [[TMP31]], i32* @var, align 8
818 ; SKX-NEXT: ret i32 [[TMP29]]
557 ; SKX-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
558 ; SKX-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP15]], [[TMP14]]
559 ; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP16]], i32 [[TMP15]], i32 [[TMP14]]
560 ; SKX-NEXT: [[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
561 ; SKX-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP17]]
562 ; SKX-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[OP_EXTRA]], i32 [[TMP17]]
563 ; SKX-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
564 ; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 3, i32 4
565 ; SKX-NEXT: store i32 [[TMP21]], i32* @var, align 8
566 ; SKX-NEXT: ret i32 [[TMP19]]
819567 ;
820568 %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
821569 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
853601 ; SSE: pp:
854602 ; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
855603 ; SSE-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
856 ; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
857 ; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
858 ; SSE-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
859 ; SSE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
860 ; SSE-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
861 ; SSE-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
862 ; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
863 ; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
864 ; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
865 ; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
866 ; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
867 ; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
868 ; SSE-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
604 ; SSE-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
605 ; SSE-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
869606 ; SSE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32>
870607 ; SSE-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]]
871608 ; SSE-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
872609 ; SSE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32>
873610 ; SSE-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
874611 ; SSE-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
875 ; SSE-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
876 ; SSE-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]]
877 ; SSE-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP15]]
878 ; SSE-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]]
879 ; SSE-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP18]]
880 ; SSE-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]]
881 ; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP5]]
882 ; SSE-NEXT: [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
612 ; SSE-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
613 ; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
614 ; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
615 ; SSE-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
616 ; SSE-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
617 ; SSE-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
618 ; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]]
883619 ; SSE-NEXT: ret i32 [[OP_EXTRA]]
884620 ;
885621 ; AVX-LABEL: @maxi8_wrong_parent(
890626 ; AVX: pp:
891627 ; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
892628 ; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
893 ; AVX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
894 ; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
895 ; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
896 ; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
897 ; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
898 ; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
899 ; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
900 ; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
901 ; AVX-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
902 ; AVX-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
903 ; AVX-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
904 ; AVX-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
905 ; AVX-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
629 ; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
630 ; AVX-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
906631 ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32>
907632 ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]]
908633 ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
909634 ; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32>
910635 ; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
911636 ; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
912 ; AVX-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
913 ; AVX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]]
914 ; AVX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP15]]
915 ; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]]
916 ; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP18]]
917 ; AVX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]]
918 ; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP5]]
919 ; AVX-NEXT: [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
637 ; AVX-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
638 ; AVX-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
639 ; AVX-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
640 ; AVX-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
641 ; AVX-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
642 ; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
643 ; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]]
920644 ; AVX-NEXT: ret i32 [[OP_EXTRA]]
921645 ;
922646 ; AVX2-LABEL: @maxi8_wrong_parent(
927651 ; AVX2: pp:
928652 ; AVX2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
929653 ; AVX2-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
930 ; AVX2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
931 ; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
932 ; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
933 ; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
934 ; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
935 ; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
936 ; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
937 ; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
938 ; AVX2-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
939 ; AVX2-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
940 ; AVX2-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
941 ; AVX2-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
942 ; AVX2-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
654 ; AVX2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
655 ; AVX2-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
943656 ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32>
944657 ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]]
945658 ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
946659 ; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32>
947660 ; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
948661 ; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
949 ; AVX2-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
950 ; AVX2-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]]
951 ; AVX2-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP15]]
952 ; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]]
953 ; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP18]]
954 ; AVX2-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]]
955 ; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP5]]
956 ; AVX2-NEXT: [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
662 ; AVX2-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
663 ; AVX2-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
664 ; AVX2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
665 ; AVX2-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
666 ; AVX2-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]]
667 ; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]]
668 ; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]]
957669 ; AVX2-NEXT: ret i32 [[OP_EXTRA]]
958670 ;
959671 ; SKX-LABEL: @maxi8_wrong_parent(
984696 ; SKX-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> [[TMP17]], i32 [[TMP4]], i32 1
985697 ; SKX-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP14]], <2 x i32> [[TMP16]], <2 x i32> [[TMP18]]
986698 ; SKX-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1
987 ; SKX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef
988 ; SKX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef
989 ; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef
990 ; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef
991 ; SKX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef
992 ; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef
993 ; SKX-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef
994 ; SKX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef
995 ; SKX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], [[TMP7]]
996 ; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 [[TMP7]]
997 ; SKX-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], [[TMP8]]
998 ; SKX-NEXT: [[TMP32:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
999 ; SKX-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], [[TMP20]]
1000 ; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 [[TMP20]]
1001 ; SKX-NEXT: [[TMP34:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 [[TMP8]]
699 ; SKX-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
700 ; SKX-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP20]]
701 ; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP20]]
1002702 ; SKX-NEXT: ret i32 [[OP_EXTRA]]
1003703 ;
1004704 %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
3636 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
3737 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
3838 ; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]],
39 ; CHECK-NEXT: [[ADD6:%.*]] = fadd fast float undef, undef
40 ; CHECK-NEXT: [[ADD11:%.*]] = fadd fast float [[ADD6]], undef
4139 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32>
4240 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
4341 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32>
4442 ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
4543 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
46 ; CHECK-NEXT: [[ADD16:%.*]] = fadd fast float [[ADD11]], undef
4744 ; CHECK-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
4845 ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_033]], 1
4946 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
7673 ; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
7774 ; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
7875 ; STORE-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]],
79 ; STORE-NEXT: [[ADD6:%.*]] = fadd fast float undef, undef
80 ; STORE-NEXT: [[ADD11:%.*]] = fadd fast float [[ADD6]], undef
8176 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32>
8277 ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]]
8378 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32>
8479 ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
8580 ; STORE-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
86 ; STORE-NEXT: [[ADD16:%.*]] = fadd fast float [[ADD11]], undef
8781 ; STORE-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
8882 ; STORE-NEXT: [[INC]] = add nsw i64 [[I_033]], 1
8983 ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
177171 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
178172 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
179173 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
180 ; CHECK-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef
181 ; CHECK-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
182174 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32>
183175 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]]
184176 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32>
185177 ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
186178 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
187 ; CHECK-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
188179 ; CHECK-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
189180 ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_040]], 1
190181 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
222213 ; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
223214 ; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
224215 ; STORE-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
225 ; STORE-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef
226 ; STORE-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
227216 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32>
228217 ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]]
229218 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32>
230219 ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
231220 ; STORE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
232 ; STORE-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
233221 ; STORE-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
234222 ; STORE-NEXT: [[INC]] = add nsw i64 [[I_040]], 1
235223 ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
349337 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>*
350338 ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
351339 ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
352 ; CHECK-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef
353 ; CHECK-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
354 ; CHECK-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
355 ; CHECK-NEXT: [[ADD26:%.*]] = fadd fast float [[ADD20]], undef
356 ; CHECK-NEXT: [[ADD32:%.*]] = fadd fast float [[ADD26]], undef
357 ; CHECK-NEXT: [[ADD38:%.*]] = fadd fast float [[ADD32]], undef
358 ; CHECK-NEXT: [[ADD44:%.*]] = fadd fast float [[ADD38]], undef
359340 ; CHECK-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
360341 ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]]
361342 ; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4
368349 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
369350 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
370351 ; CHECK-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
371 ; CHECK-NEXT: [[ADD50:%.*]] = fadd fast float [[ADD44]], [[MUL49]]
372352 ; CHECK-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
373353 ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_083]], 1
374354 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
420400 ; STORE-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>*
421401 ; STORE-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
422402 ; STORE-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
423 ; STORE-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef
424 ; STORE-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
425 ; STORE-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
426 ; STORE-NEXT: [[ADD26:%.*]] = fadd fast float [[ADD20]], undef
427 ; STORE-NEXT: [[ADD32:%.*]] = fadd fast float [[ADD26]], undef
428 ; STORE-NEXT: [[ADD38:%.*]] = fadd fast float [[ADD32]], undef
429 ; STORE-NEXT: [[ADD44:%.*]] = fadd fast float [[ADD38]], undef
430403 ; STORE-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
431404 ; STORE-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]]
432405 ; STORE-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4
439412 ; STORE-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
440413 ; STORE-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
441414 ; STORE-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
442 ; STORE-NEXT: [[ADD50:%.*]] = fadd fast float [[ADD44]], [[MUL49]]
443415 ; STORE-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
444416 ; STORE-NEXT: [[INC]] = add nsw i64 [[I_083]], 1
445417 ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
575547 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
576548 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
577549 ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
578 ; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[SUM_042]], undef
579 ; CHECK-NEXT: [[ADD9:%.*]] = fadd fast float [[ADD]], undef
580 ; CHECK-NEXT: [[ADD15:%.*]] = fadd fast float [[ADD9]], undef
581550 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32>
582551 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]]
583552 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32>
584553 ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
585554 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
586555 ; CHECK-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
587 ; CHECK-NEXT: [[ADD21:%.*]] = fadd fast float [[ADD15]], undef
588556 ; CHECK-NEXT: [[INC]] = add nsw i64 [[I_043]], 1
589557 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
590558 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
621589 ; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
622590 ; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
623591 ; STORE-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
624 ; STORE-NEXT: [[ADD:%.*]] = fadd fast float [[SUM_042]], undef
625 ; STORE-NEXT: [[ADD9:%.*]] = fadd fast float [[ADD]], undef
626 ; STORE-NEXT: [[ADD15:%.*]] = fadd fast float [[ADD9]], undef
627592 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32>
628593 ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]]
629594 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32>
630595 ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
631596 ; STORE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
632597 ; STORE-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
633 ; STORE-NEXT: [[ADD21:%.*]] = fadd fast float [[ADD15]], undef
634598 ; STORE-NEXT: [[INC]] = add nsw i64 [[I_043]], 1
635599 ; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
636600 ; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
10861050 ; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
10871051 ; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
10881052 ; STORE-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP4]]
1089 ; STORE-NEXT: [[ADD8:%.*]] = fadd fast float undef, undef
1090 ; STORE-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], undef
10911053 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32>
10921054 ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP5]], [[RDX_SHUF]]
10931055 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32>
10941056 ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
10951057 ; STORE-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
1096 ; STORE-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], undef
10971058 ; STORE-NEXT: store float [[TMP6]], float* [[C_ADDR_038]], align 4
10981059 ; STORE-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[C_ADDR_038]], i64 1
10991060 ; STORE-NEXT: [[INC]] = add nsw i64 [[I_039]], 1
11681129 ; STORE-LABEL: @float_red_example4(
11691130 ; STORE-NEXT: entry:
11701131 ; STORE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([32 x float]* @arr_float to <4 x float>*), align 16
1171 ; STORE-NEXT: [[ADD:%.*]] = fadd fast float undef, undef
1172 ; STORE-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
11731132 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> undef, <4 x i32>
11741133 ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP0]], [[RDX_SHUF]]
11751134 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32>
11761135 ; STORE-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
11771136 ; STORE-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
1178 ; STORE-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
11791137 ; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16
11801138 ; STORE-NEXT: ret void
11811139 ;
12151173 ; STORE-LABEL: @float_red_example8(
12161174 ; STORE-NEXT: entry:
12171175 ; STORE-NEXT: [[TMP0:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr_float to <8 x float>*), align 16
1218 ; STORE-NEXT: [[ADD:%.*]] = fadd fast float undef, undef
1219 ; STORE-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
1220 ; STORE-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
1221 ; STORE-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
1222 ; STORE-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
1223 ; STORE-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
12241176 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> undef, <8 x i32>
12251177 ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP0]], [[RDX_SHUF]]
12261178 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32>
12281180 ; STORE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x float> [[BIN_RDX2]], <8 x float> undef, <8 x i32>
12291181 ; STORE-NEXT: [[BIN_RDX4:%.*]] = fadd fast <8 x float> [[BIN_RDX2]], [[RDX_SHUF3]]
12301182 ; STORE-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0
1231 ; STORE-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
12321183 ; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16
12331184 ; STORE-NEXT: ret void
12341185 ;
12921243 ; STORE-LABEL: @float_red_example16(
12931244 ; STORE-NEXT: entry:
12941245 ; STORE-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr_float to <16 x float>*), align 16
1295 ; STORE-NEXT: [[ADD:%.*]] = fadd fast float undef, undef
1296 ; STORE-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]]
1297 ; STORE-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]]
1298 ; STORE-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]]
1299 ; STORE-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]]
1300 ; STORE-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]]
1301 ; STORE-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]]
1302 ; STORE-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]]
1303 ; STORE-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]]
1304 ; STORE-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]]
1305 ; STORE-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]]
1306 ; STORE-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]]
1307 ; STORE-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]]
1308 ; STORE-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]]
13091246 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> undef, <16 x i32>
13101247 ; STORE-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP0]], [[RDX_SHUF]]
13111248 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32>
13151252 ; STORE-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x float> [[BIN_RDX4]], <16 x float> undef, <16 x i32>
13161253 ; STORE-NEXT: [[BIN_RDX6:%.*]] = fadd fast <16 x float> [[BIN_RDX4]], [[RDX_SHUF5]]
13171254 ; STORE-NEXT: [[TMP1:%.*]] = extractelement <16 x float> [[BIN_RDX6]], i32 0
1318 ; STORE-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]]
13191255 ; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16
13201256 ; STORE-NEXT: ret void
13211257 ;
13711307 ; STORE-LABEL: @i32_red_example4(
13721308 ; STORE-NEXT: entry:
13731309 ; STORE-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr_i32 to <4 x i32>*), align 16
1374 ; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
1375 ; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
13761310 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> undef, <4 x i32>
13771311 ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <4 x i32> [[TMP0]], [[RDX_SHUF]]
13781312 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32>
13791313 ; STORE-NEXT: [[BIN_RDX2:%.*]] = add nsw <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
13801314 ; STORE-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
1381 ; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
13821315 ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16
13831316 ; STORE-NEXT: ret void
13841317 ;
14181351 ; STORE-LABEL: @i32_red_example8(
14191352 ; STORE-NEXT: entry:
14201353 ; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
1421 ; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
1422 ; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
1423 ; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
1424 ; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
1425 ; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
1426 ; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
14271354 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32>
14281355 ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
14291356 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
14311358 ; STORE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
14321359 ; STORE-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
14331360 ; STORE-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
1434 ; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
14351361 ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16
14361362 ; STORE-NEXT: ret void
14371363 ;
14951421 ; STORE-LABEL: @i32_red_example16(
14961422 ; STORE-NEXT: entry:
14971423 ; STORE-NEXT: [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr_i32 to <16 x i32>*), align 16
1498 ; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
1499 ; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
1500 ; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
1501 ; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
1502 ; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
1503 ; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
1504 ; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
1505 ; STORE-NEXT: [[ADD_7:%.*]] = add nsw i32 undef, [[ADD_6]]
1506 ; STORE-NEXT: [[ADD_8:%.*]] = add nsw i32 undef, [[ADD_7]]
1507 ; STORE-NEXT: [[ADD_9:%.*]] = add nsw i32 undef, [[ADD_8]]
1508 ; STORE-NEXT: [[ADD_10:%.*]] = add nsw i32 undef, [[ADD_9]]
1509 ; STORE-NEXT: [[ADD_11:%.*]] = add nsw i32 undef, [[ADD_10]]
1510 ; STORE-NEXT: [[ADD_12:%.*]] = add nsw i32 undef, [[ADD_11]]
1511 ; STORE-NEXT: [[ADD_13:%.*]] = add nsw i32 undef, [[ADD_12]]
15121424 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> undef, <16 x i32>
15131425 ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <16 x i32> [[TMP0]], [[RDX_SHUF]]
15141426 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[BIN_RDX]], <16 x i32> undef, <16 x i32>
15181430 ; STORE-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <16 x i32> [[BIN_RDX4]], <16 x i32> undef, <16 x i32>
15191431 ; STORE-NEXT: [[BIN_RDX6:%.*]] = add nsw <16 x i32> [[BIN_RDX4]], [[RDX_SHUF5]]
15201432 ; STORE-NEXT: [[TMP1:%.*]] = extractelement <16 x i32> [[BIN_RDX6]], i32 0
1521 ; STORE-NEXT: [[ADD_14:%.*]] = add nsw i32 undef, [[ADD_13]]
15221433 ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16
15231434 ; STORE-NEXT: ret void
15241435 ;
16301541 ; STORE-LABEL: @i32_red_example32(
16311542 ; STORE-NEXT: entry:
16321543 ; STORE-NEXT: [[TMP0:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr_i32 to <32 x i32>*), align 16
1633 ; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
1634 ; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
1635 ; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
1636 ; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
1637 ; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
1638 ; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
1639 ; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
1640 ; STORE-NEXT: [[ADD_7:%.*]] = add nsw i32 undef, [[ADD_6]]
1641 ; STORE-NEXT: [[ADD_8:%.*]] = add nsw i32 undef, [[ADD_7]]
1642 ; STORE-NEXT: [[ADD_9:%.*]] = add nsw i32 undef, [[ADD_8]]
1643 ; STORE-NEXT: [[ADD_10:%.*]] = add nsw i32 undef, [[ADD_9]]
1644 ; STORE-NEXT: [[ADD_11:%.*]] = add nsw i32 undef, [[ADD_10]]
1645 ; STORE-NEXT: [[ADD_12:%.*]] = add nsw i32 undef, [[ADD_11]]
1646 ; STORE-NEXT: [[ADD_13:%.*]] = add nsw i32 undef, [[ADD_12]]
1647 ; STORE-NEXT: [[ADD_14:%.*]] = add nsw i32 undef, [[ADD_13]]
1648 ; STORE-NEXT: [[ADD_15:%.*]] = add nsw i32 undef, [[ADD_14]]
1649 ; STORE-NEXT: [[ADD_16:%.*]] = add nsw i32 undef, [[ADD_15]]
1650 ; STORE-NEXT: [[ADD_17:%.*]] = add nsw i32 undef, [[ADD_16]]
1651 ; STORE-NEXT: [[ADD_18:%.*]] = add nsw i32 undef, [[ADD_17]]
1652 ; STORE-NEXT: [[ADD_19:%.*]] = add nsw i32 undef, [[ADD_18]]
1653 ; STORE-NEXT: [[ADD_20:%.*]] = add nsw i32 undef, [[ADD_19]]
1654 ; STORE-NEXT: [[ADD_21:%.*]] = add nsw i32 undef, [[ADD_20]]
1655 ; STORE-NEXT: [[ADD_22:%.*]] = add nsw i32 undef, [[ADD_21]]
1656 ; STORE-NEXT: [[ADD_23:%.*]] = add nsw i32 undef, [[ADD_22]]
1657 ; STORE-NEXT: [[ADD_24:%.*]] = add nsw i32 undef, [[ADD_23]]
1658 ; STORE-NEXT: [[ADD_25:%.*]] = add nsw i32 undef, [[ADD_24]]
1659 ; STORE-NEXT: [[ADD_26:%.*]] = add nsw i32 undef, [[ADD_25]]
1660 ; STORE-NEXT: [[ADD_27:%.*]] = add nsw i32 undef, [[ADD_26]]
1661 ; STORE-NEXT: [[ADD_28:%.*]] = add nsw i32 undef, [[ADD_27]]
1662 ; STORE-NEXT: [[ADD_29:%.*]] = add nsw i32 undef, [[ADD_28]]
16631544 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> undef, <32 x i32>
16641545 ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <32 x i32> [[TMP0]], [[RDX_SHUF]]
16651546 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i32> [[BIN_RDX]], <32 x i32> undef, <32 x i32>
16711552 ; STORE-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i32> [[BIN_RDX6]], <32 x i32> undef, <32 x i32>
16721553 ; STORE-NEXT: [[BIN_RDX8:%.*]] = add nsw <32 x i32> [[BIN_RDX6]], [[RDX_SHUF7]]
16731554 ; STORE-NEXT: [[TMP1:%.*]] = extractelement <32 x i32> [[BIN_RDX8]], i32 0
1674 ; STORE-NEXT: [[ADD_30:%.*]] = add nsw i32 undef, [[ADD_29]]
16751555 ; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16
16761556 ; STORE-NEXT: ret void
16771557 ;
17491629 ; CHECK-LABEL: @i32_red_call(
17501630 ; CHECK-NEXT: entry:
17511631 ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
1752 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
1753 ; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
1754 ; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
1755 ; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
1756 ; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
1757 ; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
17581632 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32>
17591633 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
17601634 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
17621636 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
17631637 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
17641638 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
1765 ; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
17661639 ; CHECK-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
17671640 ; CHECK-NEXT: ret void
17681641 ;
17691642 ; STORE-LABEL: @i32_red_call(
17701643 ; STORE-NEXT: entry:
17711644 ; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
1772 ; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
1773 ; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
1774 ; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
1775 ; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
1776 ; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
1777 ; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
17781645 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32>
17791646 ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
17801647 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
17821649 ; STORE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
17831650 ; STORE-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
17841651 ; STORE-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
1785 ; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
17861652 ; STORE-NEXT: [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
17871653 ; STORE-NEXT: ret void
17881654 ;
18101676 ; CHECK-LABEL: @i32_red_invoke(
18111677 ; CHECK-NEXT: entry:
18121678 ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
1813 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
1814 ; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
1815 ; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
1816 ; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
1817 ; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
1818 ; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
18191679 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32>
18201680 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
18211681 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
18231683 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
18241684 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
18251685 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
1826 ; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
18271686 ; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
18281687 ; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
18291688 ; CHECK: exception:
18361695 ; STORE-LABEL: @i32_red_invoke(
18371696 ; STORE-NEXT: entry:
18381697 ; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
1839 ; STORE-NEXT: [[ADD:%.*]] = add nsw i32 undef, undef
1840 ; STORE-NEXT: [[ADD_1:%.*]] = add nsw i32 undef, [[ADD]]
1841 ; STORE-NEXT: [[ADD_2:%.*]] = add nsw i32 undef, [[ADD_1]]
1842 ; STORE-NEXT: [[ADD_3:%.*]] = add nsw i32 undef, [[ADD_2]]
1843 ; STORE-NEXT: [[ADD_4:%.*]] = add nsw i32 undef, [[ADD_3]]
1844 ; STORE-NEXT: [[ADD_5:%.*]] = add nsw i32 undef, [[ADD_4]]
18451698 ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> undef, <8 x i32>
18461699 ; STORE-NEXT: [[BIN_RDX:%.*]] = add nsw <8 x i32> [[TMP0]], [[RDX_SHUF]]
18471700 ; STORE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
18491702 ; STORE-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
18501703 ; STORE-NEXT: [[BIN_RDX4:%.*]] = add nsw <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
18511704 ; STORE-NEXT: [[TMP1:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
1852 ; STORE-NEXT: [[ADD_6:%.*]] = add nsw i32 undef, [[ADD_5]]
18531705 ; STORE-NEXT: [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
18541706 ; STORE-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
18551707 ; STORE: exception:
1111 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[B:%.*]] to <2 x i8>*
1212 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 1
1313 ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]],
14 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0
15 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i8> undef, i8 [[TMP3]], i32 0
16 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1
17 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i8> [[TMP4]], i8 [[TMP5]], i32 1
14 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1
15 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0
16 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i8> undef, i8 [[TMP4]], i32 0
17 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i8> [[TMP5]], i8 [[TMP3]], i32 1
1818 ; CHECK-NEXT: [[TMP7:%.*]] = sitofp <2 x i8> [[TMP6]] to <2 x double>
1919 ; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP7]]
2020 ; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]],
44 ; CHECK-LABEL: @Foo(
55 ; CHECK-NEXT: entry:
66 ; CHECK-NEXT: [[TMP0:%.*]] = load <32 x i8>, <32 x i8>* [[__V:%.*]], align 32
7 ; CHECK-NEXT: [[ADD_I_1_I:%.*]] = add i8 undef, undef
8 ; CHECK-NEXT: [[ADD_I_2_I:%.*]] = add i8 [[ADD_I_1_I]], undef
9 ; CHECK-NEXT: [[ADD_I_3_I:%.*]] = add i8 [[ADD_I_2_I]], undef
10 ; CHECK-NEXT: [[ADD_I_4_I:%.*]] = add i8 [[ADD_I_3_I]], undef
11 ; CHECK-NEXT: [[ADD_I_5_I:%.*]] = add i8 [[ADD_I_4_I]], undef
12 ; CHECK-NEXT: [[ADD_I_6_I:%.*]] = add i8 [[ADD_I_5_I]], undef
13 ; CHECK-NEXT: [[ADD_I_7_I:%.*]] = add i8 [[ADD_I_6_I]], undef
14 ; CHECK-NEXT: [[ADD_I_8_I:%.*]] = add i8 [[ADD_I_7_I]], undef
15 ; CHECK-NEXT: [[ADD_I_9_I:%.*]] = add i8 [[ADD_I_8_I]], undef
16 ; CHECK-NEXT: [[ADD_I_10_I:%.*]] = add i8 [[ADD_I_9_I]], undef
17 ; CHECK-NEXT: [[ADD_I_11_I:%.*]] = add i8 [[ADD_I_10_I]], undef
18 ; CHECK-NEXT: [[ADD_I_12_I:%.*]] = add i8 [[ADD_I_11_I]], undef
19 ; CHECK-NEXT: [[ADD_I_13_I:%.*]] = add i8 [[ADD_I_12_I]], undef
20 ; CHECK-NEXT: [[ADD_I_14_I:%.*]] = add i8 [[ADD_I_13_I]], undef
21 ; CHECK-NEXT: [[ADD_I_15_I:%.*]] = add i8 [[ADD_I_14_I]], undef
22 ; CHECK-NEXT: [[ADD_I_16_I:%.*]] = add i8 [[ADD_I_15_I]], undef
23 ; CHECK-NEXT: [[ADD_I_17_I:%.*]] = add i8 [[ADD_I_16_I]], undef
24 ; CHECK-NEXT: [[ADD_I_18_I:%.*]] = add i8 [[ADD_I_17_I]], undef
25 ; CHECK-NEXT: [[ADD_I_19_I:%.*]] = add i8 [[ADD_I_18_I]], undef
26 ; CHECK-NEXT: [[ADD_I_20_I:%.*]] = add i8 [[ADD_I_19_I]], undef
27 ; CHECK-NEXT: [[ADD_I_21_I:%.*]] = add i8 [[ADD_I_20_I]], undef
28 ; CHECK-NEXT: [[ADD_I_22_I:%.*]] = add i8 [[ADD_I_21_I]], undef
29 ; CHECK-NEXT: [[ADD_I_23_I:%.*]] = add i8 [[ADD_I_22_I]], undef
30 ; CHECK-NEXT: [[ADD_I_24_I:%.*]] = add i8 [[ADD_I_23_I]], undef
31 ; CHECK-NEXT: [[ADD_I_25_I:%.*]] = add i8 [[ADD_I_24_I]], undef
32 ; CHECK-NEXT: [[ADD_I_26_I:%.*]] = add i8 [[ADD_I_25_I]], undef
33 ; CHECK-NEXT: [[ADD_I_27_I:%.*]] = add i8 [[ADD_I_26_I]], undef
34 ; CHECK-NEXT: [[ADD_I_28_I:%.*]] = add i8 [[ADD_I_27_I]], undef
35 ; CHECK-NEXT: [[ADD_I_29_I:%.*]] = add i8 [[ADD_I_28_I]], undef
36 ; CHECK-NEXT: [[ADD_I_30_I:%.*]] = add i8 [[ADD_I_29_I]], undef
377 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i8> [[TMP0]], <32 x i8> undef, <32 x i32>
388 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <32 x i8> [[TMP0]], [[RDX_SHUF]]
399 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i8> [[BIN_RDX]], <32 x i8> undef, <32 x i32>
4515 ; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i8> [[BIN_RDX6]], <32 x i8> undef, <32 x i32>
4616 ; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <32 x i8> [[BIN_RDX6]], [[RDX_SHUF7]]
4717 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <32 x i8> [[BIN_RDX8]], i32 0
48 ; CHECK-NEXT: [[ADD_I_31_I:%.*]] = add i8 [[ADD_I_30_I]], undef
4918 ; CHECK-NEXT: ret i8 [[TMP1]]
5019 ;
5120 entry:
3434 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
3535 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
3636 ; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> [[TMP1]],
37 ; CHECK-NEXT: [[ADD:%.*]] = add i32 undef, [[SUM]]
38 ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 undef, [[ADD]]
39 ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
40 ; CHECK-NEXT: [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
41 ; CHECK-NEXT: [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
42 ; CHECK-NEXT: [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
43 ; CHECK-NEXT: [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
4437 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32>
4538 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP2]], [[RDX_SHUF]]
4639 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
4942 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
5043 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
5144 ; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[SUM]]
52 ; CHECK-NEXT: [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
5345 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
5446 ; CHECK: for.end:
5547 ; CHECK-NEXT: ret i32 [[OP_EXTRA]]
137129 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>*
138130 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4
139131 ; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[TMP1]], [[TMP3]]
140 ; CHECK-NEXT: [[ADD:%.*]] = add i32 undef, [[SUM]]
141 ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 undef, [[ADD]]
142 ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
143 ; CHECK-NEXT: [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
144 ; CHECK-NEXT: [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
145 ; CHECK-NEXT: [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
146 ; CHECK-NEXT: [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
147132 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> undef, <8 x i32>
148133 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP4]], [[RDX_SHUF]]
149134 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
152137 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
153138 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
154139 ; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]]
155 ; CHECK-NEXT: [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
156140 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
157141 ; CHECK: for.end:
158142 ; CHECK-NEXT: ret i32 [[OP_EXTRA]]
257241 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>*
258242 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4
259243 ; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[REORDER_SHUFFLE]], [[TMP3]]
260 ; CHECK-NEXT: [[ADD:%.*]] = add i32 undef, [[SUM]]
261 ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 undef, [[ADD]]
262 ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
263 ; CHECK-NEXT: [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
264 ; CHECK-NEXT: [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
265 ; CHECK-NEXT: [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
266 ; CHECK-NEXT: [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
267244 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> undef, <8 x i32>
268245 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP4]], [[RDX_SHUF]]
269246 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
272249 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
273250 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
274251 ; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]]
275 ; CHECK-NEXT: [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
276252 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
277253 ; CHECK: for.end:
278254 ; CHECK-NEXT: ret i32 [[OP_EXTRA]]
2525 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
2626 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
2727 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
28 ; CHECK-NEXT: [[MUL_18:%.*]] = add i32 undef, undef
29 ; CHECK-NEXT: [[MUL_29:%.*]] = add i32 undef, [[MUL_18]]
30 ; CHECK-NEXT: [[MUL_310:%.*]] = add i32 undef, [[MUL_29]]
31 ; CHECK-NEXT: [[MUL_411:%.*]] = add i32 undef, [[MUL_310]]
32 ; CHECK-NEXT: [[MUL_512:%.*]] = add i32 undef, [[MUL_411]]
33 ; CHECK-NEXT: [[MUL_613:%.*]] = add i32 undef, [[MUL_512]]
3428 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32>
3529 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP1]], [[RDX_SHUF]]
3630 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
3832 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
3933 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
4034 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
41 ; CHECK-NEXT: [[MUL_714:%.*]] = add i32 undef, [[MUL_613]]
4235 ; CHECK-NEXT: ret i32 [[TMP2]]
4336 ;
4437 entry:
146139 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
147140 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
148141 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
149 ; CHECK-NEXT: [[MUL_18:%.*]] = and i32 undef, undef
150 ; CHECK-NEXT: [[MUL_29:%.*]] = and i32 undef, [[MUL_18]]
151 ; CHECK-NEXT: [[MUL_310:%.*]] = and i32 undef, [[MUL_29]]
152 ; CHECK-NEXT: [[MUL_411:%.*]] = and i32 undef, [[MUL_310]]
153 ; CHECK-NEXT: [[MUL_512:%.*]] = and i32 undef, [[MUL_411]]
154 ; CHECK-NEXT: [[MUL_613:%.*]] = and i32 undef, [[MUL_512]]
155142 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32>
156143 ; CHECK-NEXT: [[BIN_RDX:%.*]] = and <8 x i32> [[TMP1]], [[RDX_SHUF]]
157144 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
159146 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
160147 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = and <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
161148 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
162 ; CHECK-NEXT: [[MUL_714:%.*]] = and i32 undef, [[MUL_613]]
163149 ; CHECK-NEXT: ret i32 [[TMP2]]
164150 ;
165151 entry:
207193 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
208194 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
209195 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
210 ; CHECK-NEXT: [[MUL_18:%.*]] = or i32 undef, undef
211 ; CHECK-NEXT: [[MUL_29:%.*]] = or i32 undef, [[MUL_18]]
212 ; CHECK-NEXT: [[MUL_310:%.*]] = or i32 undef, [[MUL_29]]
213 ; CHECK-NEXT: [[MUL_411:%.*]] = or i32 undef, [[MUL_310]]
214 ; CHECK-NEXT: [[MUL_512:%.*]] = or i32 undef, [[MUL_411]]
215 ; CHECK-NEXT: [[MUL_613:%.*]] = or i32 undef, [[MUL_512]]
216196 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32>
217197 ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <8 x i32> [[TMP1]], [[RDX_SHUF]]
218198 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
220200 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
221201 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = or <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
222202 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
223 ; CHECK-NEXT: [[MUL_714:%.*]] = or i32 undef, [[MUL_613]]
224203 ; CHECK-NEXT: ret i32 [[TMP2]]
225204 ;
226205 entry:
268247 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
269248 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
270249 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
271 ; CHECK-NEXT: [[MUL_18:%.*]] = xor i32 undef, undef
272 ; CHECK-NEXT: [[MUL_29:%.*]] = xor i32 undef, [[MUL_18]]
273 ; CHECK-NEXT: [[MUL_310:%.*]] = xor i32 undef, [[MUL_29]]
274 ; CHECK-NEXT: [[MUL_411:%.*]] = xor i32 undef, [[MUL_310]]
275 ; CHECK-NEXT: [[MUL_512:%.*]] = xor i32 undef, [[MUL_411]]
276 ; CHECK-NEXT: [[MUL_613:%.*]] = xor i32 undef, [[MUL_512]]
277250 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32>
278251 ; CHECK-NEXT: [[BIN_RDX:%.*]] = xor <8 x i32> [[TMP1]], [[RDX_SHUF]]
279252 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
281254 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
282255 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = xor <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
283256 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
284 ; CHECK-NEXT: [[MUL_714:%.*]] = xor i32 undef, [[MUL_613]]
285257 ; CHECK-NEXT: ret i32 [[TMP2]]
286258 ;
287259 entry:
321293 ; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[TMP4]],
322294 ; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i32> [[TMP3]], [[TMP5]]
323295 ; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[SELF]], align 16
324 ; CHECK-NEXT: [[TMP7:%.*]] = xor i32 undef, undef
325 ; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[TMP7]], undef
326296 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32>
327297 ; CHECK-NEXT: [[BIN_RDX:%.*]] = xor <4 x i32> [[TMP6]], [[RDX_SHUF]]
328298 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32>
329299 ; CHECK-NEXT: [[BIN_RDX2:%.*]] = xor <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
330 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
331 ; CHECK-NEXT: [[TMP10:%.*]] = xor i32 [[TMP8]], undef
332 ; CHECK-NEXT: ret i32 [[TMP9]]
300 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
301 ; CHECK-NEXT: ret i32 [[TMP7]]
333302 ;
334303 entry:
335304 %0 = load <4 x i32>, <4 x i32>* %self, align 16
3232 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[ARRAYIDX2]] to <4 x i32>*
3333 ; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4
3434 ; CHECK-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[TMP12]], [[TMP9]]
35 ; CHECK-NEXT: [[ADD10:%.*]] = add nsw i32 undef, [[A_088]]
3635 ; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 1
37 ; CHECK-NEXT: [[ADD24:%.*]] = add nsw i32 [[ADD10]], undef
3836 ; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 2
39 ; CHECK-NEXT: [[ADD38:%.*]] = add nsw i32 [[ADD24]], undef
4037 ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* [[M2]], i64 0, i64 [[INDVARS_IV]], i64 3
4138 ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32* [[ARRAYIDX6]] to <4 x i32>*
4239 ; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* [[TMP14]], align 16
4643 ; CHECK-NEXT: [[BIN_RDX2:%.*]] = add nsw <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
4744 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
4845 ; CHECK-NEXT: [[OP_EXTRA]] = add nsw i32 [[TMP15]], [[A_088]]
49 ; CHECK-NEXT: [[ADD52:%.*]] = add nsw i32 [[ADD38]], undef
5046 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
5147 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8
5248 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
1818 ; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], undef
1919 ; CHECK-NEXT: [[SHUFFLE8:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> undef, <4 x i32>
2020 ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE8]],
21 ; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 undef, undef
22 ; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 undef, i32 undef
23 ; CHECK-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], undef
24 ; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef
25 ; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], undef
2621 ; CHECK-NEXT: [[RDX_SHUF9:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> undef, <4 x i32>
2722 ; CHECK-NEXT: [[RDX_MINMAX_CMP10:%.*]] = icmp sgt <4 x i32> [[TMP5]], [[RDX_SHUF9]]
2823 ; CHECK-NEXT: [[RDX_MINMAX_SELECT11:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP10]], <4 x i32> [[TMP5]], <4 x i32> [[RDX_SHUF9]]
3025 ; CHECK-NEXT: [[RDX_MINMAX_CMP13:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT11]], [[RDX_SHUF12]]
3126 ; CHECK-NEXT: [[RDX_MINMAX_SELECT14:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP13]], <4 x i32> [[RDX_MINMAX_SELECT11]], <4 x i32> [[RDX_SHUF12]]
3227 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT14]], i32 0
33 ; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef
3428 ; CHECK-NEXT: [[TMP19:%.*]] = select i1 undef, i32 [[TMP6]], i32 undef
3529 ; CHECK-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], 63
3630 ; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP2]]
3731 ; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef
3832 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <4 x i32>
3933 ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[SHUFFLE]],
40 ; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i32 undef, undef
41 ; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 undef, i32 undef
42 ; CHECK-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[TMP27]], undef
43 ; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 undef, i32 [[TMP27]]
44 ; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 undef, undef
45 ; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 undef, i32 undef
46 ; CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], [[TMP29]]
47 ; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP29]], i32 [[TMP32]]
48 ; CHECK-NEXT: [[TMP36:%.*]] = icmp sgt i32 undef, undef
49 ; CHECK-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 undef, i32 undef
50 ; CHECK-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP37]], [[TMP34]]
51 ; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP34]], i32 [[TMP37]]
52 ; CHECK-NEXT: [[TMP41:%.*]] = icmp sgt i32 undef, undef
53 ; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 undef, i32 undef
54 ; CHECK-NEXT: [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], [[TMP39]]
5534 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> undef, <4 x i32>
5635 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP9]], [[RDX_SHUF]]
5736 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP9]], <4 x i32> [[RDX_SHUF]]
6948 ; CHECK-NEXT: [[OP_EXTRA6:%.*]] = select i1 [[TMP14]], i32 [[OP_EXTRA5]], i32 undef
7049 ; CHECK-NEXT: [[TMP15:%.*]] = icmp slt i32 [[OP_EXTRA6]], undef
7150 ; CHECK-NEXT: [[OP_EXTRA7:%.*]] = select i1 [[TMP15]], i32 [[OP_EXTRA6]], i32 undef
72 ; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP39]], i32 [[TMP42]]
7351 ; CHECK-NEXT: [[TMP45:%.*]] = icmp sgt i32 undef, [[OP_EXTRA7]]
7452 ; CHECK-NEXT: unreachable
7553 ;
1515 ; CHECK-NEXT: [[DOTSROA_RAW_IDX_7:%.*]] = getelementptr inbounds %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76", %"struct.std::h.0.4.8.12.16.20.24.28.248.0.1.2.3.76"* undef, i64 7, i32 1
1616 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTSROA_CAST_4]] to <8 x i32>*
1717 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
18 ; CHECK-NEXT: [[CMP_I1_4:%.*]] = icmp slt i32 undef, undef
19 ; CHECK-NEXT: [[DOTSROA_SPECULATED_4:%.*]] = select i1 [[CMP_I1_4]], i32 undef, i32 undef
20 ; CHECK-NEXT: [[CMP_I1_5:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_4]], undef
21 ; CHECK-NEXT: [[DOTSROA_SPECULATED_5:%.*]] = select i1 [[CMP_I1_5]], i32 undef, i32 [[DOTSROA_SPECULATED_4]]
22 ; CHECK-NEXT: [[CMP_I1_6:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_5]], undef
23 ; CHECK-NEXT: [[DOTSROA_SPECULATED_6:%.*]] = select i1 [[CMP_I1_6]], i32 undef, i32 [[DOTSROA_SPECULATED_5]]
24 ; CHECK-NEXT: [[CMP_I1_7:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_6]], undef
25 ; CHECK-NEXT: [[DOTSROA_SPECULATED_7:%.*]] = select i1 [[CMP_I1_7]], i32 undef, i32 [[DOTSROA_SPECULATED_6]]
26 ; CHECK-NEXT: [[CMP_I1_8:%.*]] = icmp slt i32 undef, undef
2718 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32>
2819 ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP1]], [[RDX_SHUF]]
2920 ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP1]], <8 x i32> [[RDX_SHUF]]
3829 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 undef
3930 ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[OP_EXTRA]], undef
4031 ; CHECK-NEXT: [[OP_EXTRA7:%.*]] = select i1 [[TMP4]], i32 [[OP_EXTRA]], i32 undef
41 ; CHECK-NEXT: [[DOTSROA_SPECULATED_8:%.*]] = select i1 [[CMP_I1_8]], i32 undef, i32 undef
4232 ; CHECK-NEXT: [[DOTSROA_SPECULATED_9:%.*]] = select i1 undef, i32 undef, i32 [[OP_EXTRA7]]
4333 ; CHECK-NEXT: [[CMP_I1_10:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_9]], undef
4434 ; CHECK-NEXT: ret void
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt -slp-vectorizer -S -o - -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell < %s | FileCheck %s
2
3 @k = external dso_local constant [8 x [4 x i32]], align 16
4 @l = external dso_local global [366 x i32], align 16
5
6 ; Function Attrs: nofree norecurse noreturn nounwind writeonly
7