llvm.org GIT mirror llvm / 9a1cf21
Merging r321870, r321872, and r321994: ------------------------------------------------------------------------ r321870 | abataev | 2018-01-05 07:20:40 -0800 (Fri, 05 Jan 2018) | 1 line [SLP] Update test checks, NFC. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r321872 | abataev | 2018-01-05 08:15:17 -0800 (Fri, 05 Jan 2018) | 1 line [SLP] Update more test checks, NFC. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r321994 | abataev | 2018-01-08 06:43:06 -0800 (Mon, 08 Jan 2018) | 13 lines [SLP] Fix PR35777: Incorrect handling of aggregate values. Summary: Fixes the bug with incorrect handling of InsertValue|InsertElement instructions in SLP vectorizer. Currently, we may use incorrect ExtractElement instructions as the operands of the original InsertValue|InsertElement instructions. Reviewers: mkuper, hfinkel, RKSimon, spatel Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D41767 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_60@322675 91177308-0d34-0410-b5e6-96231b3b80d8 Hans Wennborg 1 year, 8 months ago
6 changed file(s) with 348 addition(s) and 197 deletion(s). Raw diff Collapse all Expand all
9494 bool tryToVectorizePair(Value *A, Value *B, slpvectorizer::BoUpSLP &R);
9595
9696 /// \brief Try to vectorize a list of operands.
97 /// \@param BuildVector A list of users to ignore for the purpose of
98 /// scheduling and cost estimation when NeedExtraction
99 /// is false.
10097 /// \returns true if a value was vectorized.
10198 bool tryToVectorizeList(ArrayRef VL, slpvectorizer::BoUpSLP &R,
102 ArrayRef BuildVector = None,
103 bool AllowReorder = false,
104 bool NeedExtraction = false);
99 bool AllowReorder = false);
105100
106101 /// \brief Try to vectorize a chain that may start at the operands of \p I.
107102 bool tryToVectorize(Instruction *I, slpvectorizer::BoUpSLP &R);
44154415 if (!A || !B)
44164416 return false;
44174417 Value *VL[] = { A, B };
4418 return tryToVectorizeList(VL, R, None, true);
4418 return tryToVectorizeList(VL, R, true);
44194419 }
44204420
44214421 bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R,
4422 ArrayRef BuildVector,
4423 bool AllowReorder,
4424 bool NeedExtraction) {
4422 bool AllowReorder) {
44254423 if (VL.size() < 2)
44264424 return false;
44274425
45154513 << "\n");
45164514 ArrayRef Ops = VL.slice(I, OpsWidth);
45174515
4518 ArrayRef EmptyArray;
4519 ArrayRef BuildVectorSlice;
4520 if (!BuildVector.empty())
4521 BuildVectorSlice = BuildVector.slice(I, OpsWidth);
4522
4523 R.buildTree(Ops, NeedExtraction ? EmptyArray : BuildVectorSlice);
4516 R.buildTree(Ops);
45244517 // TODO: check if we can allow reordering for more cases.
45254518 if (AllowReorder && R.shouldReorder()) {
45264519 // Conceptually, there is nothing actually preventing us from trying to
45284521 // reductions. However, at this point, we only expect to get here when
45294522 // there are exactly two operations.
45304523 assert(Ops.size() == 2);
4531 assert(BuildVectorSlice.empty());
45324524 Value *ReorderedOps[] = {Ops[1], Ops[0]};
45334525 R.buildTree(ReorderedOps, None);
45344526 }
45484540 << " and with tree size "
45494541 << ore::NV("TreeSize", R.getTreeSize()));
45504542
4551 Value *VectorizedRoot = R.vectorizeTree();
4552
4553 // Reconstruct the build vector by extracting the vectorized root. This
4554 // way we handle the case where some elements of the vector are
4555 // undefined.
4556 // (return (insertelt <4 x i32> (insertelt undef (opd0) 0) (opd1) 2))
4557 if (!BuildVectorSlice.empty()) {
4558 // The insert point is the last build vector instruction. The
4559 // vectorized root will precede it. This guarantees that we get an
4560 // instruction. The vectorized tree could have been constant folded.
4561 Instruction *InsertAfter = cast(BuildVectorSlice.back());
4562 unsigned VecIdx = 0;
4563 for (auto &V : BuildVectorSlice) {
4564 IRBuilder Builder(InsertAfter->getParent(),
4565 ++BasicBlock::iterator(InsertAfter));
4566 Instruction *I = cast(V);
4567 assert(isa(I) || isa(I));
4568 Instruction *Extract =
4569 cast(Builder.CreateExtractElement(
4570 VectorizedRoot, Builder.getInt32(VecIdx++)));
4571 I->setOperand(1, Extract);
4572 I->moveAfter(Extract);
4573 InsertAfter = I;
4574 }
4575 }
4543 R.vectorizeTree();
45764544 // Move to the next bundle.
45774545 I += VF - 1;
45784546 NextInst = I + 1;
54935461 ///
54945462 /// Returns true if it matches
54955463 static bool findBuildVector(InsertElementInst *LastInsertElem,
5496 SmallVectorImpl &BuildVector,
54975464 SmallVectorImpl &BuildVectorOpds) {
54985465 Value *V = nullptr;
54995466 do {
5500 BuildVector.push_back(LastInsertElem);
55015467 BuildVectorOpds.push_back(LastInsertElem->getOperand(1));
55025468 V = LastInsertElem->getOperand(0);
55035469 if (isa(V))
55065472 if (!LastInsertElem || !LastInsertElem->hasOneUse())
55075473 return false;
55085474 } while (true);
5509 std::reverse(BuildVector.begin(), BuildVector.end());
55105475 std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
55115476 return true;
55125477 }
55155480 ///
55165481 /// \return true if it matches.
55175482 static bool findBuildAggregate(InsertValueInst *IV,
5518 SmallVectorImpl &BuildVector,
55195483 SmallVectorImpl &BuildVectorOpds) {
55205484 Value *V;
55215485 do {
5522 BuildVector.push_back(IV);
55235486 BuildVectorOpds.push_back(IV->getInsertedValueOperand());
55245487 V = IV->getAggregateOperand();
55255488 if (isa(V))
55285491 if (!IV || !IV->hasOneUse())
55295492 return false;
55305493 } while (true);
5531 std::reverse(BuildVector.begin(), BuildVector.end());
55325494 std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
55335495 return true;
55345496 }
57045666 if (!R.canMapToVector(IVI->getType(), DL))
57055667 return false;
57065668
5707 SmallVector BuildVector;
57085669 SmallVector BuildVectorOpds;
5709 if (!findBuildAggregate(IVI, BuildVector, BuildVectorOpds))
5670 if (!findBuildAggregate(IVI, BuildVectorOpds))
57105671 return false;
57115672
57125673 DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
57135674 // Aggregate value is unlikely to be processed in vector register, we need to
57145675 // extract scalars into scalar registers, so NeedExtraction is set true.
5715 return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false, true);
5676 return tryToVectorizeList(BuildVectorOpds, R);
57165677 }
57175678
57185679 bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
57195680 BasicBlock *BB, BoUpSLP &R) {
5720 SmallVector BuildVector;
57215681 SmallVector BuildVectorOpds;
5722 if (!findBuildVector(IEI, BuildVector, BuildVectorOpds))
5682 if (!findBuildVector(IEI, BuildVectorOpds))
57235683 return false;
57245684
57255685 // Vectorize starting with the build vector operands ignoring the BuildVector
57265686 // instructions for the purpose of scheduling and user extraction.
5727 return tryToVectorizeList(BuildVectorOpds, R, BuildVector);
5687 return tryToVectorizeList(BuildVectorOpds, R);
57285688 }
57295689
57305690 bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB,
58025762 // is done when there are exactly two elements since tryToVectorizeList
58035763 // asserts that there are only two values when AllowReorder is true.
58045764 bool AllowReorder = NumElts == 2;
5805 if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R,
5806 None, AllowReorder)) {
5765 if (NumElts > 1 &&
5766 tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, AllowReorder)) {
58075767 // Success start over because instructions might have been changed.
58085768 HaveVectorizedPhiNodes = true;
58095769 Changed = true;
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -verify -slp-vectorizer -o - -S -mtriple=x86_64-apple-macosx10.13.0 | FileCheck %s
2
3 @global = local_unnamed_addr global [6 x double] zeroinitializer, align 16
4
5 define { i64, i64 } @patatino(double %arg) {
6 ; CHECK-LABEL: @patatino(
7 ; CHECK-NEXT: bb:
8 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* bitcast ([6 x double]* @global to <2 x double>*), align 16
9 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2) to <2 x double>*), align 16
10 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[ARG:%.*]], i32 0
11 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[ARG]], i32 1
12 ; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP1]]
13 ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP0]], [[TMP4]]
14 ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16
15 ; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]]
16 ; CHECK-NEXT: [[TMP8:%.*]] = fptosi <2 x double> [[TMP7]] to <2 x i32>
17 ; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
18 ; CHECK-NEXT: [[TMP10:%.*]] = trunc <2 x i64> [[TMP9]] to <2 x i32>
19 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0
20 ; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
21 ; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP12]], 0
22 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1
23 ; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
24 ; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i64, i64 } [[TMP16]], i64 [[TMP14]], 1
25 ; CHECK-NEXT: ret { i64, i64 } [[TMP17]]
26 ;
27 bb:
28 %tmp = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 0), align 16
29 %tmp1 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2), align 16
30 %tmp2 = fmul double %tmp1, %arg
31 %tmp3 = fadd double %tmp, %tmp2
32 %tmp4 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4), align 16
33 %tmp5 = fadd double %tmp4, %tmp3
34 %tmp6 = fptosi double %tmp5 to i32
35 %tmp7 = sext i32 %tmp6 to i64
36 %tmp8 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 1), align 8
37 %tmp9 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 3), align 8
38 %tmp10 = fmul double %tmp9, %arg
39 %tmp11 = fadd double %tmp8, %tmp10
40 %tmp12 = load double, double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 5), align 8
41 %tmp13 = fadd double %tmp12, %tmp11
42 %tmp14 = fptosi double %tmp13 to i32
43 %tmp15 = sext i32 %tmp14 to i64
44 %tmp16 = insertvalue { i64, i64 } undef, i64 %tmp7, 0
45 %tmp17 = insertvalue { i64, i64 } %tmp16, i64 %tmp15, 1
46 ret { i64, i64 } %tmp17
47 }
66
77 define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
88 ; CHECK-LABEL: @simple_select(
9 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
10 ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
9 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
10 ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
1111 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
1212 ; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
1313 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
1919 ; CHECK-NEXT: ret <4 x float> [[RD]]
2020 ;
2121 ; ZEROTHRESH-LABEL: @simple_select(
22 ; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
23 ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
22 ; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
23 ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
2424 ; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
2525 ; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
2626 ; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
6363 ; This entire tree is ephemeral, don't vectorize any of it.
6464 define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
6565 ; CHECK-LABEL: @simple_select_eph(
66 ; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
67 ; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
68 ; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
69 ; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
70 ; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
71 ; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
72 ; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2
73 ; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3
74 ; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
75 ; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
76 ; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
77 ; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
66 ; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
67 ; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
68 ; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
69 ; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
70 ; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
71 ; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
72 ; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
73 ; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
74 ; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
75 ; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
76 ; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
77 ; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
7878 ; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
7979 ; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
8080 ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
9999 ; CHECK-NEXT: ret <4 x float> undef
100100 ;
101101 ; ZEROTHRESH-LABEL: @simple_select_eph(
102 ; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
103 ; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
104 ; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
105 ; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
106 ; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
107 ; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
108 ; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2
109 ; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3
110 ; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
111 ; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
112 ; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
113 ; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
102 ; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
103 ; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
104 ; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
105 ; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
106 ; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
107 ; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
108 ; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
109 ; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
110 ; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
111 ; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
112 ; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
113 ; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
114114 ; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
115115 ; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
116116 ; ZEROTHRESH-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
174174 ; doesn't matter
175175 define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
176176 ; CHECK-LABEL: @simple_select_insert_out_of_order(
177 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
178 ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
177 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
178 ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
179179 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
180180 ; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2
181181 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
187187 ; CHECK-NEXT: ret <4 x float> [[RD]]
188188 ;
189189 ; ZEROTHRESH-LABEL: @simple_select_insert_out_of_order(
190 ; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
191 ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
190 ; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
191 ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
192192 ; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
193193 ; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2
194194 ; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
232232 ; Multiple users of the final constructed vector
233233 define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
234234 ; CHECK-LABEL: @simple_select_users(
235 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
236 ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
235 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
236 ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
237237 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
238238 ; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
239239 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
246246 ; CHECK-NEXT: ret <4 x float> [[RD]]
247247 ;
248248 ; ZEROTHRESH-LABEL: @simple_select_users(
249 ; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
250 ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
249 ; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
250 ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
251251 ; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
252252 ; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
253253 ; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
290290 ; Unused insertelement
291291 define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
292292 ; CHECK-LABEL: @simple_select_no_users(
293 ; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
294 ; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
295 ; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
296 ; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
297 ; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
298 ; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
299 ; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2
300 ; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3
301 ; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
302 ; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
303 ; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
304 ; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
293 ; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
294 ; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
295 ; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
296 ; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
297 ; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
298 ; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
299 ; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
300 ; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
301 ; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
302 ; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
303 ; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
304 ; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
305305 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
306306 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
307307 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
329329 ; CHECK-NEXT: ret <4 x float> [[RD]]
330330 ;
331331 ; ZEROTHRESH-LABEL: @simple_select_no_users(
332 ; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
333 ; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
334 ; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
335 ; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
336 ; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
337 ; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
338 ; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2
339 ; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3
340 ; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
341 ; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
342 ; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2
343 ; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3
332 ; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
333 ; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
334 ; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
335 ; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
336 ; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
337 ; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
338 ; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
339 ; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
340 ; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
341 ; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
342 ; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
343 ; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
344344 ; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
345345 ; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
346346 ; ZEROTHRESH-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
386386 ; to do this backwards
387387 define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
388388 ; CHECK-LABEL: @reconstruct(
389 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 0
390 ; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
391 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> %c, i32 1
392 ; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP2]], i32 1
393 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2
394 ; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP3]], i32 2
395 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> %c, i32 3
396 ; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP4]], i32 3
389 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3
390 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[C]], i32 2
391 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[C]], i32 1
392 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[C]], i32 0
393 ; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
394 ; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP3]], i32 1
395 ; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP2]], i32 2
396 ; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP1]], i32 3
397397 ; CHECK-NEXT: ret <4 x i32> [[RD]]
398398 ;
399399 ; ZEROTHRESH-LABEL: @reconstruct(
400 ; ZEROTHRESH-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 0
401 ; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
402 ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> %c, i32 1
403 ; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP2]], i32 1
404 ; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2
405 ; ZEROTHRESH-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP3]], i32 2
406 ; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> %c, i32 3
407 ; ZEROTHRESH-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP4]], i32 3
400 ; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
401 ; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
402 ; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
403 ; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
404 ; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[C0]], i32 0
405 ; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[C1]], i32 1
406 ; ZEROTHRESH-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[C2]], i32 2
407 ; ZEROTHRESH-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[C3]], i32 3
408408 ; ZEROTHRESH-NEXT: ret <4 x i32> [[RD]]
409409 ;
410410 %c0 = extractelement <4 x i32> %c, i32 0
420420
421421 define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %c) #0 {
422422 ; CHECK-LABEL: @simple_select_v2(
423 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> %c, zeroinitializer
424 ; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> %a, <2 x float> %b
423 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[C:%.*]], zeroinitializer
424 ; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]
425425 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
426426 ; CHECK-NEXT: [[RA:%.*]] = insertelement <2 x float> undef, float [[TMP3]], i32 0
427427 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
429429 ; CHECK-NEXT: ret <2 x float> [[RB]]
430430 ;
431431 ; ZEROTHRESH-LABEL: @simple_select_v2(
432 ; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <2 x i32> %c, i32 0
433 ; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <2 x i32> %c, i32 1
434 ; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <2 x float> %a, i32 0
435 ; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <2 x float> %a, i32 1
436 ; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <2 x float> %b, i32 0
437 ; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <2 x float> %b, i32 1
432 ; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <2 x i32> [[C:%.*]], i32 0
433 ; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <2 x i32> [[C]], i32 1
434 ; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <2 x float> [[A:%.*]], i32 0
435 ; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <2 x float> [[A]], i32 1
436 ; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <2 x float> [[B:%.*]], i32 0
437 ; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <2 x float> [[B]], i32 1
438438 ; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
439439 ; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
440440 ; ZEROTHRESH-NEXT: [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]]
463463 ; (low cost threshold needed to force this to happen)
464464 define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
465465 ; CHECK-LABEL: @simple_select_partial_vector(
466 ; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
467 ; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
468 ; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
469 ; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
470 ; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
471 ; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
466 ; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
467 ; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
468 ; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
469 ; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
470 ; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
471 ; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
472472 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
473473 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
474474 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
484484 ; CHECK-NEXT: ret <4 x float> [[RB]]
485485 ;
486486 ; ZEROTHRESH-LABEL: @simple_select_partial_vector(
487 ; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
488 ; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
489 ; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0
490 ; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1
491 ; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0
492 ; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1
487 ; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
488 ; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
489 ; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
490 ; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
491 ; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
492 ; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
493493 ; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
494494 ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
495495 ; ZEROTHRESH-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
529529 ; must be rescheduled. The case here is from compiling Julia.
530530 define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
531531 ; CHECK-LABEL: @reschedule_extract(
532 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b
532 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
533533 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
534534 ; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
535535 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
541541 ; CHECK-NEXT: ret <4 x float> [[V3]]
542542 ;
543543 ; ZEROTHRESH-LABEL: @reschedule_extract(
544 ; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b
544 ; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
545545 ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
546546 ; ZEROTHRESH-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
547547 ; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
575575 ; instructions that are erased.
576576 define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
577577 ; CHECK-LABEL: @take_credit(
578 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b
578 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
579579 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
580580 ; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
581581 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
587587 ; CHECK-NEXT: ret <4 x float> [[V3]]
588588 ;
589589 ; ZEROTHRESH-LABEL: @take_credit(
590 ; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> %a, %b
590 ; ZEROTHRESH-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
591591 ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
592592 ; ZEROTHRESH-NEXT: [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
593593 ; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
621621 define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
622622 ; CHECK-LABEL: @multi_tree(
623623 ; CHECK-NEXT: entry:
624 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0
625 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1
626 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2
627 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3
624 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double [[W:%.*]], i32 0
625 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[X:%.*]], i32 1
626 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 2
627 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Z:%.*]], i32 3
628628 ; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]],
629629 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> , [[TMP4]]
630630 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0
639639 ;
640640 ; ZEROTHRESH-LABEL: @multi_tree(
641641 ; ZEROTHRESH-NEXT: entry:
642 ; ZEROTHRESH-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0
643 ; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1
644 ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2
645 ; ZEROTHRESH-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3
642 ; ZEROTHRESH-NEXT: [[TMP0:%.*]] = insertelement <4 x double> undef, double [[W:%.*]], i32 0
643 ; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[X:%.*]], i32 1
644 ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 2
645 ; ZEROTHRESH-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Z:%.*]], i32 3
646646 ; ZEROTHRESH-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]],
647647 ; ZEROTHRESH-NEXT: [[TMP5:%.*]] = fmul <4 x double> , [[TMP4]]
648648 ; ZEROTHRESH-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0
674674 define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr #0 {
675675 ; CHECK-LABEL: @_vadd256(
676676 ; CHECK-NEXT: entry:
677 ; CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x float> %a, %b
677 ; CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
678678 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
679679 ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0
680680 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1
695695 ;
696696 ; ZEROTHRESH-LABEL: @_vadd256(
697697 ; ZEROTHRESH-NEXT: entry:
698 ; ZEROTHRESH-NEXT: [[TMP0:%.*]] = fadd <8 x float> %a, %b
698 ; ZEROTHRESH-NEXT: [[TMP0:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
699699 ; ZEROTHRESH-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
700700 ; ZEROTHRESH-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0
701701 ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
12
2 ; CHECK-LABEL: julia_2xdouble
3 ; CHECK: load <2 x double>
4 ; CHECK: load <2 x double>
5 ; CHECK: fmul <2 x double>
6 ; CHECK: fadd <2 x double>
73 define void @julia_2xdouble([2 x double]* sret, [2 x double]*, [2 x double]*, [2 x double]*) {
4 ; CHECK-LABEL: @julia_2xdouble(
5 ; CHECK-NEXT: top:
6 ; CHECK-NEXT: [[PX0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2:%.*]], i64 0, i64 0
7 ; CHECK-NEXT: [[PY0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3:%.*]], i64 0, i64 0
8 ; CHECK-NEXT: [[PX1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP2]], i64 0, i64 1
9 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[PX0]] to <2 x double>*
10 ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 4
11 ; CHECK-NEXT: [[PY1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP3]], i64 0, i64 1
12 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[PY0]] to <2 x double>*
13 ; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 4
14 ; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP5]], [[TMP7]]
15 ; CHECK-NEXT: [[PZ0:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1:%.*]], i64 0, i64 0
16 ; CHECK-NEXT: [[PZ1:%.*]] = getelementptr inbounds [2 x double], [2 x double]* [[TMP1]], i64 0, i64 1
17 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[PZ0]] to <2 x double>*
18 ; CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[TMP9]], align 4
19 ; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP8]], [[TMP10]]
20 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
21 ; CHECK-NEXT: [[I0:%.*]] = insertvalue [2 x double] undef, double [[TMP12]], 0
22 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[TMP11]], i32 1
23 ; CHECK-NEXT: [[I1:%.*]] = insertvalue [2 x double] [[I0]], double [[TMP13]], 1
24 ; CHECK-NEXT: store [2 x double] [[I1]], [2 x double]* [[TMP0:%.*]], align 4
25 ; CHECK-NEXT: ret void
26 ;
827 top:
928 %px0 = getelementptr inbounds [2 x double], [2 x double]* %2, i64 0, i64 0
1029 %x0 = load double, double* %px0, align 4
2847 ret void
2948 }
3049
31 ; CHECK-LABEL: julia_4xfloat
32 ; CHECK: load <4 x float>
33 ; CHECK: load <4 x float>
34 ; CHECK: fmul <4 x float>
35 ; CHECK: fadd <4 x float>
3650 define void @julia_4xfloat([4 x float]* sret, [4 x float]*, [4 x float]*, [4 x float]*) {
51 ; CHECK-LABEL: @julia_4xfloat(
52 ; CHECK-NEXT: top:
53 ; CHECK-NEXT: [[PX0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2:%.*]], i64 0, i64 0
54 ; CHECK-NEXT: [[PY0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3:%.*]], i64 0, i64 0
55 ; CHECK-NEXT: [[PX1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 1
56 ; CHECK-NEXT: [[PY1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 1
57 ; CHECK-NEXT: [[PX2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 2
58 ; CHECK-NEXT: [[PY2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 2
59 ; CHECK-NEXT: [[PX3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP2]], i64 0, i64 3
60 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[PX0]] to <4 x float>*
61 ; CHECK-NEXT: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4
62 ; CHECK-NEXT: [[PY3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP3]], i64 0, i64 3
63 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[PY0]] to <4 x float>*
64 ; CHECK-NEXT: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4
65 ; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[TMP5]], [[TMP7]]
66 ; CHECK-NEXT: [[PZ0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1:%.*]], i64 0, i64 0
67 ; CHECK-NEXT: [[PZ1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 1
68 ; CHECK-NEXT: [[PZ2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 2
69 ; CHECK-NEXT: [[PZ3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[TMP1]], i64 0, i64 3
70 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[PZ0]] to <4 x float>*
71 ; CHECK-NEXT: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4
72 ; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x float> [[TMP8]], [[TMP10]]
73 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP11]], i32 0
74 ; CHECK-NEXT: [[I0:%.*]] = insertvalue [4 x float] undef, float [[TMP12]], 0
75 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP11]], i32 1
76 ; CHECK-NEXT: [[I1:%.*]] = insertvalue [4 x float] [[I0]], float [[TMP13]], 1
77 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[TMP11]], i32 2
78 ; CHECK-NEXT: [[I2:%.*]] = insertvalue [4 x float] [[I1]], float [[TMP14]], 2
79 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 3
80 ; CHECK-NEXT: [[I3:%.*]] = insertvalue [4 x float] [[I2]], float [[TMP15]], 3
81 ; CHECK-NEXT: store [4 x float] [[I3]], [4 x float]* [[TMP0:%.*]], align 4
82 ; CHECK-NEXT: ret void
83 ;
3784 top:
3885 %px0 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0
3986 %x0 = load float, float* %px0, align 4
75122 ret void
76123 }
77124
78 ; CHECK-LABEL: julia_load_array_of_float
79 ; CHECK: fsub <4 x float>
80125 define void @julia_load_array_of_float([4 x float]* %a, [4 x float]* %b, [4 x float]* %c) {
126 ; CHECK-LABEL: @julia_load_array_of_float(
127 ; CHECK-NEXT: top:
128 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x float]* [[A:%.*]] to <4 x float>*
129 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
130 ; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x float], [4 x float]* [[A]], align 4
131 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast [4 x float]* [[B:%.*]] to <4 x float>*
132 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
133 ; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x float], [4 x float]* [[B]], align 4
134 ; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
135 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
136 ; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x float] undef, float [[TMP5]], 0
137 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
138 ; CHECK-NEXT: [[C_ARR1:%.*]] = insertvalue [4 x float] [[C_ARR0]], float [[TMP6]], 1
139 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
140 ; CHECK-NEXT: [[C_ARR2:%.*]] = insertvalue [4 x float] [[C_ARR1]], float [[TMP7]], 2
141 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
142 ; CHECK-NEXT: [[C_ARR3:%.*]] = insertvalue [4 x float] [[C_ARR2]], float [[TMP8]], 3
143 ; CHECK-NEXT: store [4 x float] [[C_ARR3]], [4 x float]* [[C:%.*]], align 4
144 ; CHECK-NEXT: ret void
145 ;
81146 top:
82147 %a_arr = load [4 x float], [4 x float]* %a, align 4
83148 %a0 = extractvalue [4 x float] %a_arr, 0
101166 ret void
102167 }
103168
104 ; CHECK-LABEL: julia_load_array_of_i32
105 ; CHECK: load <4 x i32>
106 ; CHECK: load <4 x i32>
107 ; CHECK: sub <4 x i32>
108169 define void @julia_load_array_of_i32([4 x i32]* %a, [4 x i32]* %b, [4 x i32]* %c) {
170 ; CHECK-LABEL: @julia_load_array_of_i32(
171 ; CHECK-NEXT: top:
172 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x i32]* [[A:%.*]] to <4 x i32>*
173 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
174 ; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x i32], [4 x i32]* [[A]], align 4
175 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast [4 x i32]* [[B:%.*]] to <4 x i32>*
176 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
177 ; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x i32], [4 x i32]* [[B]], align 4
178 ; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
179 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
180 ; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x i32] undef, i32 [[TMP5]], 0
181 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
182 ; CHECK-NEXT: [[C_ARR1:%.*]] = insertvalue [4 x i32] [[C_ARR0]], i32 [[TMP6]], 1
183 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
184 ; CHECK-NEXT: [[C_ARR2:%.*]] = insertvalue [4 x i32] [[C_ARR1]], i32 [[TMP7]], 2
185 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
186 ; CHECK-NEXT: [[C_ARR3:%.*]] = insertvalue [4 x i32] [[C_ARR2]], i32 [[TMP8]], 3
187 ; CHECK-NEXT: store [4 x i32] [[C_ARR3]], [4 x i32]* [[C:%.*]], align 4
188 ; CHECK-NEXT: ret void
189 ;
109190 top:
110191 %a_arr = load [4 x i32], [4 x i32]* %a, align 4
111192 %a0 = extractvalue [4 x i32] %a_arr, 0
131212
132213 ; Almost identical to previous test, but for type that should NOT be vectorized.
133214 ;
134 ; CHECK-LABEL: julia_load_array_of_i16
135 ; CHECK-NOT: i2>
136215 define void @julia_load_array_of_i16([4 x i16]* %a, [4 x i16]* %b, [4 x i16]* %c) {
216 ; CHECK-LABEL: @julia_load_array_of_i16(
217 ; CHECK-NEXT: top:
218 ; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x i16], [4 x i16]* [[A:%.*]], align 4
219 ; CHECK-NEXT: [[A0:%.*]] = extractvalue [4 x i16] [[A_ARR]], 0
220 ; CHECK-NEXT: [[A2:%.*]] = extractvalue [4 x i16] [[A_ARR]], 2
221 ; CHECK-NEXT: [[A1:%.*]] = extractvalue [4 x i16] [[A_ARR]], 1
222 ; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x i16], [4 x i16]* [[B:%.*]], align 4
223 ; CHECK-NEXT: [[B0:%.*]] = extractvalue [4 x i16] [[B_ARR]], 0
224 ; CHECK-NEXT: [[B2:%.*]] = extractvalue [4 x i16] [[B_ARR]], 2
225 ; CHECK-NEXT: [[B1:%.*]] = extractvalue [4 x i16] [[B_ARR]], 1
226 ; CHECK-NEXT: [[A3:%.*]] = extractvalue [4 x i16] [[A_ARR]], 3
227 ; CHECK-NEXT: [[C1:%.*]] = sub i16 [[A1]], [[B1]]
228 ; CHECK-NEXT: [[B3:%.*]] = extractvalue [4 x i16] [[B_ARR]], 3
229 ; CHECK-NEXT: [[C0:%.*]] = sub i16 [[A0]], [[B0]]
230 ; CHECK-NEXT: [[C2:%.*]] = sub i16 [[A2]], [[B2]]
231 ; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x i16] undef, i16 [[C0]], 0
232 ; CHECK-NEXT: [[C_ARR1:%.*]] = insertvalue [4 x i16] [[C_ARR0]], i16 [[C1]], 1
233 ; CHECK-NEXT: [[C3:%.*]] = sub i16 [[A3]], [[B3]]
234 ; CHECK-NEXT: [[C_ARR2:%.*]] = insertvalue [4 x i16] [[C_ARR1]], i16 [[C2]], 2
235 ; CHECK-NEXT: [[C_ARR3:%.*]] = insertvalue [4 x i16] [[C_ARR2]], i16 [[C3]], 3
236 ; CHECK-NEXT: store [4 x i16] [[C_ARR3]], [4 x i16]* [[C:%.*]], align 4
237 ; CHECK-NEXT: ret void
238 ;
137239 top:
138240 %a_arr = load [4 x i16], [4 x i16]* %a, align 4
139241 %a0 = extractvalue [4 x i16] %a_arr, 0
159261
160262 %pseudovec = type { float, float, float, float }
161263
162 ; CHECK-LABEL: julia_load_struct_of_float
163 ; CHECK: load <4 x float>
164 ; CHECK: load <4 x float>
165 ; CHECK: fsub <4 x float>
166264 define void @julia_load_struct_of_float(%pseudovec* %a, %pseudovec* %b, %pseudovec* %c) {
265 ; CHECK-LABEL: @julia_load_struct_of_float(
266 ; CHECK-NEXT: top:
267 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast %pseudovec* [[A:%.*]] to <4 x float>*
268 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
269 ; CHECK-NEXT: [[A_STRUCT:%.*]] = load [[PSEUDOVEC:%.*]], %pseudovec* [[A]], align 4
270 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast %pseudovec* [[B:%.*]] to <4 x float>*
271 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
272 ; CHECK-NEXT: [[B_STRUCT:%.*]] = load [[PSEUDOVEC]], %pseudovec* [[B]], align 4
273 ; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]]
274 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
275 ; CHECK-NEXT: [[C_STRUCT0:%.*]] = insertvalue [[PSEUDOVEC]] undef, float [[TMP5]], 0
276 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
277 ; CHECK-NEXT: [[C_STRUCT1:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct0, float [[TMP6]], 1
278 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
279 ; CHECK-NEXT: [[C_STRUCT2:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct1, float [[TMP7]], 2
280 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
281 ; CHECK-NEXT: [[C_STRUCT3:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct2, float [[TMP8]], 3
282 ; CHECK-NEXT: store [[PSEUDOVEC]] %c_struct3, %pseudovec* [[C:%.*]], align 4
283 ; CHECK-NEXT: ret void
284 ;
167285 top:
168286 %a_struct = load %pseudovec, %pseudovec* %a, align 4
169287 %a0 = extractvalue %pseudovec %a_struct, 0
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt -slp-vectorizer < %s -S -mtriple="x86_64-grtev3-linux-gnu" -mcpu=corei7-avx | FileCheck %s
12
23 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
3 target triple = "x86_64-grtev3-linux-gnu"
44
55 ; We used to crash on this example because we were building a constant
66 ; expression during vectorization and the vectorizer expects instructions
77 ; as elements of the vectorized tree.
8 ; CHECK-LABEL: @test
98 ; PR19621
109
1110 define void @test() {
11 ; CHECK-LABEL: @test(
12 ; CHECK-NEXT: bb279:
13 ; CHECK-NEXT: br label [[BB283:%.*]]
14 ; CHECK: bb283:
15 ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ]
16 ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ [[TMP13:%.*]], [[EXIT]] ]
17 ; CHECK-NEXT: br label [[BB284:%.*]]
18 ; CHECK: bb284:
19 ; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double>
20 ; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef
21 ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef
22 ; CHECK-NEXT: br label [[BB21_I:%.*]]
23 ; CHECK: bb21.i:
24 ; CHECK-NEXT: br i1 undef, label [[BB22_I:%.*]], label [[EXIT]]
25 ; CHECK: bb22.i:
26 ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]]
27 ; CHECK-NEXT: br label [[BB32_I:%.*]]
28 ; CHECK: bb32.i:
29 ; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ]
30 ; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]]
31 ; CHECK: exit:
32 ; CHECK-NEXT: [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
33 ; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> , [[TMP7]]
34 ; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]]
35 ; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> undef, [[TMP9]]
36 ; CHECK-NEXT: [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float>
37 ; CHECK-NEXT: [[TMP317:%.*]] = fptrunc double undef to float
38 ; CHECK-NEXT: [[TMP319:%.*]] = fptrunc double undef to float
39 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP317]], i32 0
40 ; CHECK-NEXT: [[TMP13]] = insertelement <2 x float> [[TMP12]], float [[TMP319]], i32 1
41 ; CHECK-NEXT: br label [[BB283]]
42 ;
1243 bb279:
1344 br label %bb283
1445
6192 ; vectorizer starts at the type (%t2, %t3) and will constant fold the tree.
6293 ; The code that handles insertelement instructions must handle this.
6394 define <4 x double> @constant_folding() {
95 ; CHECK-LABEL: @constant_folding(
96 ; CHECK-NEXT: entry:
97 ; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double 1.000000e+00, i32 1
98 ; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double 2.000000e+00, i32 0
99 ; CHECK-NEXT: ret <4 x double> [[I2]]
100 ;
64101 entry:
65102 %t0 = fadd double 1.000000e+00 , 0.000000e+00
66103 %t1 = fadd double 1.000000e+00 , 1.000000e+00
70107 %i2 = insertelement <4 x double> %i1, double %t3, i32 0
71108 ret <4 x double> %i2
72109 }
73
74 ; CHECK-LABEL: @constant_folding
75 ; CHECK: %[[V0:.+]] = extractelement <2 x double> <double 1.000000e+00, double 2.000000e+00>, i32 0
76 ; CHECK: %[[V1:.+]] = insertelement <4 x double> undef, double %[[V0]], i32 1
77 ; CHECK: %[[V2:.+]] = extractelement <2 x double> <double 1.000000e+00, double 2.000000e+00>, i32 1
78 ; CHECK: %[[V3:.+]] = insertelement <4 x double> %[[V1]], double %[[V2]], i32 0
79 ; CHECK: ret <4 x double> %[[V3]]