llvm.org GIT mirror llvm / 0a8c54a
[Cost][X86] Add v2i64 truncation costs We are missing costs for a lot of truncation cases, I'm hoping to address all the 'zero cost' cases in trunc.ll I thought this was a vector widening side effect, but even before this we had some interesting LV decisions (notably over indvars) being made due to these zero costs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@372498 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim a month ago
6 changed file(s) with 173 addition(s) and 121 deletion(s). Raw diff Collapse all Expand all
15651565 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
15661566 { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 },
15671567 { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
1568 { ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1 }, // PSHUFB
15681569
15691570 { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 },
15701571 };
16361637 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7 },
16371638 { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },
16381639 { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 10 },
1640 { ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 4 }, // PAND+3*PACKUSWB
1641 { ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 2 }, // PSHUFD+PSHUFLW
1642 { ISD::TRUNCATE, MVT::v2i32, MVT::v2i64, 1 }, // PSHUFD
16391643 };
16401644
16411645 std::pair LTSrc = TLI->getTypeLegalizationCost(DL, Src);
3737 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
3838 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
3939 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
40 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
41 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
42 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
40 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
41 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
42 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
4343 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
4444 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
4545 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
5656 ; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
5757 ; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
5858 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
59 ; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
60 ; SSE42-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
61 ; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
59 ; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
60 ; SSE42-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
61 ; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
6262 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
6363 ; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
6464 ; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
170170 ; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
171171 ; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
172172 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
173 ; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
174 ; SLM-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
175 ; SLM-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
173 ; SLM-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
174 ; SLM-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
175 ; SLM-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
176176 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
177177 ; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
178178 ; SLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
189189 ; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
190190 ; GLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
191191 ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3)
192 ; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
193 ; GLM-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
194 ; GLM-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
192 ; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
193 ; GLM-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
194 ; GLM-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
195195 ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3)
196196 ; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
197197 ; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
271271 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
272272 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
273273 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
274 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
275 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
276 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
274 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
275 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
276 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
277277 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3)
278278 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
279279 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
290290 ; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
291291 ; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
292292 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
293 ; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
294 ; SSE42-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
295 ; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
293 ; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
294 ; SSE42-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
295 ; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
296296 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3)
297297 ; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
298298 ; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
404404 ; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
405405 ; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
406406 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
407 ; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
408 ; SLM-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
409 ; SLM-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
407 ; SLM-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
408 ; SLM-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
409 ; SLM-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
410410 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3)
411411 ; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
412412 ; SLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
423423 ; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3)
424424 ; GLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3)
425425 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3)
426 ; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
427 ; GLM-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
428 ; GLM-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
426 ; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3)
427 ; GLM-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3)
428 ; GLM-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3)
429429 ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3)
430430 ; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3)
431431 ; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3)
993993 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
994994 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
995995 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
996 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
997 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
998 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
996 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
997 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
998 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
999999 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
10001000 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
10011001 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
10121012 ; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
10131013 ; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
10141014 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
1015 ; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1016 ; SSE42-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1017 ; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1015 ; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1016 ; SSE42-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1017 ; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
10181018 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
10191019 ; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
10201020 ; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
11261126 ; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
11271127 ; SLM-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
11281128 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
1129 ; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1130 ; SLM-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1131 ; SLM-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1129 ; SLM-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1130 ; SLM-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1131 ; SLM-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
11321132 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
11331133 ; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
11341134 ; SLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
11451145 ; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
11461146 ; GLM-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
11471147 ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
1148 ; GLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1149 ; GLM-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1150 ; GLM-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1148 ; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1149 ; GLM-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1150 ; GLM-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
11511151 ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
11521152 ; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
11531153 ; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
12311231 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
12321232 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
12331233 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
1234 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1235 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1236 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1234 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1235 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1236 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
12371237 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
12381238 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
12391239 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
12501250 ; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
12511251 ; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
12521252 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
1253 ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1254 ; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1255 ; SSE42-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1253 ; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1254 ; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1255 ; SSE42-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
12561256 ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
12571257 ; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
12581258 ; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
13641364 ; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
13651365 ; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
13661366 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
1367 ; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1368 ; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1369 ; SLM-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1367 ; SLM-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1368 ; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1369 ; SLM-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
13701370 ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
13711371 ; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
13721372 ; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
13831383 ; GLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
13841384 ; GLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
13851385 ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
1386 ; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1387 ; GLM-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1388 ; GLM-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1386 ; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1387 ; GLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1388 ; GLM-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
13891389 ; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
13901390 ; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
13911391 ; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
8989 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D3 = zext <16 x i16> undef to <16 x i32>
9090 ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %D4 = zext <16 x i8> undef to <16 x i32>
9191 ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %D5 = zext <16 x i1> undef to <16 x i32>
92 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = trunc <4 x i64> undef to <4 x i32>
92 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = trunc <4 x i64> undef to <4 x i32>
9393 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F = trunc <8 x i32> undef to <8 x i16>
9494 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8>
9595 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8>
96 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8>
97 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %G = trunc <8 x i64> undef to <8 x i32>
96 ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8>
97 ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G = trunc <8 x i64> undef to <8 x i32>
9898 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16>
9999 ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8>
100100 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
119119 ; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D3 = zext <16 x i16> undef to <16 x i32>
120120 ; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D4 = zext <16 x i8> undef to <16 x i32>
121121 ; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %D5 = zext <16 x i1> undef to <16 x i32>
122 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %E = trunc <4 x i64> undef to <4 x i32>
122 ; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = trunc <4 x i64> undef to <4 x i32>
123123 ; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F = trunc <8 x i32> undef to <8 x i16>
124124 ; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8>
125125 ; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8>
126 ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8>
127 ; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %G = trunc <8 x i64> undef to <8 x i32>
126 ; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8>
127 ; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G = trunc <8 x i64> undef to <8 x i32>
128128 ; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16>
129129 ; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8>
130130 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1212
1313 define i32 @trunc_vXi32() {
1414 ; SSE-LABEL: 'trunc_vXi32'
15 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32>
16 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32>
17 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32>
18 ; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32>
15 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32>
16 ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32>
17 ; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32>
18 ; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32>
1919 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
2020 ;
2121 ; AVX1-LABEL: 'trunc_vXi32'
22 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32>
22 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32>
2323 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32>
2424 ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32>
2525 ; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32>
2626 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
2727 ;
2828 ; AVX2-LABEL: 'trunc_vXi32'
29 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32>
29 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32>
3030 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32>
3131 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32>
3232 ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32>
3333 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
3434 ;
3535 ; AVX512-LABEL: 'trunc_vXi32'
36 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32>
36 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32>
3737 ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32>
3838 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32>
3939 ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32>
4040 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
4141 ;
4242 ; BTVER2-LABEL: 'trunc_vXi32'
43 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32>
43 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32>
4444 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32>
4545 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32>
4646 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32>
5555
5656 define i32 @trunc_vXi16() {
5757 ; SSE2-LABEL: 'trunc_vXi16'
58 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
59 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
60 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
61 ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
62 ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
58 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
59 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
60 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
61 ; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
62 ; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
6363 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16>
6464 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16>
6565 ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16>
6868 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
6969 ;
7070 ; SSSE3-LABEL: 'trunc_vXi16'
71 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
72 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
73 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
74 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
75 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
71 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
72 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
73 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
74 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
75 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
7676 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16>
7777 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16>
7878 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16>
8181 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
8282 ;
8383 ; SSE42-LABEL: 'trunc_vXi16'
84 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
85 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
86 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
87 ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
88 ; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
84 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
85 ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
86 ; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
87 ; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
88 ; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16>
8989 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16>
9090 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16>
9191 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16>
9494 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
9595 ;
9696 ; AVX1-LABEL: 'trunc_vXi16'
97 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
97 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
9898 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
9999 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
100100 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
107107 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
108108 ;
109109 ; AVX2-LABEL: 'trunc_vXi16'
110 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
110 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
111111 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
112112 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
113113 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
120120 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
121121 ;
122122 ; AVX512F-LABEL: 'trunc_vXi16'
123 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
123 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
124124 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
125125 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
126126 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
133133 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
134134 ;
135135 ; AVX512BW-LABEL: 'trunc_vXi16'
136 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
136 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
137137 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
138138 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
139139 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
146146 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
147147 ;
148148 ; BTVER2-LABEL: 'trunc_vXi16'
149 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
149 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16>
150150 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16>
151151 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16>
152152 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16>
174174
175175 define i32 @trunc_vXi8() {
176176 ; SSE2-LABEL: 'trunc_vXi8'
177 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8>
178 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8>
179 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
180 ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
181 ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
182 ; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
177 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8>
178 ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8>
179 ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
180 ; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
181 ; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
182 ; SSE2-NEXT: Cost Model: Found an estimated cost of 159 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
183183 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8>
184184 ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8>
185185 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8>
195195 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
196196 ;
197197 ; SSSE3-LABEL: 'trunc_vXi8'
198 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8>
199 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8>
200 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
201 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
202 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
203 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
198 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8>
199 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8>
200 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
201 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
202 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
203 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 159 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
204204 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8>
205205 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8>
206206 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8>
216216 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
217217 ;
218218 ; SSE42-LABEL: 'trunc_vXi8'
219 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8>
220 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8>
221 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
222 ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
223 ; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
224 ; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
219 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8>
220 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8>
221 ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
222 ; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
223 ; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8>
224 ; SSE42-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8>
225225 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8>
226226 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8>
227227 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8>
237237 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
238238 ;
239239 ; AVX1-LABEL: 'trunc_vXi8'
240 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8>
240 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8>
241241 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8>
242242 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
243243 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
258258 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
259259 ;
260260 ; AVX2-LABEL: 'trunc_vXi8'
261 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8>
261 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8>
262262 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8>
263263 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
264264 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
279279 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
280280 ;
281281 ; AVX512F-LABEL: 'trunc_vXi8'
282 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8>
282 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8>
283283 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8>
284284 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
285285 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
300300 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
301301 ;
302302 ; AVX512BW-LABEL: 'trunc_vXi8'
303 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8>
303 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8>
304304 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8>
305305 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
306306 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
321321 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
322322 ;
323323 ; BTVER2-LABEL: 'trunc_vXi8'
324 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8>
324 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i8>
325325 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8>
326326 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8>
327327 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8>
148148
149149 define void @smul_v16i32() {
150150 ; SSE-LABEL: @smul_v16i32(
151 ; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @a32 to <4 x i32>*), align 4
152 ; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4) to <4 x i32>*), align 4
153 ; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8) to <4 x i32>*), align 4
154 ; SSE-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12) to <4 x i32>*), align 4
155 ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @b32 to <4 x i32>*), align 4
156 ; SSE-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4) to <4 x i32>*), align 4
157 ; SSE-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8) to <4 x i32>*), align 4
158 ; SSE-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12) to <4 x i32>*), align 4
159 ; SSE-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP5]], i32 3)
160 ; SSE-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> [[TMP2]], <4 x i32> [[TMP6]], i32 3)
161 ; SSE-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> [[TMP3]], <4 x i32> [[TMP7]], i32 3)
162 ; SSE-NEXT: [[TMP12:%.*]] = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP8]], i32 3)
163 ; SSE-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast ([16 x i32]* @c32 to <4 x i32>*), align 4
164 ; SSE-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4) to <4 x i32>*), align 4
165 ; SSE-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8) to <4 x i32>*), align 4
166 ; SSE-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12) to <4 x i32>*), align 4
151 ; SSE-NEXT: [[A0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 0), align 4
152 ; SSE-NEXT: [[A1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 1), align 4
153 ; SSE-NEXT: [[A2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 2), align 4
154 ; SSE-NEXT: [[A3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 3), align 4
155 ; SSE-NEXT: [[A4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 4), align 4
156 ; SSE-NEXT: [[A5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 5), align 4
157 ; SSE-NEXT: [[A6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 6), align 4
158 ; SSE-NEXT: [[A7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 7), align 4
159 ; SSE-NEXT: [[A8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 8), align 4
160 ; SSE-NEXT: [[A9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 9), align 4
161 ; SSE-NEXT: [[A10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 10), align 4
162 ; SSE-NEXT: [[A11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 11), align 4
163 ; SSE-NEXT: [[A12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 12), align 4
164 ; SSE-NEXT: [[A13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 13), align 4
165 ; SSE-NEXT: [[A14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 14), align 4
166 ; SSE-NEXT: [[A15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @a32, i32 0, i64 15), align 4
167 ; SSE-NEXT: [[B0:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 0), align 4
168 ; SSE-NEXT: [[B1:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 1), align 4
169 ; SSE-NEXT: [[B2:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 2), align 4
170 ; SSE-NEXT: [[B3:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 3), align 4
171 ; SSE-NEXT: [[B4:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 4), align 4
172 ; SSE-NEXT: [[B5:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 5), align 4
173 ; SSE-NEXT: [[B6:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 6), align 4
174 ; SSE-NEXT: [[B7:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 7), align 4
175 ; SSE-NEXT: [[B8:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 8), align 4
176 ; SSE-NEXT: [[B9:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 9), align 4
177 ; SSE-NEXT: [[B10:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 10), align 4
178 ; SSE-NEXT: [[B11:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 11), align 4
179 ; SSE-NEXT: [[B12:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 12), align 4
180 ; SSE-NEXT: [[B13:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 13), align 4
181 ; SSE-NEXT: [[B14:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 14), align 4
182 ; SSE-NEXT: [[B15:%.*]] = load i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @b32, i32 0, i64 15), align 4
183 ; SSE-NEXT: [[R0:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A0]], i32 [[B0]], i32 3)
184 ; SSE-NEXT: [[R1:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A1]], i32 [[B1]], i32 3)
185 ; SSE-NEXT: [[R2:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A2]], i32 [[B2]], i32 3)
186 ; SSE-NEXT: [[R3:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A3]], i32 [[B3]], i32 3)
187 ; SSE-NEXT: [[R4:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A4]], i32 [[B4]], i32 3)
188 ; SSE-NEXT: [[R5:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A5]], i32 [[B5]], i32 3)
189 ; SSE-NEXT: [[R6:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A6]], i32 [[B6]], i32 3)
190 ; SSE-NEXT: [[R7:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A7]], i32 [[B7]], i32 3)
191 ; SSE-NEXT: [[R8:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A8]], i32 [[B8]], i32 3)
192 ; SSE-NEXT: [[R9:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A9]], i32 [[B9]], i32 3)
193 ; SSE-NEXT: [[R10:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A10]], i32 [[B10]], i32 3)
194 ; SSE-NEXT: [[R11:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A11]], i32 [[B11]], i32 3)
195 ; SSE-NEXT: [[R12:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A12]], i32 [[B12]], i32 3)
196 ; SSE-NEXT: [[R13:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A13]], i32 [[B13]], i32 3)
197 ; SSE-NEXT: [[R14:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A14]], i32 [[B14]], i32 3)
198 ; SSE-NEXT: [[R15:%.*]] = call i32 @llvm.smul.fix.i32(i32 [[A15]], i32 [[B15]], i32 3)
199 ; SSE-NEXT: store i32 [[R0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 0), align 4
200 ; SSE-NEXT: store i32 [[R1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 1), align 4
201 ; SSE-NEXT: store i32 [[R2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 2), align 4
202 ; SSE-NEXT: store i32 [[R3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 3), align 4
203 ; SSE-NEXT: store i32 [[R4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 4), align 4
204 ; SSE-NEXT: store i32 [[R5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 5), align 4
205 ; SSE-NEXT: store i32 [[R6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 6), align 4
206 ; SSE-NEXT: store i32 [[R7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 7), align 4
207 ; SSE-NEXT: store i32 [[R8]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 8), align 4
208 ; SSE-NEXT: store i32 [[R9]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 9), align 4
209 ; SSE-NEXT: store i32 [[R10]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 10), align 4
210 ; SSE-NEXT: store i32 [[R11]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 11), align 4
211 ; SSE-NEXT: store i32 [[R12]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 12), align 4
212 ; SSE-NEXT: store i32 [[R13]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 13), align 4
213 ; SSE-NEXT: store i32 [[R14]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 14), align 4
214 ; SSE-NEXT: store i32 [[R15]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @c32, i32 0, i64 15), align 4
167215 ; SSE-NEXT: ret void
168216 ;
169217 ; SLM-LABEL: @smul_v16i32(