llvm.org GIT mirror llvm / c55d5fc
[CostModel][X86] Fix SLM <2 x i64> icmp costs SLM is 2 x slower for <2 x i64> comparison ops than other vector types, we should account for this like we do for SLM <2 x i64> add/sub/mul costs. This should remove some of the SLM codegen diffs in D43582 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@372954 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 1 year, 26 days ago
9 changed file(s) with 358 addition(s) and 87 deletion(s). Raw diff Collapse all Expand all
17961796 }
17971797 }
17981798
1799 static const CostTblEntry SLMCostTbl[] = {
1800 // slm pcmpeq/pcmpgt throughput is 2
1801 { ISD::SETCC, MVT::v2i64, 2 },
1802 };
1803
17991804 static const CostTblEntry AVX512BWCostTbl[] = {
18001805 { ISD::SETCC, MVT::v32i16, 1 },
18011806 { ISD::SETCC, MVT::v64i8, 1 },
18811886
18821887 { ISD::SELECT, MVT::v4f32, 3 }, // andps + andnps + orps
18831888 };
1889
1890 if (ST->isSLM())
1891 if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
1892 return LT.first * (ExtraCost + Entry->Cost);
18841893
18851894 if (ST->hasBWI())
18861895 if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
170170 ;
171171 ; SLM-LABEL: 'sadd'
172172 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
173 ; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
174 ; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
175 ; SLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
173 ; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
174 ; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
175 ; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
176176 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
177177 ; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
178178 ; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
408408 ;
409409 ; SLM-LABEL: 'uadd'
410410 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
411 ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
412 ; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
413 ; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
411 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
412 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
413 ; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
414414 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
415415 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
416416 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
646646 ;
647647 ; SLM-LABEL: 'ssub'
648648 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
649 ; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
650 ; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
651 ; SLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
649 ; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
650 ; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
651 ; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
652652 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
653653 ; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
654654 ; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
884884 ;
885885 ; SLM-LABEL: 'usub'
886886 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
887 ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
888 ; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
889 ; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
887 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
888 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
889 ; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
890890 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
891891 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
892892 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
11221122 ;
11231123 ; SLM-LABEL: 'smul'
11241124 ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
1125 ; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1126 ; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1127 ; SLM-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1125 ; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1126 ; SLM-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1127 ; SLM-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
11281128 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
11291129 ; SLM-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
11301130 ; SLM-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
13601360 ;
13611361 ; SLM-LABEL: 'umul'
13621362 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
1363 ; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1364 ; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1365 ; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1363 ; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1364 ; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1365 ; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
13661366 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
13671367 ; SLM-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
13681368 ; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
169169 ;
170170 ; SLM-LABEL: 'add'
171171 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
172 ; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
173 ; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
174 ; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
172 ; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
173 ; SLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
174 ; SLM-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
175175 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
176176 ; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
177177 ; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
403403 ;
404404 ; SLM-LABEL: 'sub'
405405 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
406 ; SLM-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
407 ; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
408 ; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
406 ; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
407 ; SLM-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
408 ; SLM-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
409409 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
410410 ; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
411411 ; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
169169 ;
170170 ; SLM-LABEL: 'add'
171171 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
172 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
173 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
174 ; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
172 ; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
173 ; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
174 ; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
175175 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
176176 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
177177 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
403403 ;
404404 ; SLM-LABEL: 'sub'
405405 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
406 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
407 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
408 ; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
406 ; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
407 ; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
408 ; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
409409 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
410410 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
411411 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
5656 ;
5757 ; SLM-LABEL: 'var_funnel_i64'
5858 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 %c64)
59 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128)
60 ; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256)
61 ; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512)
59 ; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128)
60 ; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256)
61 ; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512)
6262 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
6363 ;
6464 ; GLM-LABEL: 'var_funnel_i64'
384384 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
385385 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
386386 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
387 ; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
388 ; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
389 ; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
387 ; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
388 ; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
389 ; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
390390 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
391391 ;
392392 ; GLM-LABEL: 'splatvar_funnel_i64'
776776 ;
777777 ; SLM-LABEL: 'constant_funnel_i64'
778778 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
779 ; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> )
780 ; SLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> )
781 ; SLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> >)
779 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> >)
780 ; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> )
781 ; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> )
782782 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
783783 ;
784784 ; GLM-LABEL: 'constant_funnel_i64'
10921092 ;
10931093 ; SLM-LABEL: 'splatconstant_funnel_i64'
10941094 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
1095 ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> )
1096 ; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> )
1097 ; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> i64 7, i64 7>)
1095 ; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
1096 ; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> )
1097 ; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> )
10981098 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
10991099 ;
11001100 ; GLM-LABEL: 'splatconstant_funnel_i64'
5656 ;
5757 ; SLM-LABEL: 'var_funnel_i64'
5858 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 %c64)
59 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128)
60 ; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256)
61 ; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512)
59 ; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128)
60 ; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256)
61 ; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512)
6262 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
6363 ;
6464 ; GLM-LABEL: 'var_funnel_i64'
384384 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <2 x i64> %c128, <2 x i64> undef, <2 x i32> zeroinitializer
385385 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <4 x i64> %c256, <4 x i64> undef, <4 x i32> zeroinitializer
386386 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <8 x i64> %c512, <8 x i64> undef, <8 x i32> zeroinitializer
387 ; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
388 ; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
389 ; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
387 ; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %u128)
388 ; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %u256)
389 ; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %u512)
390390 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
391391 ;
392392 ; GLM-LABEL: 'splatvar_funnel_i64'
776776 ;
777777 ; SLM-LABEL: 'constant_funnel_i64'
778778 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
779 ; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> )
780 ; SLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> )
781 ; SLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> >)
779 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> >)
780 ; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> )
781 ; SLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> )
782782 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
783783 ;
784784 ; GLM-LABEL: 'constant_funnel_i64'
10921092 ;
10931093 ; SLM-LABEL: 'splatconstant_funnel_i64'
10941094 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
1095 ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> )
1096 ; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> )
1097 ; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> i64 7, i64 7>)
1095 ; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
1096 ; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> )
1097 ; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> )
10981098 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
10991099 ;
11001100 ; GLM-LABEL: 'splatconstant_funnel_i64'
1010 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+xop,+avx | FileCheck %s -check-prefixes=CHECK,AVX,XOPAVX1
1111 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+xop,+avx2 | FileCheck %s -check-prefixes=CHECK,AVX,XOPAVX2
1212 ;
13 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
13 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SLM
1414 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
1515 ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2
1616
268268 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
269269 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
270270 ;
271 ; SLM-LABEL: 'cmp_int_eq'
272 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
273 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
274 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef
275 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef
276 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef
277 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 undef, undef
278 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef
279 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef
280 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef
281 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef
282 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 undef, undef
283 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef
284 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef
285 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef
286 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef
287 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 undef, undef
288 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef
289 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef
290 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef
291 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef
292 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
293 ;
271294 ; BTVER2-LABEL: 'cmp_int_eq'
272295 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 undef, undef
273296 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef
572595 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
573596 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
574597 ;
598 ; SLM-LABEL: 'cmp_int_ne'
599 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
600 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
601 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ne <32 x i8> undef, undef
602 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> undef, undef
603 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> undef, undef
604 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 undef, undef
605 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> undef, undef
606 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ne <16 x i16> undef, undef
607 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> undef, undef
608 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> undef, undef
609 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 undef, undef
610 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> undef, undef
611 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ne <8 x i32> undef, undef
612 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> undef, undef
613 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> undef, undef
614 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 undef, undef
615 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp ne <2 x i64> undef, undef
616 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp ne <4 x i64> undef, undef
617 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp ne <8 x i64> undef, undef
618 ; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp ne <16 x i64> undef, undef
619 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
620 ;
575621 ; BTVER2-LABEL: 'cmp_int_ne'
576622 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 undef, undef
577623 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> undef, undef
876922 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
877923 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
878924 ;
925 ; SLM-LABEL: 'cmp_int_sge'
926 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
927 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
928 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sge <32 x i8> undef, undef
929 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> undef, undef
930 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> undef, undef
931 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 undef, undef
932 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> undef, undef
933 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sge <16 x i16> undef, undef
934 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> undef, undef
935 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> undef, undef
936 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 undef, undef
937 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> undef, undef
938 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sge <8 x i32> undef, undef
939 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> undef, undef
940 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> undef, undef
941 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 undef, undef
942 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp sge <2 x i64> undef, undef
943 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp sge <4 x i64> undef, undef
944 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp sge <8 x i64> undef, undef
945 ; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp sge <16 x i64> undef, undef
946 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
947 ;
879948 ; BTVER2-LABEL: 'cmp_int_sge'
880949 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 undef, undef
881950 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> undef, undef
11801249 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
11811250 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
11821251 ;
1252 ; SLM-LABEL: 'cmp_int_uge'
1253 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
1254 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
1255 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp uge <32 x i8> undef, undef
1256 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> undef, undef
1257 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp uge <128 x i8> undef, undef
1258 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 undef, undef
1259 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> undef, undef
1260 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp uge <16 x i16> undef, undef
1261 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> undef, undef
1262 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> undef, undef
1263 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 undef, undef
1264 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> undef, undef
1265 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp uge <8 x i32> undef, undef
1266 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp uge <16 x i32> undef, undef
1267 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp uge <32 x i32> undef, undef
1268 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 undef, undef
1269 ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp uge <2 x i64> undef, undef
1270 ; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp uge <4 x i64> undef, undef
1271 ; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp uge <8 x i64> undef, undef
1272 ; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp uge <16 x i64> undef, undef
1273 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1274 ;
11831275 ; BTVER2-LABEL: 'cmp_int_uge'
11841276 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 undef, undef
11851277 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> undef, undef
14841576 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
14851577 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
14861578 ;
1579 ; SLM-LABEL: 'cmp_int_sgt'
1580 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
1581 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
1582 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sgt <32 x i8> undef, undef
1583 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sgt <64 x i8> undef, undef
1584 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sgt <128 x i8> undef, undef
1585 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 undef, undef
1586 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> undef, undef
1587 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sgt <16 x i16> undef, undef
1588 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sgt <32 x i16> undef, undef
1589 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sgt <64 x i16> undef, undef
1590 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 undef, undef
1591 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> undef, undef
1592 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sgt <8 x i32> undef, undef
1593 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> undef, undef
1594 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> undef, undef
1595 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 undef, undef
1596 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp sgt <2 x i64> undef, undef
1597 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp sgt <4 x i64> undef, undef
1598 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp sgt <8 x i64> undef, undef
1599 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp sgt <16 x i64> undef, undef
1600 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1601 ;
14871602 ; BTVER2-LABEL: 'cmp_int_sgt'
14881603 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 undef, undef
14891604 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> undef, undef
17881903 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
17891904 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
17901905 ;
1906 ; SLM-LABEL: 'cmp_int_ugt'
1907 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
1908 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
1909 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> undef, undef
1910 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> undef, undef
1911 ; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> undef, undef
1912 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 undef, undef
1913 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> undef, undef
1914 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> undef, undef
1915 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> undef, undef
1916 ; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> undef, undef
1917 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 undef, undef
1918 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> undef, undef
1919 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> undef, undef
1920 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> undef, undef
1921 ; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> undef, undef
1922 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 undef, undef
1923 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ugt <2 x i64> undef, undef
1924 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp ugt <4 x i64> undef, undef
1925 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp ugt <8 x i64> undef, undef
1926 ; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp ugt <16 x i64> undef, undef
1927 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1928 ;
17911929 ; BTVER2-LABEL: 'cmp_int_ugt'
17921930 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 undef, undef
17931931 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> undef, undef
20922230 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
20932231 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
20942232 ;
2233 ; SLM-LABEL: 'cmp_int_sle'
2234 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
2235 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
2236 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sle <32 x i8> undef, undef
2237 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> undef, undef
2238 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> undef, undef
2239 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 undef, undef
2240 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> undef, undef
2241 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sle <16 x i16> undef, undef
2242 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> undef, undef
2243 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> undef, undef
2244 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 undef, undef
2245 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> undef, undef
2246 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sle <8 x i32> undef, undef
2247 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> undef, undef
2248 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> undef, undef
2249 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 undef, undef
2250 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = icmp sle <2 x i64> undef, undef
2251 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = icmp sle <4 x i64> undef, undef
2252 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = icmp sle <8 x i64> undef, undef
2253 ; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I64 = icmp sle <16 x i64> undef, undef
2254 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
2255 ;
20952256 ; BTVER2-LABEL: 'cmp_int_sle'
20962257 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 undef, undef
20972258 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> undef, undef
23962557 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
23972558 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
23982559 ;
2560 ; SLM-LABEL: 'cmp_int_ule'
2561 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
2562 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
2563 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ule <32 x i8> undef, undef
2564 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> undef, undef
2565 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> undef, undef
2566 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 undef, undef
2567 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> undef, undef
2568 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ule <16 x i16> undef, undef
2569 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> undef, undef
2570 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> undef, undef
2571 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 undef, undef
2572 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> undef, undef
2573 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ule <8 x i32> undef, undef
2574 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ule <16 x i32> undef, undef
2575 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ule <32 x i32> undef, undef
2576 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 undef, undef
2577 ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp ule <2 x i64> undef, undef
2578 ; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp ule <4 x i64> undef, undef
2579 ; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp ule <8 x i64> undef, undef
2580 ; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp ule <16 x i64> undef, undef
2581 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
2582 ;
23992583 ; BTVER2-LABEL: 'cmp_int_ule'
24002584 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 undef, undef
24012585 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> undef, undef
27002884 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
27012885 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
27022886 ;
2887 ; SLM-LABEL: 'cmp_int_slt'
2888 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
2889 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
2890 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp slt <32 x i8> undef, undef
2891 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp slt <64 x i8> undef, undef
2892 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp slt <128 x i8> undef, undef
2893 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 undef, undef
2894 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> undef, undef
2895 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp slt <16 x i16> undef, undef
2896 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp slt <32 x i16> undef, undef
2897 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp slt <64 x i16> undef, undef
2898 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 undef, undef
2899 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> undef, undef
2900 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp slt <8 x i32> undef, undef
2901 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> undef, undef
2902 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> undef, undef
2903 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 undef, undef
2904 ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = icmp slt <2 x i64> undef, undef
2905 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp slt <4 x i64> undef, undef
2906 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp slt <8 x i64> undef, undef
2907 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp slt <16 x i64> undef, undef
2908 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
2909 ;
27032910 ; BTVER2-LABEL: 'cmp_int_slt'
27042911 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 undef, undef
27052912 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> undef, undef
30043211 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
30053212 ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
30063213 ;
3214 ; SLM-LABEL: 'cmp_int_ult'
3215 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
3216 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
3217 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> undef, undef
3218 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> undef, undef
3219 ; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> undef, undef
3220 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 undef, undef
3221 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> undef, undef
3222 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> undef, undef
3223 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> undef, undef
3224 ; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> undef, undef
3225 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 undef, undef
3226 ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> undef, undef
3227 ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> undef, undef
3228 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> undef, undef
3229 ; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> undef, undef
3230 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 undef, undef
3231 ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp ult <2 x i64> undef, undef
3232 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp ult <4 x i64> undef, undef
3233 ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp ult <8 x i64> undef, undef
3234 ; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp ult <16 x i64> undef, undef
3235 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
3236 ;
30073237 ; BTVER2-LABEL: 'cmp_int_ult'
30083238 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 undef, undef
30093239 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> undef, undef
6161 ; SSE-NEXT: ret void
6262 ;
6363 ; SLM-LABEL: @add_v8i64(
64 ; SLM-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
65 ; SLM-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
66 ; SLM-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
67 ; SLM-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
68 ; SLM-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
69 ; SLM-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
70 ; SLM-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
71 ; SLM-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
72 ; SLM-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP5]])
73 ; SLM-NEXT: [[TMP10:%.*]] = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> [[TMP2]], <2 x i64> [[TMP6]])
74 ; SLM-NEXT: [[TMP11:%.*]] = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> [[TMP3]], <2 x i64> [[TMP7]])
75 ; SLM-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP8]])
76 ; SLM-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
77 ; SLM-NEXT: store <2 x i64> [[TMP10]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
78 ; SLM-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
79 ; SLM-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
64 ; SLM-NEXT: [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
65 ; SLM-NEXT: [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
66 ; SLM-NEXT: [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
67 ; SLM-NEXT: [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
68 ; SLM-NEXT: [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
69 ; SLM-NEXT: [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
70 ; SLM-NEXT: [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
71 ; SLM-NEXT: [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
72 ; SLM-NEXT: [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
73 ; SLM-NEXT: [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
74 ; SLM-NEXT: [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
75 ; SLM-NEXT: [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
76 ; SLM-NEXT: [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
77 ; SLM-NEXT: [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
78 ; SLM-NEXT: [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
79 ; SLM-NEXT: [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
80 ; SLM-NEXT: [[R0:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A0]], i64 [[B0]])
81 ; SLM-NEXT: [[R1:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A1]], i64 [[B1]])
82 ; SLM-NEXT: [[R2:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A2]], i64 [[B2]])
83 ; SLM-NEXT: [[R3:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A3]], i64 [[B3]])
84 ; SLM-NEXT: [[R4:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A4]], i64 [[B4]])
85 ; SLM-NEXT: [[R5:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A5]], i64 [[B5]])
86 ; SLM-NEXT: [[R6:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A6]], i64 [[B6]])
87 ; SLM-NEXT: [[R7:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A7]], i64 [[B7]])
88 ; SLM-NEXT: store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
89 ; SLM-NEXT: store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
90 ; SLM-NEXT: store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
91 ; SLM-NEXT: store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
92 ; SLM-NEXT: store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
93 ; SLM-NEXT: store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
94 ; SLM-NEXT: store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
95 ; SLM-NEXT: store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
8096 ; SLM-NEXT: ret void
8197 ;
8298 ; AVX-LABEL: @add_v8i64(
6161 ; SSE-NEXT: ret void
6262 ;
6363 ; SLM-LABEL: @sub_v8i64(
64 ; SLM-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
65 ; SLM-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
66 ; SLM-NEXT: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <2 x i64>*), align 8
67 ; SLM-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6) to <2 x i64>*), align 8
68 ; SLM-NEXT: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @b64 to <2 x i64>*), align 8
69 ; SLM-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2) to <2 x i64>*), align 8
70 ; SLM-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <2 x i64>*), align 8
71 ; SLM-NEXT: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6) to <2 x i64>*), align 8
72 ; SLM-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP5]])
73 ; SLM-NEXT: [[TMP10:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> [[TMP2]], <2 x i64> [[TMP6]])
74 ; SLM-NEXT: [[TMP11:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> [[TMP3]], <2 x i64> [[TMP7]])
75 ; SLM-NEXT: [[TMP12:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP8]])
76 ; SLM-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* bitcast ([8 x i64]* @c64 to <2 x i64>*), align 8
77 ; SLM-NEXT: store <2 x i64> [[TMP10]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2) to <2 x i64>*), align 8
78 ; SLM-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
79 ; SLM-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
64 ; SLM-NEXT: [[A0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
65 ; SLM-NEXT: [[A1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
66 ; SLM-NEXT: [[A2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
67 ; SLM-NEXT: [[A3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 3), align 8
68 ; SLM-NEXT: [[A4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4), align 8
69 ; SLM-NEXT: [[A5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 5), align 8
70 ; SLM-NEXT: [[A6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 6), align 8
71 ; SLM-NEXT: [[A7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 7), align 8
72 ; SLM-NEXT: [[B0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 0), align 8
73 ; SLM-NEXT: [[B1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 1), align 8
74 ; SLM-NEXT: [[B2:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 2), align 8
75 ; SLM-NEXT: [[B3:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 3), align 8
76 ; SLM-NEXT: [[B4:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4), align 8
77 ; SLM-NEXT: [[B5:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 5), align 8
78 ; SLM-NEXT: [[B6:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 6), align 8
79 ; SLM-NEXT: [[B7:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 7), align 8
80 ; SLM-NEXT: [[R0:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A0]], i64 [[B0]])
81 ; SLM-NEXT: [[R1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A1]], i64 [[B1]])
82 ; SLM-NEXT: [[R2:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A2]], i64 [[B2]])
83 ; SLM-NEXT: [[R3:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A3]], i64 [[B3]])
84 ; SLM-NEXT: [[R4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A4]], i64 [[B4]])
85 ; SLM-NEXT: [[R5:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A5]], i64 [[B5]])
86 ; SLM-NEXT: [[R6:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A6]], i64 [[B6]])
87 ; SLM-NEXT: [[R7:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A7]], i64 [[B7]])
88 ; SLM-NEXT: store i64 [[R0]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 0), align 8
89 ; SLM-NEXT: store i64 [[R1]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 1), align 8
90 ; SLM-NEXT: store i64 [[R2]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 2), align 8
91 ; SLM-NEXT: store i64 [[R3]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 3), align 8
92 ; SLM-NEXT: store i64 [[R4]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4), align 8
93 ; SLM-NEXT: store i64 [[R5]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 5), align 8
94 ; SLM-NEXT: store i64 [[R6]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6), align 8
95 ; SLM-NEXT: store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
8096 ; SLM-NEXT: ret void
8197 ;
8298 ; AVX-LABEL: @sub_v8i64(