llvm.org GIT mirror llvm / ab39cf6
[CostModel][X86] Add min/max reduction costs for all SSE targets The original costs stopped at SSE42, I've added conservative estimates for everything down to SSE1/SSE2 and moved some of the SSE42 costs to SSE41 (really only the addition of PCMPGT makes any difference). I've also added missing vXi8 costs (we use PHMINPOSUW for i8/i16 for scarily quick results) and 256-bit vector costs for AVX1. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360528 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 4 months ago
10 changed file(s) with 703 addition(s) and 601 deletion(s). Raw diff Collapse all Expand all
25512551 // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
25522552 // and make it as the cost.
25532553
2554 static const CostTblEntry SSE42CostTblPairWise[] = {
2554 static const CostTblEntry SSE1CostTblPairWise[] = {
2555 {ISD::FMINNUM, MVT::v4f32, 4},
2556 };
2557
2558 static const CostTblEntry SSE2CostTblPairWise[] = {
25552559 {ISD::FMINNUM, MVT::v2f64, 3},
2560 {ISD::SMIN, MVT::v2i64, 6},
2561 {ISD::UMIN, MVT::v2i64, 8},
2562 {ISD::SMIN, MVT::v4i32, 6},
2563 {ISD::UMIN, MVT::v4i32, 8},
2564 {ISD::SMIN, MVT::v8i16, 4},
2565 {ISD::UMIN, MVT::v8i16, 6},
2566 {ISD::SMIN, MVT::v16i8, 8},
2567 {ISD::UMIN, MVT::v16i8, 6},
2568 };
2569
2570 static const CostTblEntry SSE41CostTblPairWise[] = {
25562571 {ISD::FMINNUM, MVT::v4f32, 2},
2557 {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8"
2558 {ISD::UMIN, MVT::v2i64, 8}, // The data reported by the IACA is "8.6"
2572 {ISD::SMIN, MVT::v2i64, 9},
2573 {ISD::UMIN, MVT::v2i64,10},
25592574 {ISD::SMIN, MVT::v4i32, 1}, // The data reported by the IACA is "1.5"
25602575 {ISD::UMIN, MVT::v4i32, 2}, // The data reported by the IACA is "1.8"
25612576 {ISD::SMIN, MVT::v8i16, 2},
25622577 {ISD::UMIN, MVT::v8i16, 2},
2578 {ISD::SMIN, MVT::v16i8, 3},
2579 {ISD::UMIN, MVT::v16i8, 3},
2580 };
2581
2582 static const CostTblEntry SSE42CostTblPairWise[] = {
2583 {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8"
2584 {ISD::UMIN, MVT::v2i64, 8}, // The data reported by the IACA is "8.6"
25632585 };
25642586
25652587 static const CostTblEntry AVX1CostTblPairWise[] = {
25722594 {ISD::UMIN, MVT::v4i32, 1},
25732595 {ISD::SMIN, MVT::v8i16, 1},
25742596 {ISD::UMIN, MVT::v8i16, 1},
2597 {ISD::SMIN, MVT::v16i8, 2},
2598 {ISD::UMIN, MVT::v16i8, 2},
2599 {ISD::SMIN, MVT::v4i64, 7},
2600 {ISD::UMIN, MVT::v4i64, 7},
25752601 {ISD::SMIN, MVT::v8i32, 3},
25762602 {ISD::UMIN, MVT::v8i32, 3},
2603 {ISD::SMIN, MVT::v16i16, 3},
2604 {ISD::UMIN, MVT::v16i16, 3},
2605 {ISD::SMIN, MVT::v32i8, 3},
2606 {ISD::UMIN, MVT::v32i8, 3},
25772607 };
25782608
25792609 static const CostTblEntry AVX2CostTblPairWise[] = {
25962626 {ISD::UMIN, MVT::v16i32, 1},
25972627 };
25982628
2599 static const CostTblEntry SSE42CostTblNoPairWise[] = {
2629 static const CostTblEntry SSE1CostTblNoPairWise[] = {
2630 {ISD::FMINNUM, MVT::v4f32, 4},
2631 };
2632
2633 static const CostTblEntry SSE2CostTblNoPairWise[] = {
26002634 {ISD::FMINNUM, MVT::v2f64, 3},
2635 {ISD::SMIN, MVT::v2i64, 6},
2636 {ISD::UMIN, MVT::v2i64, 8},
2637 {ISD::SMIN, MVT::v4i32, 6},
2638 {ISD::UMIN, MVT::v4i32, 8},
2639 {ISD::SMIN, MVT::v8i16, 4},
2640 {ISD::UMIN, MVT::v8i16, 6},
2641 {ISD::SMIN, MVT::v16i8, 8},
2642 {ISD::UMIN, MVT::v16i8, 6},
2643 };
2644
2645 static const CostTblEntry SSE41CostTblNoPairWise[] = {
26012646 {ISD::FMINNUM, MVT::v4f32, 3},
2602 {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8"
2603 {ISD::UMIN, MVT::v2i64, 9}, // The data reported by the IACA is "8.6"
2647 {ISD::SMIN, MVT::v2i64, 9},
2648 {ISD::UMIN, MVT::v2i64,11},
26042649 {ISD::SMIN, MVT::v4i32, 1}, // The data reported by the IACA is "1.5"
26052650 {ISD::UMIN, MVT::v4i32, 2}, // The data reported by the IACA is "1.8"
26062651 {ISD::SMIN, MVT::v8i16, 1}, // The data reported by the IACA is "1.5"
26072652 {ISD::UMIN, MVT::v8i16, 2}, // The data reported by the IACA is "1.8"
2653 {ISD::SMIN, MVT::v16i8, 3},
2654 {ISD::UMIN, MVT::v16i8, 3},
2655 };
2656
2657 static const CostTblEntry SSE42CostTblNoPairWise[] = {
2658 {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8"
2659 {ISD::UMIN, MVT::v2i64, 9}, // The data reported by the IACA is "8.6"
26082660 };
26092661
26102662 static const CostTblEntry AVX1CostTblNoPairWise[] = {
26172669 {ISD::UMIN, MVT::v4i32, 1},
26182670 {ISD::SMIN, MVT::v8i16, 1},
26192671 {ISD::UMIN, MVT::v8i16, 1},
2672 {ISD::SMIN, MVT::v16i8, 2},
2673 {ISD::UMIN, MVT::v16i8, 2},
2674 {ISD::SMIN, MVT::v4i64, 7},
2675 {ISD::UMIN, MVT::v4i64, 7},
26202676 {ISD::SMIN, MVT::v8i32, 2},
26212677 {ISD::UMIN, MVT::v8i32, 2},
2678 {ISD::SMIN, MVT::v16i16, 2},
2679 {ISD::UMIN, MVT::v16i16, 2},
2680 {ISD::SMIN, MVT::v32i8, 2},
2681 {ISD::UMIN, MVT::v32i8, 2},
26222682 };
26232683
26242684 static const CostTblEntry AVX2CostTblNoPairWise[] = {
26572717 if (ST->hasSSE42())
26582718 if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy))
26592719 return LT.first * Entry->Cost;
2720
2721 if (ST->hasSSE41())
2722 if (const auto *Entry = CostTableLookup(SSE41CostTblPairWise, ISD, MTy))
2723 return LT.first * Entry->Cost;
2724
2725 if (ST->hasSSE2())
2726 if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy))
2727 return LT.first * Entry->Cost;
2728
2729 if (ST->hasSSE1())
2730 if (const auto *Entry = CostTableLookup(SSE1CostTblPairWise, ISD, MTy))
2731 return LT.first * Entry->Cost;
26602732 } else {
26612733 if (ST->hasAVX512())
26622734 if (const auto *Entry =
26732745
26742746 if (ST->hasSSE42())
26752747 if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy))
2748 return LT.first * Entry->Cost;
2749
2750 if (ST->hasSSE41())
2751 if (const auto *Entry = CostTableLookup(SSE41CostTblNoPairWise, ISD, MTy))
2752 return LT.first * Entry->Cost;
2753
2754 if (ST->hasSSE2())
2755 if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
2756 return LT.first * Entry->Cost;
2757
2758 if (ST->hasSSE1())
2759 if (const auto *Entry = CostTableLookup(SSE1CostTblNoPairWise, ISD, MTy))
26762760 return LT.first * Entry->Cost;
26772761 }
26782762
1010 define i32 @reduce_i64(i32 %arg) {
1111 ; SSE2-LABEL: 'reduce_i64'
1212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
13 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
13 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
1717 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1818 ;
1919 ; SSSE3-LABEL: 'reduce_i64'
2020 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
21 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
21 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
2525 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
2626 ;
2727 ; SSE42-LABEL: 'reduce_i64'
3535 ; AVX1-LABEL: 'reduce_i64'
3636 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
3737 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
4141 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
4242 ;
4343 ; AVX2-LABEL: 'reduce_i64'
6666
6767 define i32 @reduce_i32(i32 %arg) {
6868 ; SSE2-LABEL: 'reduce_i32'
69 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
70 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
71 ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
72 ; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
73 ; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
69 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
70 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
71 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
72 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
73 ; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
7474 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
7575 ;
7676 ; SSSE3-LABEL: 'reduce_i32'
77 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
78 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
79 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
80 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
81 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
77 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
78 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
79 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
80 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
81 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
8282 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
8383 ;
8484 ; SSE42-LABEL: 'reduce_i32'
123123
124124 define i32 @reduce_i16(i32 %arg) {
125125 ; SSE2-LABEL: 'reduce_i16'
126 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
127 ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
128 ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
129 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
130 ; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
131 ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
126 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
127 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
128 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
129 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
130 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
131 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
132132 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
133133 ;
134134 ; SSSE3-LABEL: 'reduce_i16'
135135 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
136 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
137 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
138 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
139 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
140 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
136 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
137 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
138 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
139 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
140 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
141141 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
142142 ;
143143 ; SSE42-LABEL: 'reduce_i16'
153153 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
154154 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
155155 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
156 ; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
157 ; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
158 ; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
156 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
157 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
158 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
159159 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
160160 ;
161161 ; AVX2-LABEL: 'reduce_i16'
205205
206206 define i32 @reduce_i8(i32 %arg) {
207207 ; SSE2-LABEL: 'reduce_i8'
208 ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
209 ; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
210 ; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
211 ; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
213 ; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
214 ; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
208 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
209 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
210 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
211 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
213 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
214 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
215215 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
216216 ;
217217 ; SSSE3-LABEL: 'reduce_i8'
218218 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
219 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
220 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
221 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
222 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
223 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
224 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
219 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
220 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
221 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
222 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
223 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
224 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
225225 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
226226 ;
227227 ; SSE42-LABEL: 'reduce_i8'
228 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
229 ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
230 ; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
231 ; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
232 ; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
233 ; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
234 ; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
228 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
229 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
230 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
231 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
232 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
233 ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
234 ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
235235 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
236236 ;
237237 ; AVX1-LABEL: 'reduce_i8'
238 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
239 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
240 ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
241 ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
242 ; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
243 ; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
244 ; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
238 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
239 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
240 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
241 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
242 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
243 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
244 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
245245 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
246246 ;
247247 ; AVX2-LABEL: 'reduce_i8'
248 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
249 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
250 ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
251 ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
248 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
249 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
250 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
251 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
252252 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
253253 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
254254 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
255255 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
256256 ;
257257 ; AVX512F-LABEL: 'reduce_i8'
258 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
259 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
260 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
261 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
258 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
259 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
260 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
261 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
262262 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
263263 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
264264 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
265265 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
266266 ;
267267 ; AVX512BW-LABEL: 'reduce_i8'
268 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
269 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
270 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
271 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
268 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
269 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
270 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
271 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
272272 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
273273 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
274274 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
275275 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
276276 ;
277277 ; AVX512DQ-LABEL: 'reduce_i8'
278 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
279 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
280 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
281 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
278 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
279 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
280 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
281 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
282282 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
283283 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
284284 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
1010 define i32 @reduce_i64(i32 %arg) {
1111 ; SSE2-LABEL: 'reduce_i64'
1212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
13 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
13 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
1717 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1818 ;
1919 ; SSSE3-LABEL: 'reduce_i64'
2020 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
21 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
21 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
2525 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
2626 ;
2727 ; SSE42-LABEL: 'reduce_i64'
3535 ; AVX1-LABEL: 'reduce_i64'
3636 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v1i64(<1 x i64> undef)
3737 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> undef)
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> undef)
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> undef)
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> undef)
4141 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
4242 ;
4343 ; AVX2-LABEL: 'reduce_i64'
6666
6767 define i32 @reduce_i32(i32 %arg) {
6868 ; SSE2-LABEL: 'reduce_i32'
69 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
70 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
71 ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
72 ; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
73 ; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
69 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
70 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
71 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
72 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
73 ; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
7474 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
7575 ;
7676 ; SSSE3-LABEL: 'reduce_i32'
77 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
78 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
79 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
80 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
81 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
77 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.i32.v2i32(<2 x i32> undef)
78 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> undef)
79 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> undef)
80 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> undef)
81 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> undef)
8282 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
8383 ;
8484 ; SSE42-LABEL: 'reduce_i32'
123123
124124 define i32 @reduce_i16(i32 %arg) {
125125 ; SSE2-LABEL: 'reduce_i16'
126 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
127 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
128 ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
129 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
130 ; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
131 ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
126 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
127 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
128 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
129 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
130 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
131 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
132132 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
133133 ;
134134 ; SSSE3-LABEL: 'reduce_i16'
135 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
136 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
137 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
138 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
139 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
140 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
135 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
136 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
137 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
138 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
139 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
140 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
141141 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
142142 ;
143143 ; SSE42-LABEL: 'reduce_i16'
153153 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.i16.v2i16(<2 x i16> undef)
154154 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> undef)
155155 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> undef)
156 ; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
157 ; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
158 ; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
156 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> undef)
157 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> undef)
158 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> undef)
159159 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
160160 ;
161161 ; AVX2-LABEL: 'reduce_i16'
205205
206206 define i32 @reduce_i8(i32 %arg) {
207207 ; SSE2-LABEL: 'reduce_i8'
208 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
209 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
210 ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
211 ; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
213 ; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
214 ; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
208 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
209 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
210 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
211 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
213 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
214 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
215215 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
216216 ;
217217 ; SSSE3-LABEL: 'reduce_i8'
218 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
219 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
220 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
221 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
222 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
223 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
224 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
218 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
219 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
220 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
221 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
222 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
223 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
224 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
225225 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
226226 ;
227227 ; SSE42-LABEL: 'reduce_i8'
228228 ; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
229229 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
230230 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
231 ; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
232 ; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
233 ; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
234 ; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
231 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
232 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
233 ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
234 ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
235235 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
236236 ;
237237 ; AVX1-LABEL: 'reduce_i8'
238238 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
239239 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
240240 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
241 ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
242 ; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
243 ; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
244 ; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
241 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
242 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
243 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
244 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
245245 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
246246 ;
247247 ; AVX2-LABEL: 'reduce_i8'
248248 ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
249249 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
250250 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
251 ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
251 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
252252 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
253253 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
254254 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
258258 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
259259 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
260260 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
261 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
261 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
262262 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
263263 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
264264 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
268268 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
269269 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
270270 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
271 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
271 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
272272 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
273273 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
274274 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
278278 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.i8.v2i8(<2 x i8> undef)
279279 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.i8.v4i8(<4 x i8> undef)
280280 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> undef)
281 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
281 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> undef)
282282 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> undef)
283283 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> undef)
284284 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> undef)
1010 define i32 @reduce_i64(i32 %arg) {
1111 ; SSE2-LABEL: 'reduce_i64'
1212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
13 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
13 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
1717 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1818 ;
1919 ; SSSE3-LABEL: 'reduce_i64'
2020 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
21 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
21 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
2525 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
2626 ;
2727 ; SSE42-LABEL: 'reduce_i64'
3535 ; AVX1-LABEL: 'reduce_i64'
3636 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
3737 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
4141 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
4242 ;
4343 ; AVX2-LABEL: 'reduce_i64'
6666
6767 define i32 @reduce_i32(i32 %arg) {
6868 ; SSE2-LABEL: 'reduce_i32'
69 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
70 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
71 ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
72 ; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
73 ; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
69 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
70 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
71 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
72 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
73 ; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
7474 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
7575 ;
7676 ; SSSE3-LABEL: 'reduce_i32'
77 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
78 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
79 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
80 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
81 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
77 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
78 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
79 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
80 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
81 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
8282 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
8383 ;
8484 ; SSE42-LABEL: 'reduce_i32'
123123
124124 define i32 @reduce_i16(i32 %arg) {
125125 ; SSE2-LABEL: 'reduce_i16'
126 ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
127 ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
128 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
129 ; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
130 ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
126 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
127 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
128 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
129 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
130 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
131131 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
132132 ;
133133 ; SSSE3-LABEL: 'reduce_i16'
134 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
135 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
136 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
137 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
138 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
134 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
135 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
136 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
137 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
138 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
139139 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
140140 ;
141141 ; SSE42-LABEL: 'reduce_i16'
149149 ; AVX1-LABEL: 'reduce_i16'
150150 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
151151 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
152 ; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
153 ; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
154 ; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
152 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
153 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
154 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
155155 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
156156 ;
157157 ; AVX2-LABEL: 'reduce_i16'
196196
197197 define i32 @reduce_i8(i32 %arg) {
198198 ; SSE2-LABEL: 'reduce_i8'
199 ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
200 ; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
201 ; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
202 ; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
203 ; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
204 ; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
205 ; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
199 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
200 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
201 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
202 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
203 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
204 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
205 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
206206 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
207207 ;
208208 ; SSSE3-LABEL: 'reduce_i8'
209209 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
210 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
211 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
212 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
213 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
214 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
215 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
210 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
211 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
212 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
213 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
214 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
215 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
216216 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
217217 ;
218218 ; SSE42-LABEL: 'reduce_i8'
219 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
220 ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
221 ; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
222 ; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
223 ; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
224 ; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
225 ; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
219 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
220 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
221 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
222 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
223 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
224 ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
225 ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
226226 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
227227 ;
228228 ; AVX1-LABEL: 'reduce_i8'
229 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
230 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
231 ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
232 ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
233 ; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
234 ; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
235 ; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
229 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
230 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
231 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
232 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
233 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
234 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
235 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
236236 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
237237 ;
238238 ; AVX2-LABEL: 'reduce_i8'
239 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
240 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
241 ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
242 ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
239 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
240 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
241 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
242 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
243243 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
244244 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
245245 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
246246 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
247247 ;
248248 ; AVX512F-LABEL: 'reduce_i8'
249 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
250 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
251 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
252 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
249 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
250 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
251 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
252 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
253253 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
254254 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
255255 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
256256 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
257257 ;
258258 ; AVX512BW-LABEL: 'reduce_i8'
259 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
260 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
261 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
262 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
259 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
260 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
261 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
262 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
263263 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
264264 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
265265 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
266266 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
267267 ;
268268 ; AVX512DQ-LABEL: 'reduce_i8'
269 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
270 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
271 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
272 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
269 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
270 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
271 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
272 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
273273 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
274274 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
275275 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
1010 define i32 @reduce_i64(i32 %arg) {
1111 ; SSE2-LABEL: 'reduce_i64'
1212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
13 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
13 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
1717 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1818 ;
1919 ; SSSE3-LABEL: 'reduce_i64'
2020 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
21 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
21 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
2525 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
2626 ;
2727 ; SSE42-LABEL: 'reduce_i64'
3535 ; AVX1-LABEL: 'reduce_i64'
3636 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v1i64(<1 x i64> undef)
3737 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> undef)
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.i64.v4i64(<4 x i64> undef)
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> undef)
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> undef)
4141 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
4242 ;
4343 ; AVX2-LABEL: 'reduce_i64'
6666
6767 define i32 @reduce_i32(i32 %arg) {
6868 ; SSE2-LABEL: 'reduce_i32'
69 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
70 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
71 ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
72 ; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
73 ; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
69 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
70 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
71 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
72 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
73 ; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
7474 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
7575 ;
7676 ; SSSE3-LABEL: 'reduce_i32'
77 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
78 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
79 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
80 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
81 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
77 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.i32.v2i32(<2 x i32> undef)
78 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> undef)
79 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.i32.v8i32(<8 x i32> undef)
80 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> undef)
81 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.i32.v32i32(<32 x i32> undef)
8282 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
8383 ;
8484 ; SSE42-LABEL: 'reduce_i32'
123123
124124 define i32 @reduce_i16(i32 %arg) {
125125 ; SSE2-LABEL: 'reduce_i16'
126 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef)
127 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
128 ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
129 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
130 ; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
131 ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
126 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef)
127 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
128 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
129 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
130 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
131 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
132132 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
133133 ;
134134 ; SSSE3-LABEL: 'reduce_i16'
135 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef)
136 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
137 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
138 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
139 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
140 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
135 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef)
136 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
137 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
138 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
139 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
140 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
141141 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
142142 ;
143143 ; SSE42-LABEL: 'reduce_i16'
153153 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.i16.v2i16(<2 x i16> undef)
154154 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> undef)
155155 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> undef)
156 ; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
157 ; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
158 ; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
156 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> undef)
157 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.i16.v32i16(<32 x i16> undef)
158 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.i16.v64i16(<64 x i16> undef)
159159 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
160160 ;
161161 ; AVX2-LABEL: 'reduce_i16'
205205
206206 define i32 @reduce_i8(i32 %arg) {
207207 ; SSE2-LABEL: 'reduce_i8'
208 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
209 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
210 ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
211 ; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
213 ; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
214 ; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
208 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
209 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
210 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
211 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
213 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
214 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
215215 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
216216 ;
217217 ; SSSE3-LABEL: 'reduce_i8'
218 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
219 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
220 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
221 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
222 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
223 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
224 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
218 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
219 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
220 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
221 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
222 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
223 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
224 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
225225 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
226226 ;
227227 ; SSE42-LABEL: 'reduce_i8'
228228 ; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
229229 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
230230 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
231 ; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
232 ; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
233 ; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
234 ; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
231 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
232 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
233 ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
234 ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
235235 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
236236 ;
237237 ; AVX1-LABEL: 'reduce_i8'
238238 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
239239 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
240240 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
241 ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
242 ; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
243 ; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
244 ; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
241 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
242 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
243 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
244 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
245245 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
246246 ;
247247 ; AVX2-LABEL: 'reduce_i8'
248248 ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
249249 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
250250 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
251 ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
251 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
252252 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
253253 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
254254 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
258258 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
259259 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
260260 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
261 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
261 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
262262 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
263263 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
264264 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
268268 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
269269 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
270270 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
271 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
271 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
272272 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
273273 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
274274 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
278278 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.i8.v2i8(<2 x i8> undef)
279279 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smin.i8.v4i8(<4 x i8> undef)
280280 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> undef)
281 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
281 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> undef)
282282 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.i8.v32i8(<32 x i8> undef)
283283 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.i8.v64i8(<64 x i8> undef)
284284 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.i8.v128i8(<128 x i8> undef)
1010 define i32 @reduce_i64(i32 %arg) {
1111 ; SSE2-LABEL: 'reduce_i64'
1212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
13 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
13 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
1717 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1818 ;
1919 ; SSSE3-LABEL: 'reduce_i64'
2020 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
21 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
21 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
2525 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
2626 ;
2727 ; SSE42-LABEL: 'reduce_i64'
3535 ; AVX1-LABEL: 'reduce_i64'
3636 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
3737 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
4141 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
4242 ;
4343 ; AVX2-LABEL: 'reduce_i64'
6767 define i32 @reduce_i32(i32 %arg) {
6868 ; SSE2-LABEL: 'reduce_i32'
6969 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
70 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
71 ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
72 ; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
73 ; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
70 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
71 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
72 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
73 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
7474 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
7575 ;
7676 ; SSSE3-LABEL: 'reduce_i32'
7777 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
78 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
79 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
80 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
81 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
78 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
79 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
80 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
81 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
8282 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
8383 ;
8484 ; SSE42-LABEL: 'reduce_i32'
123123
124124 define i32 @reduce_i16(i32 %arg) {
125125 ; SSE2-LABEL: 'reduce_i16'
126 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
127 ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
128 ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
129 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
130 ; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
131 ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
126 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
127 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
128 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
129 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
130 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
131 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
132132 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
133133 ;
134134 ; SSSE3-LABEL: 'reduce_i16'
135 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
136 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
137 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
138 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
139 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
140 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
135 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
136 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
137 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
138 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
139 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
140 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
141141 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
142142 ;
143143 ; SSE42-LABEL: 'reduce_i16'
153153 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
154154 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
155155 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
156 ; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
157 ; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
158 ; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
156 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
157 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
158 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
159159 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
160160 ;
161161 ; AVX2-LABEL: 'reduce_i16'
205205
206206 define i32 @reduce_i8(i32 %arg) {
207207 ; SSE2-LABEL: 'reduce_i8'
208 ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
209 ; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
210 ; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
211 ; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
213 ; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
214 ; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
208 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
209 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
210 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
211 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
213 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
214 ; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
215215 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
216216 ;
217217 ; SSSE3-LABEL: 'reduce_i8'
218 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
219 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
220 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
221 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
222 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
223 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
224 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
218 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
219 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
220 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
221 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
222 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
223 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
224 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
225225 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
226226 ;
227227 ; SSE42-LABEL: 'reduce_i8'
228 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
229 ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
230 ; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
231 ; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
232 ; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
233 ; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
234 ; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
228 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
229 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
230 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
231 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
232 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
233 ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
234 ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
235235 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
236236 ;
237237 ; AVX1-LABEL: 'reduce_i8'
238 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
239 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
240 ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
241 ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
242 ; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
243 ; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
244 ; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
238 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
239 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
240 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
241 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
242 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
243 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
244 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
245245 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
246246 ;
247247 ; AVX2-LABEL: 'reduce_i8'
248 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
249 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
250 ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
251 ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
248 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
249 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
250 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
251 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
252252 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
253253 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
254254 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
255255 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
256256 ;
257257 ; AVX512F-LABEL: 'reduce_i8'
258 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
259 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
260 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
261 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
258 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
259 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
260 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
261 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
262262 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
263263 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
264264 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
265265 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
266266 ;
267267 ; AVX512BW-LABEL: 'reduce_i8'
268 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
269 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
270 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
271 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
268 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
269 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
270 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
271 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
272272 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
273273 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
274274 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
275275 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
276276 ;
277277 ; AVX512DQ-LABEL: 'reduce_i8'
278 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
279 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
280 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
281 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
278 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
279 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
280 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
281 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
282282 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
283283 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
284284 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
1010 define i32 @reduce_i64(i32 %arg) {
1111 ; SSE2-LABEL: 'reduce_i64'
1212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
13 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
13 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
1717 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1818 ;
1919 ; SSSE3-LABEL: 'reduce_i64'
2020 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
21 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
21 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
2525 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
2626 ;
2727 ; SSE42-LABEL: 'reduce_i64'
3535 ; AVX1-LABEL: 'reduce_i64'
3636 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v1i64(<1 x i64> undef)
3737 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> undef)
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.i64.v4i64(<4 x i64> undef)
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> undef)
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> undef)
4141 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
4242 ;
4343 ; AVX2-LABEL: 'reduce_i64'
6666
6767 define i32 @reduce_i32(i32 %arg) {
6868 ; SSE2-LABEL: 'reduce_i32'
69 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
70 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
71 ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
72 ; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
73 ; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
69 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
70 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
71 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
72 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
73 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
7474 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
7575 ;
7676 ; SSSE3-LABEL: 'reduce_i32'
77 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
78 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
79 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
80 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
81 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
77 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.i32.v2i32(<2 x i32> undef)
78 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> undef)
79 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.i32.v8i32(<8 x i32> undef)
80 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> undef)
81 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.i32.v32i32(<32 x i32> undef)
8282 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
8383 ;
8484 ; SSE42-LABEL: 'reduce_i32'
123123
124124 define i32 @reduce_i16(i32 %arg) {
125125 ; SSE2-LABEL: 'reduce_i16'
126 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
127 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
128 ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
129 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
130 ; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
131 ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
126 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
127 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
128 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
129 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
130 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
131 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
132132 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
133133 ;
134134 ; SSSE3-LABEL: 'reduce_i16'
135 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
136 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
137 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
138 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
139 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
140 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
135 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
136 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
137 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
138 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
139 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
140 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
141141 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
142142 ;
143143 ; SSE42-LABEL: 'reduce_i16'
153153 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.i16.v2i16(<2 x i16> undef)
154154 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> undef)
155155 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> undef)
156 ; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
157 ; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
158 ; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
156 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> undef)
157 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.i16.v32i16(<32 x i16> undef)
158 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.i16.v64i16(<64 x i16> undef)
159159 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
160160 ;
161161 ; AVX2-LABEL: 'reduce_i16'
205205
206206 define i32 @reduce_i8(i32 %arg) {
207207 ; SSE2-LABEL: 'reduce_i8'
208 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
209 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
210 ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
211 ; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
213 ; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
214 ; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
208 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
209 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
210 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
211 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
213 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
214 ; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
215215 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
216216 ;
217217 ; SSSE3-LABEL: 'reduce_i8'
218 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
219 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
220 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
221 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
222 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
223 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
224 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
218 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
219 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
220 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
221 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
222 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
223 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
224 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
225225 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
226226 ;
227227 ; SSE42-LABEL: 'reduce_i8'
228228 ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
229229 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
230230 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
231 ; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
232 ; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
233 ; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
234 ; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
231 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
232 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
233 ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
234 ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
235235 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
236236 ;
237237 ; AVX1-LABEL: 'reduce_i8'
238238 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
239239 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
240240 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
241 ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
242 ; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
243 ; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
244 ; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
241 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
242 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
243 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
244 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
245245 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
246246 ;
247247 ; AVX2-LABEL: 'reduce_i8'
248248 ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
249249 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
250250 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
251 ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
251 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
252252 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
253253 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
254254 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
258258 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
259259 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
260260 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
261 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
261 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
262262 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
263263 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
264264 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
268268 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
269269 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
270270 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
271 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
271 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
272272 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
273273 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
274274 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
278278 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.i8.v2i8(<2 x i8> undef)
279279 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umax.i8.v4i8(<4 x i8> undef)
280280 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> undef)
281 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
281 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> undef)
282282 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.i8.v32i8(<32 x i8> undef)
283283 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.i8.v64i8(<64 x i8> undef)
284284 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.i8.v128i8(<128 x i8> undef)
1010 define i32 @reduce_i64(i32 %arg) {
1111 ; SSE2-LABEL: 'reduce_i64'
1212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
13 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
13 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
1717 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1818 ;
1919 ; SSSE3-LABEL: 'reduce_i64'
2020 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
21 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
21 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
2525 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
2626 ;
2727 ; SSE42-LABEL: 'reduce_i64'
3535 ; AVX1-LABEL: 'reduce_i64'
3636 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
3737 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
4141 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
4242 ;
4343 ; AVX2-LABEL: 'reduce_i64'
6767 define i32 @reduce_i32(i32 %arg) {
6868 ; SSE2-LABEL: 'reduce_i32'
6969 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
70 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
71 ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
72 ; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
73 ; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
70 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
71 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
72 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
73 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
7474 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
7575 ;
7676 ; SSSE3-LABEL: 'reduce_i32'
7777 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
78 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
79 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
80 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
81 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
78 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
79 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
80 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
81 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
8282 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
8383 ;
8484 ; SSE42-LABEL: 'reduce_i32'
123123
124124 define i32 @reduce_i16(i32 %arg) {
125125 ; SSE2-LABEL: 'reduce_i16'
126 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
127 ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
128 ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
129 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
130 ; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
131 ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
126 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
127 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
128 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
129 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
130 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
131 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
132132 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
133133 ;
134134 ; SSSE3-LABEL: 'reduce_i16'
135 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
136 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
137 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
138 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
139 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
140 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
135 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
136 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
137 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
138 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
139 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
140 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
141141 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
142142 ;
143143 ; SSE42-LABEL: 'reduce_i16'
153153 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
154154 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
155155 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
156 ; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
157 ; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
158 ; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
156 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
157 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
158 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
159159 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
160160 ;
161161 ; AVX2-LABEL: 'reduce_i16'
205205
206206 define i32 @reduce_i8(i32 %arg) {
207207 ; SSE2-LABEL: 'reduce_i8'
208 ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
209 ; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
210 ; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
211 ; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
213 ; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
214 ; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
208 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
209 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
210 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
211 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
213 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
214 ; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
215215 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
216216 ;
217217 ; SSSE3-LABEL: 'reduce_i8'
218 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
219 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
220 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
221 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
222 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
223 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
224 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
218 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
219 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
220 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
221 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
222 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
223 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
224 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
225225 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
226226 ;
227227 ; SSE42-LABEL: 'reduce_i8'
228 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
229 ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
230 ; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
231 ; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
232 ; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
233 ; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
234 ; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
228 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
229 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
230 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
231 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
232 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
233 ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
234 ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
235235 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
236236 ;
237237 ; AVX1-LABEL: 'reduce_i8'
238 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
239 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
240 ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
241 ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
242 ; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
243 ; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
244 ; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
238 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
239 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
240 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
241 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
242 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
243 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
244 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
245245 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
246246 ;
247247 ; AVX2-LABEL: 'reduce_i8'
248 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
249 ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
250 ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
251 ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
248 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
249 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
250 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
251 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
252252 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
253253 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
254254 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
255255 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
256256 ;
257257 ; AVX512F-LABEL: 'reduce_i8'
258 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
259 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
260 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
261 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
258 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
259 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
260 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
261 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
262262 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
263263 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
264264 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
265265 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
266266 ;
267267 ; AVX512BW-LABEL: 'reduce_i8'
268 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
269 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
270 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
271 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
268 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
269 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
270 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
271 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
272272 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
273273 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
274274 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
275275 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
276276 ;
277277 ; AVX512DQ-LABEL: 'reduce_i8'
278 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
279 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
280 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
281 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
278 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
279 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
280 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
281 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
282282 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
283283 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
284284 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
1010 define i32 @reduce_i64(i32 %arg) {
1111 ; SSE2-LABEL: 'reduce_i64'
1212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
13 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
13 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
14 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
15 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
16 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
1717 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1818 ;
1919 ; SSSE3-LABEL: 'reduce_i64'
2020 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
21 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
21 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
22 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
23 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
24 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
2525 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
2626 ;
2727 ; SSE42-LABEL: 'reduce_i64'
3535 ; AVX1-LABEL: 'reduce_i64'
3636 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v1i64(<1 x i64> undef)
3737 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> undef)
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
38 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.i64.v4i64(<4 x i64> undef)
39 ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> undef)
40 ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> undef)
4141 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
4242 ;
4343 ; AVX2-LABEL: 'reduce_i64'
6666
6767 define i32 @reduce_i32(i32 %arg) {
6868 ; SSE2-LABEL: 'reduce_i32'
69 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
70 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
71 ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
72 ; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
73 ; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
69 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
70 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
71 ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
72 ; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
73 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
7474 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
7575 ;
7676 ; SSSE3-LABEL: 'reduce_i32'
77 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
78 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
79 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
80 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
81 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
77 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.i32.v2i32(<2 x i32> undef)
78 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> undef)
79 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.i32.v8i32(<8 x i32> undef)
80 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> undef)
81 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.i32.v32i32(<32 x i32> undef)
8282 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
8383 ;
8484 ; SSE42-LABEL: 'reduce_i32'
123123
124124 define i32 @reduce_i16(i32 %arg) {
125125 ; SSE2-LABEL: 'reduce_i16'
126 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
127 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
128 ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
129 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
130 ; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
131 ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
126 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
127 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
128 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
129 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
130 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
131 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
132132 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
133133 ;
134134 ; SSSE3-LABEL: 'reduce_i16'
135 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
136 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
137 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
138 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
139 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
140 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
135 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
136 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
137 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
138 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
139 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
140 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
141141 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
142142 ;
143143 ; SSE42-LABEL: 'reduce_i16'
153153 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.i16.v2i16(<2 x i16> undef)
154154 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> undef)
155155 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> undef)
156 ; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
157 ; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
158 ; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
156 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> undef)
157 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.i16.v32i16(<32 x i16> undef)
158 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.i16.v64i16(<64 x i16> undef)
159159 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
160160 ;
161161 ; AVX2-LABEL: 'reduce_i16'
205205
206206 define i32 @reduce_i8(i32 %arg) {
207207 ; SSE2-LABEL: 'reduce_i8'
208 ; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
209 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
210 ; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
211 ; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
213 ; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
214 ; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
208 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
209 ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
210 ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
211 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
212 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
213 ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
214 ; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
215215 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
216216 ;
217217 ; SSSE3-LABEL: 'reduce_i8'
218 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
219 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
220 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
221 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
222 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
223 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
224 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
218 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
219 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
220 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
221 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
222 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
223 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
224 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
225225 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
226226 ;
227227 ; SSE42-LABEL: 'reduce_i8'
228228 ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
229229 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
230230 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
231 ; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
232 ; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
233 ; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
234 ; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
231 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
232 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
233 ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
234 ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
235235 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
236236 ;
237237 ; AVX1-LABEL: 'reduce_i8'
238238 ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
239239 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
240240 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
241 ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
242 ; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
243 ; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
244 ; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
241 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
242 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
243 ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
244 ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
245245 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
246246 ;
247247 ; AVX2-LABEL: 'reduce_i8'
248248 ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
249249 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
250250 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
251 ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
251 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
252252 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
253253 ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
254254 ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
258258 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
259259 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
260260 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
261 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
261 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
262262 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
263263 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
264264 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
268268 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
269269 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
270270 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
271 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
271 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
272272 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
273273 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
274274 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
278278 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.i8.v2i8(<2 x i8> undef)
279279 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.umin.i8.v4i8(<4 x i8> undef)
280280 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> undef)
281 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
281 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> undef)
282282 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.i8.v32i8(<32 x i8> undef)
283283 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.i8.v64i8(<64 x i8> undef)
284284 ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.i8.v128i8(<128 x i8> undef)
676676 ; SSE-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4
677677 ; SSE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]]
678678 ; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
679 ; SSE-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
680 ; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
681 ; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
682 ; SSE-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
683 ; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
684 ; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
685 ; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
686 ; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
687 ; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
688 ; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
679 ; SSE-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
680 ; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
681 ; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
682 ; SSE-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
683 ; SSE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
684 ; SSE-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
685 ; SSE-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
686 ; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
687 ; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
688 ; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
689689 ; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
690 ; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
691 ; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
692 ; SSE-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
693 ; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
694 ; SSE-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
695 ; SSE-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
696 ; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
697 ; SSE-NEXT: [[TMP24:%.*]] = select i1 [[TMP4]], i32 3, i32 4
698 ; SSE-NEXT: store i32 [[TMP24]], i32* @var, align 8
699 ; SSE-NEXT: ret i32 [[TMP23]]
690 ; SSE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32>
691 ; SSE-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]]
692 ; SSE-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
693 ; SSE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32>
694 ; SSE-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
695 ; SSE-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
696 ; SSE-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
697 ; SSE-NEXT: [[TMP18:%.*]] = icmp sgt i32 [[TMP17]], [[TMP15]]
698 ; SSE-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP17]], i32 [[TMP15]]
699 ; SSE-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP19]], [[TMP5]]
700 ; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP20]], i32 [[TMP19]], i32 [[TMP5]]
701 ; SSE-NEXT: [[TMP21:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
702 ; SSE-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
703 ; SSE-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[OP_EXTRA]], [[TMP22]]
704 ; SSE-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[OP_EXTRA]], i32 [[TMP22]]
705 ; SSE-NEXT: [[TMP25:%.*]] = select i1 [[TMP4]], i32 3, i32 4
706 ; SSE-NEXT: store i32 [[TMP25]], i32* @var, align 8
707 ; SSE-NEXT: ret i32 [[TMP24]]
700708 ;
701709 ; AVX-LABEL: @maxi8_mutiple_uses(
702710 ; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
844852 ; SSE-NEXT: br label [[PP:%.*]]
845853 ; SSE: pp:
846854 ; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]]
847 ; SSE-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8
848 ; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
849 ; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]]
850 ; SSE-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4
851 ; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
852 ; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
853 ; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16
854 ; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]]
855 ; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]]
856 ; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4
855 ; SSE-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8
856 ; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], undef
857 ; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 undef
858 ; SSE-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef
859 ; SSE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef
860 ; SSE-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef
861 ; SSE-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef
862 ; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef
863 ; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef
864 ; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
857865 ; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]]
858866 ; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]]
859 ; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8
867 ; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
860868 ; SSE-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]]
861 ; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
862 ; SSE-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4
863 ; SSE-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]]
864 ; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]]
865 ; SSE-NEXT: ret i32 [[TMP23]]
869 ; SSE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32>
870 ; SSE-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]]
871 ; SSE-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]]
872 ; SSE-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32>
873 ; SSE-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
874 ; SSE-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]]
875 ; SSE-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
876 ; SSE-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], [[TMP15]]
877 ; SSE-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP15]]
878 ; SSE-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], [[TMP18]]
879 ; SSE-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP18]]
880 ; SSE-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP5]]
881 ; SSE-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP5]]
882 ; SSE-NEXT: [[TMP26:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]]
883 ; SSE-NEXT: ret i32 [[OP_EXTRA]]
866884 ;
867885 ; AVX-LABEL: @maxi8_wrong_parent(
868886 ; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16