llvm.org GIT mirror llvm / 93a8a3f
[CostModel][X86] Extend two src shuffle cost tests. Cover most 128/256/512/1024-bit cases for vXf64/vXi64, vXf32/vXi32, vXi16 + vXi8. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310641 91177308-0d34-0410-b5e6-96231b3b80d8 (Simon Pilgrim, 2 years ago)
1 changed file(s) with 195 addition(s) and 17 deletion(s). Raw diff Collapse all Expand all
None ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake-avx512 | FileCheck %s --check-prefix=SKX
0 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2
1 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+ssse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSSE3
2 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42
3 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1
4 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2
5 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
6 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
7 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VBMI
18
29 ;
310 ; Verify the cost model for 2 src shuffles
411 ;
512
6 ; SKX-LABEL: 'test_vXf64'
7 define void @test_vXf64(<4 x double> %src256, <8 x double> %src512, <16 x double> %src1024, <4 x double> %src256_1, <8 x double> %src512_1, <16 x double> %src1024_1) {
8 ; SKX: cost of 1 {{.*}} %V256 = shufflevector
13 ; CHECK-LABEL: 'test_vXf64'
14 define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512, <16 x double> %src1024, <2 x double> %src128_1, <4 x double> %src256_1, <8 x double> %src512_1, <16 x double> %src1024_1) {
15
16 ; SSE2: cost of 1 {{.*}} %V128 = shufflevector
17 ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
18 ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
19 ; AVX1: cost of 1 {{.*}} %V128 = shufflevector
20 ; AVX2: cost of 1 {{.*}} %V128 = shufflevector
21 ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
22 %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32>
23
24 ; SSE2: cost of 6 {{.*}} %V256 = shufflevector
25 ; SSSE3: cost of 6 {{.*}} %V256 = shufflevector
26 ; SSE42: cost of 6 {{.*}} %V256 = shufflevector
27 ; AVX1: cost of 6 {{.*}} %V256 = shufflevector
28 ; AVX2: cost of 6 {{.*}} %V256 = shufflevector
29 ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
930 %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32>
1031
11 ; SKX: cost of 1 {{.*}} %V512 = shufflevector
32 ; SSE2: cost of 28 {{.*}} %V512 = shufflevector
33 ; SSSE3: cost of 28 {{.*}} %V512 = shufflevector
34 ; SSE42: cost of 28 {{.*}} %V512 = shufflevector
35 ; AVX1: cost of 12 {{.*}} %V512 = shufflevector
36 ; AVX2: cost of 12 {{.*}} %V512 = shufflevector
37 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
1238 %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32>
1339
14 ; SKX: cost of 6 {{.*}} %V1024 = shufflevector
40 ; SSE2: cost of 120 {{.*}} %V1024 = shufflevector
41 ; SSSE3: cost of 120 {{.*}} %V1024 = shufflevector
42 ; SSE42: cost of 120 {{.*}} %V1024 = shufflevector
43 ; AVX1: cost of 24 {{.*}} %V1024 = shufflevector
44 ; AVX2: cost of 24 {{.*}} %V1024 = shufflevector
45 ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
1546 %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32>
47
48 ret void
49 }
50
51 ; CHECK-LABEL: 'test_vXi64'
52 define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512, <16 x i64> %src1024, <2 x i64> %src128_1, <4 x i64> %src256_1, <8 x i64> %src512_1, <16 x i64> %src1024_1) {
53
54 ; SSE2: cost of 1 {{.*}} %V128 = shufflevector
55 ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
56 ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
57 ; AVX1: cost of 1 {{.*}} %V128 = shufflevector
58 ; AVX2: cost of 1 {{.*}} %V128 = shufflevector
59 ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
60 %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32>
61
62 ; SSE2: cost of 6 {{.*}} %V256 = shufflevector
63 ; SSSE3: cost of 6 {{.*}} %V256 = shufflevector
64 ; SSE42: cost of 6 {{.*}} %V256 = shufflevector
65 ; AVX1: cost of 8 {{.*}} %V256 = shufflevector
66 ; AVX2: cost of 8 {{.*}} %V256 = shufflevector
67 ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
68 %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32>
69
70 ; SSE2: cost of 28 {{.*}} %V512 = shufflevector
71 ; SSSE3: cost of 28 {{.*}} %V512 = shufflevector
72 ; SSE42: cost of 28 {{.*}} %V512 = shufflevector
73 ; AVX1: cost of 16 {{.*}} %V512 = shufflevector
74 ; AVX2: cost of 16 {{.*}} %V512 = shufflevector
75 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
76 %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32>
77
78 ; SSE2: cost of 120 {{.*}} %V1024 = shufflevector
79 ; SSSE3: cost of 120 {{.*}} %V1024 = shufflevector
80 ; SSE42: cost of 120 {{.*}} %V1024 = shufflevector
81 ; AVX1: cost of 32 {{.*}} %V1024 = shufflevector
82 ; AVX2: cost of 32 {{.*}} %V1024 = shufflevector
83 ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
84 %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32>
1685
1786 ret void
1887 }
2089 ; CHECK-LABEL: 'test_vXf32'
2190 define void @test_vXf32(<4 x float> %src128, <8 x float> %src256, <16 x float> %src512, <32 x float> %src1024, <4 x float> %src128_1, <8 x float> %src256_1, <16 x float> %src512_1, <32 x float> %src1024_1) {
2291
23 ; SKX: cost of 1 {{.*}} %V128 = shufflevector
92 ; SSE2: cost of 2 {{.*}} %V128 = shufflevector
93 ; SSSE3: cost of 2 {{.*}} %V128 = shufflevector
94 ; SSE42: cost of 2 {{.*}} %V128 = shufflevector
95 ; AVX1: cost of 2 {{.*}} %V128 = shufflevector
96 ; AVX2: cost of 2 {{.*}} %V128 = shufflevector
97 ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
2498 %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32>
2599
26 ; SKX: cost of 1 {{.*}} %V256 = shufflevector
100 ; SSE2: cost of 12 {{.*}} %V256 = shufflevector
101 ; SSSE3: cost of 12 {{.*}} %V256 = shufflevector
102 ; SSE42: cost of 12 {{.*}} %V256 = shufflevector
103 ; AVX1: cost of 14 {{.*}} %V256 = shufflevector
104 ; AVX2: cost of 14 {{.*}} %V256 = shufflevector
105 ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
27106 %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32>
28107
29 ; SKX: cost of 1 {{.*}} %V512 = shufflevector
108 ; SSE2: cost of 56 {{.*}} %V512 = shufflevector
109 ; SSSE3: cost of 56 {{.*}} %V512 = shufflevector
110 ; SSE42: cost of 56 {{.*}} %V512 = shufflevector
111 ; AVX1: cost of 28 {{.*}} %V512 = shufflevector
112 ; AVX2: cost of 28 {{.*}} %V512 = shufflevector
113 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
30114 %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32>
31115
32 ; SKX: cost of 6 {{.*}} %V1024 = shufflevector
116 ; SSE2: cost of 240 {{.*}} %V1024 = shufflevector
117 ; SSSE3: cost of 240 {{.*}} %V1024 = shufflevector
118 ; SSE42: cost of 240 {{.*}} %V1024 = shufflevector
119 ; AVX1: cost of 56 {{.*}} %V1024 = shufflevector
120 ; AVX2: cost of 56 {{.*}} %V1024 = shufflevector
121 ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
33122 %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32>
123
124 ret void
125 }
126
127 ; CHECK-LABEL: 'test_vXi32'
128 define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512, <32 x i32> %src1024, <4 x i32> %src128_1, <8 x i32> %src256_1, <16 x i32> %src512_1, <32 x i32> %src1024_1) {
129
130 ; SSE2: cost of 2 {{.*}} %V128 = shufflevector
131 ; SSSE3: cost of 2 {{.*}} %V128 = shufflevector
132 ; SSE42: cost of 2 {{.*}} %V128 = shufflevector
133 ; AVX1: cost of 2 {{.*}} %V128 = shufflevector
134 ; AVX2: cost of 2 {{.*}} %V128 = shufflevector
135 ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
136 %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32>
137
138 ; SSE2: cost of 12 {{.*}} %V256 = shufflevector
139 ; SSSE3: cost of 12 {{.*}} %V256 = shufflevector
140 ; SSE42: cost of 12 {{.*}} %V256 = shufflevector
141 ; AVX1: cost of 16 {{.*}} %V256 = shufflevector
142 ; AVX2: cost of 16 {{.*}} %V256 = shufflevector
143 ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
144 %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32>
145
146 ; SSE2: cost of 56 {{.*}} %V512 = shufflevector
147 ; SSSE3: cost of 56 {{.*}} %V512 = shufflevector
148 ; SSE42: cost of 56 {{.*}} %V512 = shufflevector
149 ; AVX1: cost of 32 {{.*}} %V512 = shufflevector
150 ; AVX2: cost of 32 {{.*}} %V512 = shufflevector
151 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
152 %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32>
153
154 ; SSE2: cost of 240 {{.*}} %V1024 = shufflevector
155 ; SSSE3: cost of 240 {{.*}} %V1024 = shufflevector
156 ; SSE42: cost of 240 {{.*}} %V1024 = shufflevector
157 ; AVX1: cost of 64 {{.*}} %V1024 = shufflevector
158 ; AVX2: cost of 64 {{.*}} %V1024 = shufflevector
159 ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
160 %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32>
34161
35162 ret void
36163 }
38165 ; CHECK-LABEL: 'test_vXi16'
39166 define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512, <64 x i16> %src1024, <8 x i16> %src128_1, <16 x i16> %src256_1, <32 x i16> %src512_1, <64 x i16> %src1024_1) {
40167
41 ; SKX: cost of 1 {{.*}} %V128 = shufflevector
168 ; SSE2: cost of 16 {{.*}} %V128 = shufflevector
169 ; SSSE3: cost of 3 {{.*}} %V128 = shufflevector
170 ; SSE42: cost of 3 {{.*}} %V128 = shufflevector
171 ; AVX1: cost of 3 {{.*}} %V128 = shufflevector
172 ; AVX2: cost of 3 {{.*}} %V128 = shufflevector
173 ; AVX512F: cost of 3 {{.*}} %V128 = shufflevector
174 ; AVX512BW: cost of 1 {{.*}} %V128 = shufflevector
175 ; AVX512VBMI: cost of 1 {{.*}} %V128 = shufflevector
42176 %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32>
43177
44 ; SKX: cost of 1 {{.*}} %V256 = shufflevector
178 ; SSE2: cost of 32 {{.*}} %V256 = shufflevector
179 ; SSSE3: cost of 18 {{.*}} %V256 = shufflevector
180 ; SSE42: cost of 18 {{.*}} %V256 = shufflevector
181 ; AVX1: cost of 32 {{.*}} %V256 = shufflevector
182 ; AVX2: cost of 32 {{.*}} %V256 = shufflevector
183 ; AVX512F: cost of 32 {{.*}} %V256 = shufflevector
184 ; AVX512BW: cost of 1 {{.*}} %V256 = shufflevector
185 ; AVX512VBMI: cost of 1 {{.*}} %V256 = shufflevector
45186 %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32>
46187
47 ; SKX: cost of 1 {{.*}} %V512 = shufflevector
188 ; SSE2: cost of 64 {{.*}} %V512 = shufflevector
189 ; SSSE3: cost of 84 {{.*}} %V512 = shufflevector
190 ; SSE42: cost of 84 {{.*}} %V512 = shufflevector
191 ; AVX1: cost of 64 {{.*}} %V512 = shufflevector
192 ; AVX2: cost of 64 {{.*}} %V512 = shufflevector
193 ; AVX512F: cost of 64 {{.*}} %V512 = shufflevector
194 ; AVX512BW: cost of 1 {{.*}} %V512 = shufflevector
195 ; AVX512VBMI: cost of 1 {{.*}} %V512 = shufflevector
48196 %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32>
49197
50 ; SKX: cost of 6 {{.*}} %V1024 = shufflevector
198 ; SSE2: cost of 128 {{.*}} %V1024 = shufflevector
199 ; SSSE3: cost of 360 {{.*}} %V1024 = shufflevector
200 ; SSE42: cost of 360 {{.*}} %V1024 = shufflevector
201 ; AVX1: cost of 128 {{.*}} %V1024 = shufflevector
202 ; AVX2: cost of 128 {{.*}} %V1024 = shufflevector
203 ; AVX512F: cost of 128 {{.*}} %V1024 = shufflevector
204 ; AVX512BW: cost of 6 {{.*}} %V1024 = shufflevector
205 ; AVX512VBMI: cost of 6 {{.*}} %V1024 = shufflevector
51206 %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32>
207
52208 ret void
53209 }
54210
55211 ; CHECK-LABEL: 'test_vXi8'
56212 define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512, <16 x i8> %src128_1, <32 x i8> %src256_1, <64 x i8> %src512_1) {
57 ; SKX: cost of 3 {{.*}} %V128 = shufflevector
213
214 ; SSE2: cost of 32 {{.*}} %V128 = shufflevector
215 ; SSSE3: cost of 3 {{.*}} %V128 = shufflevector
216 ; SSE42: cost of 3 {{.*}} %V128 = shufflevector
217 ; AVX1: cost of 3 {{.*}} %V128 = shufflevector
218 ; AVX2: cost of 3 {{.*}} %V128 = shufflevector
219 ; AVX512F: cost of 3 {{.*}} %V128 = shufflevector
220 ; AVX512BW: cost of 3 {{.*}} %V128 = shufflevector
221 ; AVX512VBMI: cost of 1 {{.*}} %V128 = shufflevector
58222 %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32>
59223
60 ; SKX: cost of 3 {{.*}} %V256 = shufflevector
224 ; SSE2: cost of 64 {{.*}} %V256 = shufflevector
225 ; SSSE3: cost of 18 {{.*}} %V256 = shufflevector
226 ; SSE42: cost of 18 {{.*}} %V256 = shufflevector
227 ; AVX1: cost of 64 {{.*}} %V256 = shufflevector
228 ; AVX2: cost of 64 {{.*}} %V256 = shufflevector
229 ; AVX512F: cost of 64 {{.*}} %V256 = shufflevector
230 ; AVX512BW: cost of 3 {{.*}} %V256 = shufflevector
231 ; AVX512VBMI: cost of 1 {{.*}} %V256 = shufflevector
61232 %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32>
62233
63 ; SKX: cost of 19 {{.*}} %V512 = shufflevector
234 ; SSE2: cost of 128 {{.*}} %V512 = shufflevector
235 ; SSSE3: cost of 84 {{.*}} %V512 = shufflevector
236 ; SSE42: cost of 84 {{.*}} %V512 = shufflevector
237 ; AVX1: cost of 128 {{.*}} %V512 = shufflevector
238 ; AVX2: cost of 128 {{.*}} %V512 = shufflevector
239 ; AVX512F: cost of 128 {{.*}} %V512 = shufflevector
240 ; AVX512BW: cost of 19 {{.*}} %V512 = shufflevector
241 ; AVX512VBMI: cost of 1 {{.*}} %V512 = shufflevector
64242 %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32>
65243
66244 ret void