llvm.org GIT mirror llvm / 9a17eb1
[CostModel][X86] Add avx1 two-src shuffle costs
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310650 91177308-0d34-0410-b5e6-96231b3b80d8
Simon Pilgrim, 2 years ago
3 changed file(s) with 35 addition(s) and 26 deletion(s). Raw diff Collapse all Expand all
892892 // + 2*por + vinsertf128
893893 { TTI::SK_PermuteSingleSrc, MVT::v32i8, 8 }, // vextractf128 + 4*pshufb
894894 // + 2*por + vinsertf128
895
896 { TTI::SK_PermuteTwoSrc, MVT::v4f64, 4 }, // 2*vperm2f128 + 2*vshufpd
897 { TTI::SK_PermuteTwoSrc, MVT::v8f32, 4 }, // 2*vperm2f128 + 2*vshufps
898 { TTI::SK_PermuteTwoSrc, MVT::v4i64, 4 }, // 2*vperm2f128 + 2*vshufpd
899 { TTI::SK_PermuteTwoSrc, MVT::v8i32, 4 }, // 2*vperm2f128 + 2*vshufps
900 { TTI::SK_PermuteTwoSrc, MVT::v16i16, 15 }, // 2*vextractf128 + 8*pshufb
901 // + 4*por + vinsertf128
902 { TTI::SK_PermuteTwoSrc, MVT::v32i8, 15 }, // 2*vextractf128 + 8*pshufb
903 // + 4*por + vinsertf128
895904 };
896905
897906 if (ST->hasAVX())
3232 ; SSE2: cost of 12 {{.*}} %V512 = shufflevector
3333 ; SSSE3: cost of 12 {{.*}} %V512 = shufflevector
3434 ; SSE42: cost of 12 {{.*}} %V512 = shufflevector
35 ; AVX1: cost of 12 {{.*}} %V512 = shufflevector
35 ; AVX1: cost of 8 {{.*}} %V512 = shufflevector
3636 ; AVX2: cost of 6 {{.*}} %V512 = shufflevector
3737 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
3838 %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32>
4040 ; SSE2: cost of 56 {{.*}} %V1024 = shufflevector
4141 ; SSSE3: cost of 56 {{.*}} %V1024 = shufflevector
4242 ; SSE42: cost of 56 {{.*}} %V1024 = shufflevector
43 ; AVX1: cost of 72 {{.*}} %V1024 = shufflevector
43 ; AVX1: cost of 48 {{.*}} %V1024 = shufflevector
4444 ; AVX2: cost of 36 {{.*}} %V1024 = shufflevector
4545 ; AVX512: cost of 2 {{.*}} %V1024 = shufflevector
4646 %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32>
7070 ; SSE2: cost of 12 {{.*}} %V512 = shufflevector
7171 ; SSSE3: cost of 12 {{.*}} %V512 = shufflevector
7272 ; SSE42: cost of 12 {{.*}} %V512 = shufflevector
73 ; AVX1: cost of 16 {{.*}} %V512 = shufflevector
73 ; AVX1: cost of 8 {{.*}} %V512 = shufflevector
7474 ; AVX2: cost of 6 {{.*}} %V512 = shufflevector
7575 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
7676 %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32>
100100 ; SSE2: cost of 24 {{.*}} %V512 = shufflevector
101101 ; SSSE3: cost of 24 {{.*}} %V512 = shufflevector
102102 ; SSE42: cost of 24 {{.*}} %V512 = shufflevector
103 ; AVX1: cost of 28 {{.*}} %V512 = shufflevector
103 ; AVX1: cost of 8 {{.*}} %V512 = shufflevector
104104 ; AVX2: cost of 6 {{.*}} %V512 = shufflevector
105105 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
106106 %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32>
130130 ; SSE2: cost of 24 {{.*}} %V512 = shufflevector
131131 ; SSSE3: cost of 24 {{.*}} %V512 = shufflevector
132132 ; SSE42: cost of 24 {{.*}} %V512 = shufflevector
133 ; AVX1: cost of 32 {{.*}} %V512 = shufflevector
133 ; AVX1: cost of 8 {{.*}} %V512 = shufflevector
134134 ; AVX2: cost of 6 {{.*}} %V512 = shufflevector
135135 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
136136 %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32>
138138 ; SSE2: cost of 112 {{.*}} %V1024 = shufflevector
139139 ; SSSE3: cost of 112 {{.*}} %V1024 = shufflevector
140140 ; SSE42: cost of 112 {{.*}} %V1024 = shufflevector
141 ; AVX1: cost of 192 {{.*}} %V1024 = shufflevector
141 ; AVX1: cost of 48 {{.*}} %V1024 = shufflevector
142142 ; AVX2: cost of 36 {{.*}} %V1024 = shufflevector
143143 ; AVX512: cost of 2 {{.*}} %V1024 = shufflevector
144144 %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32>
171171 ; SSE2: cost of 192 {{.*}} %V512 = shufflevector
172172 ; SSSE3: cost of 36 {{.*}} %V512 = shufflevector
173173 ; SSE42: cost of 36 {{.*}} %V512 = shufflevector
174 ; AVX1: cost of 64 {{.*}} %V512 = shufflevector
174 ; AVX1: cost of 30 {{.*}} %V512 = shufflevector
175175 ; AVX2: cost of 14 {{.*}} %V512 = shufflevector
176176 ; AVX512F: cost of 14 {{.*}} %V512 = shufflevector
177177 ; AVX512BW: cost of 1 {{.*}} %V512 = shufflevector
181181 ; SSE2: cost of 896 {{.*}} %V1024 = shufflevector
182182 ; SSSE3: cost of 168 {{.*}} %V1024 = shufflevector
183183 ; SSE42: cost of 168 {{.*}} %V1024 = shufflevector
184 ; AVX1: cost of 384 {{.*}} %V1024 = shufflevector
184 ; AVX1: cost of 180 {{.*}} %V1024 = shufflevector
185185 ; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
186186 ; AVX512F: cost of 84 {{.*}} %V1024 = shufflevector
187187 ; AVX512BW: cost of 2 {{.*}} %V1024 = shufflevector
213213 ; SSE2: cost of 384 {{.*}} %V512 = shufflevector
214214 ; SSSE3: cost of 36 {{.*}} %V512 = shufflevector
215215 ; SSE42: cost of 36 {{.*}} %V512 = shufflevector
216 ; AVX1: cost of 128 {{.*}} %V512 = shufflevector
216 ; AVX1: cost of 30 {{.*}} %V512 = shufflevector
217217 ; AVX2: cost of 14 {{.*}} %V512 = shufflevector
218218 ; AVX512F: cost of 14 {{.*}} %V512 = shufflevector
219219 ; AVX512BW: cost of 8 {{.*}} %V512 = shufflevector
2424 ; SSE2: cost of 6 {{.*}} %V256 = shufflevector
2525 ; SSSE3: cost of 6 {{.*}} %V256 = shufflevector
2626 ; SSE42: cost of 6 {{.*}} %V256 = shufflevector
27 ; AVX1: cost of 6 {{.*}} %V256 = shufflevector
27 ; AVX1: cost of 4 {{.*}} %V256 = shufflevector
2828 ; AVX2: cost of 3 {{.*}} %V256 = shufflevector
2929 ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
3030 %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32>
3232 ; SSE2: cost of 28 {{.*}} %V512 = shufflevector
3333 ; SSSE3: cost of 28 {{.*}} %V512 = shufflevector
3434 ; SSE42: cost of 28 {{.*}} %V512 = shufflevector
35 ; AVX1: cost of 12 {{.*}} %V512 = shufflevector
35 ; AVX1: cost of 24 {{.*}} %V512 = shufflevector
3636 ; AVX2: cost of 18 {{.*}} %V512 = shufflevector
3737 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
3838 %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32>
4040 ; SSE2: cost of 120 {{.*}} %V1024 = shufflevector
4141 ; SSSE3: cost of 120 {{.*}} %V1024 = shufflevector
4242 ; SSE42: cost of 120 {{.*}} %V1024 = shufflevector
43 ; AVX1: cost of 24 {{.*}} %V1024 = shufflevector
43 ; AVX1: cost of 112 {{.*}} %V1024 = shufflevector
4444 ; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
4545 ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
4646 %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32>
6262 ; SSE2: cost of 6 {{.*}} %V256 = shufflevector
6363 ; SSSE3: cost of 6 {{.*}} %V256 = shufflevector
6464 ; SSE42: cost of 6 {{.*}} %V256 = shufflevector
65 ; AVX1: cost of 8 {{.*}} %V256 = shufflevector
65 ; AVX1: cost of 4 {{.*}} %V256 = shufflevector
6666 ; AVX2: cost of 3 {{.*}} %V256 = shufflevector
6767 ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
6868 %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32>
7070 ; SSE2: cost of 28 {{.*}} %V512 = shufflevector
7171 ; SSSE3: cost of 28 {{.*}} %V512 = shufflevector
7272 ; SSE42: cost of 28 {{.*}} %V512 = shufflevector
73 ; AVX1: cost of 16 {{.*}} %V512 = shufflevector
73 ; AVX1: cost of 24 {{.*}} %V512 = shufflevector
7474 ; AVX2: cost of 18 {{.*}} %V512 = shufflevector
7575 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
7676 %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32>
7878 ; SSE2: cost of 120 {{.*}} %V1024 = shufflevector
7979 ; SSSE3: cost of 120 {{.*}} %V1024 = shufflevector
8080 ; SSE42: cost of 120 {{.*}} %V1024 = shufflevector
81 ; AVX1: cost of 32 {{.*}} %V1024 = shufflevector
81 ; AVX1: cost of 112 {{.*}} %V1024 = shufflevector
8282 ; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
8383 ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
8484 %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32>
100100 ; SSE2: cost of 12 {{.*}} %V256 = shufflevector
101101 ; SSSE3: cost of 12 {{.*}} %V256 = shufflevector
102102 ; SSE42: cost of 12 {{.*}} %V256 = shufflevector
103 ; AVX1: cost of 14 {{.*}} %V256 = shufflevector
103 ; AVX1: cost of 4 {{.*}} %V256 = shufflevector
104104 ; AVX2: cost of 3 {{.*}} %V256 = shufflevector
105105 ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
106106 %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32>
108108 ; SSE2: cost of 56 {{.*}} %V512 = shufflevector
109109 ; SSSE3: cost of 56 {{.*}} %V512 = shufflevector
110110 ; SSE42: cost of 56 {{.*}} %V512 = shufflevector
111 ; AVX1: cost of 28 {{.*}} %V512 = shufflevector
111 ; AVX1: cost of 24 {{.*}} %V512 = shufflevector
112112 ; AVX2: cost of 18 {{.*}} %V512 = shufflevector
113113 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
114114 %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32>
116116 ; SSE2: cost of 240 {{.*}} %V1024 = shufflevector
117117 ; SSSE3: cost of 240 {{.*}} %V1024 = shufflevector
118118 ; SSE42: cost of 240 {{.*}} %V1024 = shufflevector
119 ; AVX1: cost of 56 {{.*}} %V1024 = shufflevector
119 ; AVX1: cost of 112 {{.*}} %V1024 = shufflevector
120120 ; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
121121 ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
122122 %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32>
138138 ; SSE2: cost of 12 {{.*}} %V256 = shufflevector
139139 ; SSSE3: cost of 12 {{.*}} %V256 = shufflevector
140140 ; SSE42: cost of 12 {{.*}} %V256 = shufflevector
141 ; AVX1: cost of 16 {{.*}} %V256 = shufflevector
141 ; AVX1: cost of 4 {{.*}} %V256 = shufflevector
142142 ; AVX2: cost of 3 {{.*}} %V256 = shufflevector
143143 ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
144144 %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32>
146146 ; SSE2: cost of 56 {{.*}} %V512 = shufflevector
147147 ; SSSE3: cost of 56 {{.*}} %V512 = shufflevector
148148 ; SSE42: cost of 56 {{.*}} %V512 = shufflevector
149 ; AVX1: cost of 32 {{.*}} %V512 = shufflevector
149 ; AVX1: cost of 24 {{.*}} %V512 = shufflevector
150150 ; AVX2: cost of 18 {{.*}} %V512 = shufflevector
151151 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
152152 %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32>
154154 ; SSE2: cost of 240 {{.*}} %V1024 = shufflevector
155155 ; SSSE3: cost of 240 {{.*}} %V1024 = shufflevector
156156 ; SSE42: cost of 240 {{.*}} %V1024 = shufflevector
157 ; AVX1: cost of 64 {{.*}} %V1024 = shufflevector
157 ; AVX1: cost of 112 {{.*}} %V1024 = shufflevector
158158 ; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
159159 ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
160160 %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32>
178178 ; SSE2: cost of 32 {{.*}} %V256 = shufflevector
179179 ; SSSE3: cost of 18 {{.*}} %V256 = shufflevector
180180 ; SSE42: cost of 18 {{.*}} %V256 = shufflevector
181 ; AVX1: cost of 32 {{.*}} %V256 = shufflevector
181 ; AVX1: cost of 15 {{.*}} %V256 = shufflevector
182182 ; AVX2: cost of 7 {{.*}} %V256 = shufflevector
183183 ; AVX512F: cost of 7 {{.*}} %V256 = shufflevector
184184 ; AVX512BW: cost of 1 {{.*}} %V256 = shufflevector
188188 ; SSE2: cost of 64 {{.*}} %V512 = shufflevector
189189 ; SSSE3: cost of 84 {{.*}} %V512 = shufflevector
190190 ; SSE42: cost of 84 {{.*}} %V512 = shufflevector
191 ; AVX1: cost of 64 {{.*}} %V512 = shufflevector
191 ; AVX1: cost of 90 {{.*}} %V512 = shufflevector
192192 ; AVX2: cost of 42 {{.*}} %V512 = shufflevector
193193 ; AVX512F: cost of 42 {{.*}} %V512 = shufflevector
194194 ; AVX512BW: cost of 1 {{.*}} %V512 = shufflevector
198198 ; SSE2: cost of 128 {{.*}} %V1024 = shufflevector
199199 ; SSSE3: cost of 360 {{.*}} %V1024 = shufflevector
200200 ; SSE42: cost of 360 {{.*}} %V1024 = shufflevector
201 ; AVX1: cost of 128 {{.*}} %V1024 = shufflevector
201 ; AVX1: cost of 420 {{.*}} %V1024 = shufflevector
202202 ; AVX2: cost of 196 {{.*}} %V1024 = shufflevector
203203 ; AVX512F: cost of 196 {{.*}} %V1024 = shufflevector
204204 ; AVX512BW: cost of 6 {{.*}} %V1024 = shufflevector
224224 ; SSE2: cost of 64 {{.*}} %V256 = shufflevector
225225 ; SSSE3: cost of 18 {{.*}} %V256 = shufflevector
226226 ; SSE42: cost of 18 {{.*}} %V256 = shufflevector
227 ; AVX1: cost of 64 {{.*}} %V256 = shufflevector
227 ; AVX1: cost of 15 {{.*}} %V256 = shufflevector
228228 ; AVX2: cost of 7 {{.*}} %V256 = shufflevector
229229 ; AVX512F: cost of 7 {{.*}} %V256 = shufflevector
230230 ; AVX512BW: cost of 3 {{.*}} %V256 = shufflevector
234234 ; SSE2: cost of 128 {{.*}} %V512 = shufflevector
235235 ; SSSE3: cost of 84 {{.*}} %V512 = shufflevector
236236 ; SSE42: cost of 84 {{.*}} %V512 = shufflevector
237 ; AVX1: cost of 128 {{.*}} %V512 = shufflevector
237 ; AVX1: cost of 90 {{.*}} %V512 = shufflevector
238238 ; AVX2: cost of 42 {{.*}} %V512 = shufflevector
239239 ; AVX512F: cost of 42 {{.*}} %V512 = shufflevector
240240 ; AVX512BW: cost of 19 {{.*}} %V512 = shufflevector