; Origin: llvm.org GIT mirror llvm/trunk@308169 (git 1706e6e), Simon Pilgrim.
; [X86][AVX512] Add constant splat vector rotate tests for D35463.
; 1 changed file, 252 additions, 0 deletions.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX

; Masked rotate intrinsics: (data, rotate-amounts, passthru, mask).
declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

; Tests showing failure to replace variable rotates with immediate splat versions.

; Masked, zero-masked and unmasked rotate-left by a uniform splat of 5.
; The splat amount is materialized as a broadcast + vprolvd instead of the
; immediate form (vprold $5) - the missed fold this test tracks (D35463).
define <16 x i32> @test_splat_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_rol_v16i32:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; KNL-NEXT:    vprolvd %zmm2, %zmm0, %zmm3
; KNL-NEXT:    vprolvd %zmm2, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprolvd %zmm2, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    vpaddd %zmm3, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_rol_v16i32:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; SKX-NEXT:    vprolvd %zmm2, %zmm0, %zmm3
; SKX-NEXT:    vprolvd %zmm2, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprolvd %zmm2, %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    vpaddd %zmm3, %zmm0, %zmm0
; SKX-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

; Masked, zero-masked and unmasked rotate-left by a uniform i64 splat of 5;
; should ideally lower to the immediate form (vprolq $5) instead of
; broadcast + vprolvq.
define <8 x i64>@test_splat_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_rol_v8i64:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpbroadcastq {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5]
; KNL-NEXT:    vprolvq %zmm2, %zmm0, %zmm3
; KNL-NEXT:    vprolvq %zmm2, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprolvq %zmm2, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    vpaddq %zmm3, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_rol_v8i64:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vpbroadcastq {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5]
; SKX-NEXT:    vprolvq %zmm2, %zmm0, %zmm3
; SKX-NEXT:    vprolvq %zmm2, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprolvq %zmm2, %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    vpaddq %zmm3, %zmm0, %zmm0
; SKX-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

; Masked, zero-masked and unmasked rotate-right by a uniform splat of 5;
; should ideally lower to the immediate form (vprord $5) instead of
; broadcast + vprorvd.
define <16 x i32> @test_splat_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_ror_v16i32:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; KNL-NEXT:    vprorvd %zmm2, %zmm0, %zmm3
; KNL-NEXT:    vprorvd %zmm2, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprorvd %zmm2, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    vpaddd %zmm3, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_ror_v16i32:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; SKX-NEXT:    vprorvd %zmm2, %zmm0, %zmm3
; SKX-NEXT:    vprorvd %zmm2, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprorvd %zmm2, %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    vpaddd %zmm3, %zmm0, %zmm0
; SKX-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

; Masked, zero-masked and unmasked rotate-right by a uniform i64 splat of 5;
; should ideally lower to the immediate form (vprorq $5) instead of
; broadcast + vprorvq.
define <8 x i64>@test_splat_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_ror_v8i64:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpbroadcastq {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5]
; KNL-NEXT:    vprorvq %zmm2, %zmm0, %zmm3
; KNL-NEXT:    vprorvq %zmm2, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprorvq %zmm2, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    vpaddq %zmm3, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_ror_v8i64:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vpbroadcastq {{.*#+}} zmm2 = [5,5,5,5,5,5,5,5]
; SKX-NEXT:    vprorvq %zmm2, %zmm0, %zmm3
; SKX-NEXT:    vprorvq %zmm2, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprorvq %zmm2, %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    vpaddq %zmm3, %zmm0, %zmm0
; SKX-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

; Tests showing failure to replace out-of-bounds variable rotates with in-bounds immediate splat versions.

; Out-of-bounds rotate-left amounts (>= 32) that could be replaced with the
; equivalent in-bounds immediate rotate (amount mod 32).
; NOTE(review): the splat constants below were reconstructed from the CHECK
; lines (masked op reads a memory broadcast, maskz op uses the vpternlogd
; all-ones idiom, unmasked op reads a second memory broadcast) - verify
; against upstream test/CodeGen/X86/avx512-rotate.ll.
define <16 x i32> @test_splat_bounds_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_bounds_rol_v16i32:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprolvd {{.*}}(%rip){1to16}, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
; KNL-NEXT:    vprolvd %zmm2, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprolvd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_rol_v16i32:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprolvd {{.*}}(%rip){1to16}, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
; SKX-NEXT:    vprolvd %zmm2, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprolvd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

; Out-of-bounds i64 rotate-left amounts (>= 64) that could be replaced with the
; equivalent in-bounds immediate rotate (amount mod 64).
; NOTE(review): splat constants reconstructed from the CHECK lines (masked and
; maskz ops read two distinct memory broadcasts; the unmasked op uses the
; vpternlogd all-ones idiom, i.e. a splat of -1) - verify against upstream
; test/CodeGen/X86/avx512-rotate.ll.
define <8 x i64>@test_splat_bounds_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_bounds_rol_v8i64:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprolvq {{.*}}(%rip){1to8}, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprolvq {{.*}}(%rip){1to8}, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
; KNL-NEXT:    vprolvq %zmm2, %zmm0, %zmm0
; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_rol_v8i64:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprolvq {{.*}}(%rip){1to8}, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprolvq {{.*}}(%rip){1to8}, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
; SKX-NEXT:    vprolvq %zmm2, %zmm0, %zmm0
; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

; Out-of-bounds rotate-right amounts (>= 32) that could be replaced with the
; equivalent in-bounds immediate rotate (amount mod 32).
; NOTE(review): splat constants reconstructed from the CHECK lines (same
; operand pattern as test_splat_bounds_rol_v16i32: memory broadcast for masked,
; vpternlogd all-ones (-1 splat) for maskz, memory broadcast for unmasked) -
; verify against upstream test/CodeGen/X86/avx512-rotate.ll.
define <16 x i32> @test_splat_bounds_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_bounds_ror_v16i32:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprorvd {{.*}}(%rip){1to16}, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
; KNL-NEXT:    vprorvd %zmm2, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprorvd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_ror_v16i32:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprorvd {{.*}}(%rip){1to16}, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
; SKX-NEXT:    vprorvd %zmm2, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprorvd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

; Out-of-bounds i64 rotate-right amounts (>= 64) that could be replaced with
; the equivalent in-bounds immediate rotate (amount mod 64).
; NOTE(review): splat constants reconstructed from the CHECK lines (same
; operand pattern as test_splat_bounds_rol_v8i64: two memory broadcasts for
; masked/maskz, vpternlogd all-ones (-1 splat) for unmasked) - verify against
; upstream test/CodeGen/X86/avx512-rotate.ll.
define <8 x i64>@test_splat_bounds_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_bounds_ror_v8i64:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprorvq {{.*}}(%rip){1to8}, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprorvq {{.*}}(%rip){1to8}, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
; KNL-NEXT:    vprorvq %zmm2, %zmm0, %zmm0
; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_ror_v8i64:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprorvq {{.*}}(%rip){1to8}, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprorvq {{.*}}(%rip){1to8}, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2
; SKX-NEXT:    vprorvq %zmm2, %zmm0, %zmm0
; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> %x1, i8 %x2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> zeroinitializer, i8 %x2)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}