llvm.org GIT mirror llvm / 53af872
[X86][AVX512] Autoupgrade the VPERMILPD/VPERMILPS intrinsics git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274498 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 4 years ago
5 changed file(s) with 139 addition(s) and 137 deletion(s). Raw diff Collapse all Expand all
224224 Name.startswith("x86.avx512.mask.pshuf.d.") ||
225225 Name.startswith("x86.avx512.mask.pshufl.w.") ||
226226 Name.startswith("x86.avx512.mask.pshufh.w.") ||
227 Name.startswith("x86.avx512.mask.vpermil.p") ||
227228 Name.startswith("x86.avx512.mask.punpckl") ||
228229 Name.startswith("x86.avx512.mask.punpckh") ||
229230 Name.startswith("x86.avx512.mask.unpckl.") ||
10061007 Rep = nullptr;
10071008 } else if (Name.startswith("llvm.x86.avx.vpermil.") ||
10081009 Name == "llvm.x86.sse2.pshuf.d" ||
1010 Name.startswith("llvm.x86.avx512.mask.vpermil.p") ||
10091011 Name.startswith("llvm.x86.avx512.mask.pshuf.d.")) {
10101012 Value *Op0 = CI->getArgOperand(0);
10111013 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
10121014 VectorType *VecTy = cast<VectorType>(CI->getType());
10131015 unsigned NumElts = VecTy->getNumElements();
1014 // Calcuate the size of each index in the immediate.
1016 // Calculate the size of each index in the immediate.
10151017 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
10161018 unsigned IdxMask = ((1 << IdxSize) - 1);
10171019
316316 ret <8 x i64> %res4
317317 }
318318
319 declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8)
320
321 define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
322 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
323 ; CHECK: ## BB#0:
324 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm2 = zmm0[0,1,3,2,5,4,6,6]
325 ; CHECK-NEXT: kmovw %edi, %k1
326 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,1,3,2,5,4,6,6]
327 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,3,2,5,4,6,6]
328 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
329 ; CHECK-NEXT: vaddpd %zmm2, %zmm0, %zmm0
330 ; CHECK-NEXT: retq
331 %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
332 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
333 %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
334 %res3 = fadd <8 x double> %res, %res1
335 %res4 = fadd <8 x double> %res3, %res2
336 ret <8 x double> %res4
337 }
338
339 declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16)
340
341 define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
342 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
343 ; CHECK: ## BB#0:
344 ; CHECK-NEXT: vpermilps {{.*#+}} zmm2 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
345 ; CHECK-NEXT: kmovw %edi, %k1
346 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
347 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
348 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
349 ; CHECK-NEXT: vaddps %zmm2, %zmm0, %zmm0
350 ; CHECK-NEXT: retq
351 %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
352 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
353 %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
354 %res3 = fadd <16 x float> %res, %res1
355 %res4 = fadd <16 x float> %res3, %res2
356 ret <16 x float> %res4
357 }
358
319359 declare <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32>, i32, <16 x i32>, i16)
320360
321361 define <16 x i32>@test_int_x86_avx512_mask_pshuf_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
54375437 ret <16 x float> %res2
54385438 }
54395439
5440 declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8)
5441
5442 define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
5443 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
5444 ; CHECK: ## BB#0:
5445 ; CHECK-NEXT: kmovw %edi, %k1
5446 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,1,3,2,5,4,6,6]
5447 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,3,2,5,4,6,6]
5448 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,3,2,5,4,6,6]
5449 ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
5450 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
5451 ; CHECK-NEXT: retq
5452 %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
5453 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
5454 %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
5455 %res3 = fadd <8 x double> %res, %res1
5456 %res4 = fadd <8 x double> %res3, %res2
5457 ret <8 x double> %res4
5458 }
5459
5460 declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16)
5461
5462 define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
5463 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
5464 ; CHECK: ## BB#0:
5465 ; CHECK-NEXT: kmovw %edi, %k1
5466 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
5467 ; CHECK-NEXT: vpermilps {{.*#+}} zmm2 {%k1} {z} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
5468 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
5469 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
5470 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
5471 ; CHECK-NEXT: retq
5472 %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
5473 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
5474 %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
5475 %res3 = fadd <16 x float> %res, %res1
5476 %res4 = fadd <16 x float> %res3, %res2
5477 ret <16 x float> %res4
5478 }
5479
54805440 declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
54815441
54825442 define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
137137 ret <4 x double> %res4
138138 }
139139
140 declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8)
141
142 define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
143 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
144 ; CHECK: ## BB#0:
145 ; CHECK-NEXT: vpermilpd $6, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x05,0xd0,0x06]
146 ; CHECK-NEXT: ## ymm2 = ymm0[0,1,3,2]
147 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
148 ; CHECK-NEXT: vpermilpd $6, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06]
149 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,3,2]
150 ; CHECK-NEXT: vpermilpd $6, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06]
151 ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[0,1,3,2]
152 ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
153 ; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
154 ; CHECK-NEXT: retq ## encoding: [0xc3]
155 %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3)
156 %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3)
157 %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1)
158 %res3 = fadd <4 x double> %res, %res1
159 %res4 = fadd <4 x double> %res2, %res3
160 ret <4 x double> %res4
161 }
162
163 declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8)
164
165 define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
166 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
167 ; CHECK: ## BB#0:
168 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 ## encoding: [0x62,0xf3,0xfd,0x08,0x05,0xd0,0x01]
169 ; CHECK-NEXT: ## xmm2 = xmm0[1,0]
170 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
171 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01]
172 ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[1,0]
173 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01]
174 ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[1,0]
175 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
176 ; CHECK-NEXT: vaddpd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc2]
177 ; CHECK-NEXT: retq ## encoding: [0xc3]
178 %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3)
179 %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3)
180 %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1)
181 %res3 = fadd <2 x double> %res, %res1
182 %res4 = fadd <2 x double> %res3, %res2
183 ret <2 x double> %res4
184 }
185
186 declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8)
187
188 define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
189 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
190 ; CHECK: ## BB#0:
191 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0x7d,0x28,0x04,0xd0,0x16]
192 ; CHECK-NEXT: ## ymm2 = ymm0[2,1,1,0,6,5,5,4]
193 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
194 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16]
195 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4]
196 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16]
197 ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4]
198 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
199 ; CHECK-NEXT: vaddps %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc2]
200 ; CHECK-NEXT: retq ## encoding: [0xc3]
201 %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3)
202 %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3)
203 %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1)
204 %res3 = fadd <8 x float> %res, %res1
205 %res4 = fadd <8 x float> %res3, %res2
206 ret <8 x float> %res4
207 }
208
209 declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8)
210
211 define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
212 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
213 ; CHECK: ## BB#0:
214 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 ## encoding: [0x62,0xf3,0x7d,0x08,0x04,0xd0,0x16]
215 ; CHECK-NEXT: ## xmm2 = xmm0[2,1,1,0]
216 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
217 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16]
218 ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[2,1,1,0]
219 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16]
220 ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[2,1,1,0]
221 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
222 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0]
223 ; CHECK-NEXT: retq ## encoding: [0xc3]
224 %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3)
225 %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3)
226 %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1)
227 %res3 = fadd <4 x float> %res, %res1
228 %res4 = fadd <4 x float> %res2, %res3
229 ret <4 x float> %res4
230 }
231
140232 declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8)
141233
142234 define void@test_int_x86_avx512_mask_store_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
57215721 ret <4 x i64> %res2
57225722 }
57235723
5724 declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8)
5725
5726 define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
5727 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
5728 ; CHECK: ## BB#0:
5729 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5730 ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x16]
5731 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,3,2]
5732 ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xd0,0x16]
5733 ; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,3,2]
5734 ; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x05,0xc0,0x16]
5735 ; CHECK-NEXT: ## ymm0 = ymm0[0,1,3,2]
5736 ; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xca]
5737 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1]
5738 ; CHECK-NEXT: retq ## encoding: [0xc3]
5739 %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3)
5740 %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3)
5741 %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1)
5742 %res3 = fadd <4 x double> %res, %res1
5743 %res4 = fadd <4 x double> %res2, %res3
5744 ret <4 x double> %res4
5745 }
5746
5747 declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8)
5748
5749 define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
5750 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
5751 ; CHECK: ## BB#0:
5752 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5753 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01]
5754 ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[1,0]
5755 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0x89,0x05,0xd0,0x01]
5756 ; CHECK-NEXT: ## xmm2 {%k1} {z} = xmm0[1,0]
5757 ; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x05,0xc0,0x01]
5758 ; CHECK-NEXT: ## xmm0 = xmm0[1,0]
5759 ; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xca]
5760 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
5761 ; CHECK-NEXT: retq ## encoding: [0xc3]
5762 %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3)
5763 %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3)
5764 %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1)
5765 %res3 = fadd <2 x double> %res, %res1
5766 %res4 = fadd <2 x double> %res3, %res2
5767 ret <2 x double> %res4
5768 }
5769
5770 declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8)
5771
5772 define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
5773 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
5774 ; CHECK: ## BB#0:
5775 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5776 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16]
5777 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4]
5778 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xd0,0x16]
5779 ; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4]
5780 ; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x04,0xc0,0x16]
5781 ; CHECK-NEXT: ## ymm0 = ymm0[2,1,1,0,6,5,5,4]
5782 ; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca]
5783 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
5784 ; CHECK-NEXT: retq ## encoding: [0xc3]
5785 %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3)
5786 %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3)
5787 %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1)
5788 %res3 = fadd <8 x float> %res, %res1
5789 %res4 = fadd <8 x float> %res3, %res2
5790 ret <8 x float> %res4
5791 }
5792
5793 declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8)
5794
5795 define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
5796 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
5797 ; CHECK: ## BB#0:
5798 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
5799 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16]
5800 ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[2,1,1,0]
5801 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x04,0xd0,0x16]
5802 ; CHECK-NEXT: ## xmm2 {%k1} {z} = xmm0[2,1,1,0]
5803 ; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x04,0xc0,0x16]
5804 ; CHECK-NEXT: ## xmm0 = xmm0[2,1,1,0]
5805 ; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca]
5806 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1]
5807 ; CHECK-NEXT: retq ## encoding: [0xc3]
5808 %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3)
5809 %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3)
5810 %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1)
5811 %res3 = fadd <4 x float> %res, %res1
5812 %res4 = fadd <4 x float> %res2, %res3
5813 ret <4 x float> %res4
5814 }
5815
58165724 declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
58175725
58185726 define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
72177125 ; CHECK: ## BB#0:
72187126 ; CHECK-NEXT: vmovdqa32 {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
72197127 ; CHECK-NEXT: ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x05,A,A,A,A]
7220 ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI472_0-4, kind: reloc_riprel_4byte
7128 ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI468_0-4, kind: reloc_riprel_4byte
72217129 ; CHECK-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0x05,A,A,A,A]
7222 ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI472_1-4, kind: reloc_riprel_4byte
7130 ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI468_1-4, kind: reloc_riprel_4byte
72237131 ; CHECK-NEXT: retq ## encoding: [0xc3]
72247132 %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> , <8 x i32> , <8 x i32> zeroinitializer, i8 -1)
72257133 ret <8 x i32> %res
72507158 ; CHECK: ## BB#0:
72517159 ; CHECK-NEXT: vmovdqa64 {{.*#+}} xmm0 = [2,18446744073709551607]
72527160 ; CHECK-NEXT: ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x05,A,A,A,A]
7253 ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI474_0-4, kind: reloc_riprel_4byte
7161 ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI470_0-4, kind: reloc_riprel_4byte
72547162 ; CHECK-NEXT: vpsravq {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
7255 ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI474_1-4, kind: reloc_riprel_4byte
7163 ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI470_1-4, kind: reloc_riprel_4byte
72567164 ; CHECK-NEXT: retq ## encoding: [0xc3]
72577165 %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> , <2 x i64> , <2 x i64> zeroinitializer, i8 -1)
72587166 ret <2 x i64> %res