llvm.org GIT mirror llvm / b7dea04
[X86][AVX512] Add support for UNPCK masked shuffle comments git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274464 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 4 years ago
7 changed file(s) with 126 addition(s) and 76 deletion(s). Raw diff Collapse all Expand all
8181 CASE_AVX_INS_COMMON(Inst, Y, r##src) \
8282 CASE_SSE_INS_COMMON(Inst, r##src)
8383
84 #define CASE_SHUF(Inst, suf) \
84 #define CASE_MASK_UNPCK(Inst, src) \
85 CASE_MASK_INS_COMMON(Inst, Z, r##src) \
86 CASE_MASK_INS_COMMON(Inst, Z256, r##src) \
87 CASE_MASK_INS_COMMON(Inst, Z128, r##src)
88
89 #define CASE_MASKZ_UNPCK(Inst, src) \
90 CASE_MASKZ_INS_COMMON(Inst, Z, r##src) \
91 CASE_MASKZ_INS_COMMON(Inst, Z256, r##src) \
92 CASE_MASKZ_INS_COMMON(Inst, Z128, r##src)
93
94 #define CASE_SHUF(Inst, suf) \
8595 CASE_AVX512_INS_COMMON(Inst, Z, suf) \
8696 CASE_AVX512_INS_COMMON(Inst, Z256, suf) \
8797 CASE_AVX512_INS_COMMON(Inst, Z128, suf) \
209219 CASE_MASKZ_PMOVZX(PMOVZXWD, r)
210220 CASE_MASKZ_PMOVZX(PMOVZXWQ, m)
211221 CASE_MASKZ_PMOVZX(PMOVZXWQ, r)
222 CASE_MASKZ_UNPCK(PUNPCKHBW, m)
223 CASE_MASKZ_UNPCK(PUNPCKHBW, r)
224 CASE_MASKZ_UNPCK(PUNPCKHWD, m)
225 CASE_MASKZ_UNPCK(PUNPCKHWD, r)
226 CASE_MASKZ_UNPCK(PUNPCKHDQ, m)
227 CASE_MASKZ_UNPCK(PUNPCKHDQ, r)
228 CASE_MASKZ_UNPCK(PUNPCKLBW, m)
229 CASE_MASKZ_UNPCK(PUNPCKLBW, r)
230 CASE_MASKZ_UNPCK(PUNPCKLWD, m)
231 CASE_MASKZ_UNPCK(PUNPCKLWD, r)
232 CASE_MASKZ_UNPCK(PUNPCKLDQ, m)
233 CASE_MASKZ_UNPCK(PUNPCKLDQ, r)
234 CASE_MASKZ_UNPCK(UNPCKHPD, m)
235 CASE_MASKZ_UNPCK(UNPCKHPD, r)
236 CASE_MASKZ_UNPCK(UNPCKHPS, m)
237 CASE_MASKZ_UNPCK(UNPCKHPS, r)
238 CASE_MASKZ_UNPCK(UNPCKLPD, m)
239 CASE_MASKZ_UNPCK(UNPCKLPD, r)
240 CASE_MASKZ_UNPCK(UNPCKLPS, m)
241 CASE_MASKZ_UNPCK(UNPCKLPS, r)
212242 CASE_MASKZ_SHUF(SHUFPD, m)
213243 CASE_MASKZ_SHUF(SHUFPD, r)
214244 CASE_MASKZ_SHUF(SHUFPS, m)
242272 CASE_MASK_PMOVZX(PMOVZXWD, r)
243273 CASE_MASK_PMOVZX(PMOVZXWQ, m)
244274 CASE_MASK_PMOVZX(PMOVZXWQ, r)
275 CASE_MASK_UNPCK(PUNPCKHBW, m)
276 CASE_MASK_UNPCK(PUNPCKHBW, r)
277 CASE_MASK_UNPCK(PUNPCKHWD, m)
278 CASE_MASK_UNPCK(PUNPCKHWD, r)
279 CASE_MASK_UNPCK(PUNPCKHDQ, m)
280 CASE_MASK_UNPCK(PUNPCKHDQ, r)
281 CASE_MASK_UNPCK(PUNPCKLBW, m)
282 CASE_MASK_UNPCK(PUNPCKLBW, r)
283 CASE_MASK_UNPCK(PUNPCKLWD, m)
284 CASE_MASK_UNPCK(PUNPCKLWD, r)
285 CASE_MASK_UNPCK(PUNPCKLDQ, m)
286 CASE_MASK_UNPCK(PUNPCKLDQ, r)
287 CASE_MASK_UNPCK(UNPCKHPD, m)
288 CASE_MASK_UNPCK(UNPCKHPD, r)
289 CASE_MASK_UNPCK(UNPCKHPS, m)
290 CASE_MASK_UNPCK(UNPCKHPS, r)
291 CASE_MASK_UNPCK(UNPCKLPD, m)
292 CASE_MASK_UNPCK(UNPCKLPD, r)
293 CASE_MASK_UNPCK(UNPCKLPS, m)
294 CASE_MASK_UNPCK(UNPCKLPS, r)
245295 CASE_MASK_SHUF(SHUFPD, m)
246296 CASE_MASK_SHUF(SHUFPD, r)
247297 CASE_MASK_SHUF(SHUFPS, m)
181181 ; X32: # BB#0:
182182 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
183183 ; X32-NEXT: kmovw %eax, %k1
184 ; X32-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
184 ; X32-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
185185 ; X32-NEXT: retl
186186 ;
187187 ; X64-LABEL: test_mm512_mask_unpackhi_epi32:
188188 ; X64: # BB#0:
189189 ; X64-NEXT: kmovw %edi, %k1
190 ; X64-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
190 ; X64-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
191191 ; X64-NEXT: retq
192192 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
193193 %arg1 = bitcast i16 %a1 to <16 x i1>
204204 ; X32: # BB#0:
205205 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
206206 ; X32-NEXT: kmovw %eax, %k1
207 ; X32-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
207 ; X32-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
208208 ; X32-NEXT: retl
209209 ;
210210 ; X64-LABEL: test_mm512_maskz_unpackhi_epi32:
211211 ; X64: # BB#0:
212212 ; X64-NEXT: kmovw %edi, %k1
213 ; X64-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
213 ; X64-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
214214 ; X64-NEXT: retq
215215 %arg0 = bitcast i16 %a0 to <16 x i1>
216216 %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
292292 ; X32: # BB#0:
293293 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
294294 ; X32-NEXT: kmovw %eax, %k1
295 ; X32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
295 ; X32-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
296296 ; X32-NEXT: retl
297297 ;
298298 ; X64-LABEL: test_mm512_mask_unpackhi_pd:
299299 ; X64: # BB#0:
300300 ; X64-NEXT: kmovw %edi, %k1
301 ; X64-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
301 ; X64-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
302302 ; X64-NEXT: retq
303303 %arg1 = bitcast i8 %a1 to <8 x i1>
304304 %res0 = shufflevector <8 x double> %a2, <8 x double> %a3, <8 x i32>
311311 ; X32: # BB#0:
312312 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
313313 ; X32-NEXT: kmovw %eax, %k1
314 ; X32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
314 ; X32-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
315315 ; X32-NEXT: retl
316316 ;
317317 ; X64-LABEL: test_mm512_maskz_unpackhi_pd:
318318 ; X64: # BB#0:
319319 ; X64-NEXT: kmovw %edi, %k1
320 ; X64-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
320 ; X64-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
321321 ; X64-NEXT: retq
322322 %arg0 = bitcast i8 %a0 to <8 x i1>
323323 %res0 = shufflevector <8 x double> %a1, <8 x double> %a2, <8 x i32>
344344 ; X32: # BB#0:
345345 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
346346 ; X32-NEXT: kmovw %eax, %k1
347 ; X32-NEXT: vunpckhps {{.*#+}} zmm0 = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
347 ; X32-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
348348 ; X32-NEXT: retl
349349 ;
350350 ; X64-LABEL: test_mm512_mask_unpackhi_ps:
351351 ; X64: # BB#0:
352352 ; X64-NEXT: kmovw %edi, %k1
353 ; X64-NEXT: vunpckhps {{.*#+}} zmm0 = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
353 ; X64-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
354354 ; X64-NEXT: retq
355355 %arg1 = bitcast i16 %a1 to <16 x i1>
356356 %res0 = shufflevector <16 x float> %a2, <16 x float> %a3, <16 x i32>
363363 ; X32: # BB#0:
364364 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
365365 ; X32-NEXT: kmovw %eax, %k1
366 ; X32-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
366 ; X32-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
367367 ; X32-NEXT: retl
368368 ;
369369 ; X64-LABEL: test_mm512_maskz_unpackhi_ps:
370370 ; X64: # BB#0:
371371 ; X64-NEXT: kmovw %edi, %k1
372 ; X64-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
372 ; X64-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
373373 ; X64-NEXT: retq
374374 %arg0 = bitcast i16 %a0 to <16 x i1>
375375 %res0 = shufflevector <16 x float> %a1, <16 x float> %a2, <16 x i32>
399399 ; X32: # BB#0:
400400 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
401401 ; X32-NEXT: kmovw %eax, %k1
402 ; X32-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
402 ; X32-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
403403 ; X32-NEXT: retl
404404 ;
405405 ; X64-LABEL: test_mm512_mask_unpacklo_epi32:
406406 ; X64: # BB#0:
407407 ; X64-NEXT: kmovw %edi, %k1
408 ; X64-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
408 ; X64-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
409409 ; X64-NEXT: retq
410410 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
411411 %arg1 = bitcast i16 %a1 to <16 x i1>
422422 ; X32: # BB#0:
423423 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
424424 ; X32-NEXT: kmovw %eax, %k1
425 ; X32-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
425 ; X32-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
426426 ; X32-NEXT: retl
427427 ;
428428 ; X64-LABEL: test_mm512_maskz_unpacklo_epi32:
429429 ; X64: # BB#0:
430430 ; X64-NEXT: kmovw %edi, %k1
431 ; X64-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
431 ; X64-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
432432 ; X64-NEXT: retq
433433 %arg0 = bitcast i16 %a0 to <16 x i1>
434434 %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
510510 ; X32: # BB#0:
511511 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
512512 ; X32-NEXT: kmovw %eax, %k1
513 ; X32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
513 ; X32-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
514514 ; X32-NEXT: retl
515515 ;
516516 ; X64-LABEL: test_mm512_mask_unpacklo_pd:
517517 ; X64: # BB#0:
518518 ; X64-NEXT: kmovw %edi, %k1
519 ; X64-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
519 ; X64-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
520520 ; X64-NEXT: retq
521521 %arg1 = bitcast i8 %a1 to <8 x i1>
522522 %res0 = shufflevector <8 x double> %a2, <8 x double> %a3, <8 x i32>
529529 ; X32: # BB#0:
530530 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
531531 ; X32-NEXT: kmovw %eax, %k1
532 ; X32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
532 ; X32-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
533533 ; X32-NEXT: retl
534534 ;
535535 ; X64-LABEL: test_mm512_maskz_unpacklo_pd:
536536 ; X64: # BB#0:
537537 ; X64-NEXT: kmovw %edi, %k1
538 ; X64-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
538 ; X64-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
539539 ; X64-NEXT: retq
540540 %arg0 = bitcast i8 %a0 to <8 x i1>
541541 %res0 = shufflevector <8 x double> %a1, <8 x double> %a2, <8 x i32>
562562 ; X32: # BB#0:
563563 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
564564 ; X32-NEXT: kmovw %eax, %k1
565 ; X32-NEXT: vunpcklps {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
565 ; X32-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
566566 ; X32-NEXT: retl
567567 ;
568568 ; X64-LABEL: test_mm512_mask_unpacklo_ps:
569569 ; X64: # BB#0:
570570 ; X64-NEXT: kmovw %edi, %k1
571 ; X64-NEXT: vunpcklps {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
571 ; X64-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
572572 ; X64-NEXT: retq
573573 %arg1 = bitcast i16 %a1 to <16 x i1>
574574 %res0 = shufflevector <16 x float> %a2, <16 x float> %a3, <16 x i32>
581581 ; X32: # BB#0:
582582 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
583583 ; X32-NEXT: kmovw %eax, %k1
584 ; X32-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
584 ; X32-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
585585 ; X32-NEXT: retl
586586 ;
587587 ; X64-LABEL: test_mm512_maskz_unpacklo_ps:
588588 ; X64: # BB#0:
589589 ; X64-NEXT: kmovw %edi, %k1
590 ; X64-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
590 ; X64-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
591591 ; X64-NEXT: retq
592592 %arg0 = bitcast i16 %a0 to <16 x i1>
593593 %res0 = shufflevector <16 x float> %a1, <16 x float> %a2, <16 x i32>
435435 ; CHECK: ## BB#0:
436436 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm3 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
437437 ; CHECK-NEXT: kmovw %edi, %k1
438 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
438 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
439439 ; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm0
440440 ; CHECK-NEXT: retq
441441 %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
451451 ; CHECK: ## BB#0:
452452 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm3 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
453453 ; CHECK-NEXT: kmovw %edi, %k1
454 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
454 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
455455 ; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm0
456456 ; CHECK-NEXT: retq
457457 %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
467467 ; CHECK: ## BB#0:
468468 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
469469 ; CHECK-NEXT: kmovw %edi, %k1
470 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
470 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
471471 ; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm0
472472 ; CHECK-NEXT: retq
473473 %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
483483 ; CHECK: ## BB#0:
484484 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
485485 ; CHECK-NEXT: kmovw %edi, %k1
486 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
486 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
487487 ; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm0
488488 ; CHECK-NEXT: retq
489489 %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
535535 ; CHECK: ## BB#0:
536536 ; CHECK-NEXT: vpunpckhdq {{.*#+}} zmm3 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
537537 ; CHECK-NEXT: kmovw %edi, %k1
538 ; CHECK-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
538 ; CHECK-NEXT: vpunpckhdq {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
539539 ; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm0
540540 ; CHECK-NEXT: retq
541541 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
551551 ; CHECK: ## BB#0:
552552 ; CHECK-NEXT: vpunpckldq {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
553553 ; CHECK-NEXT: kmovw %edi, %k1
554 ; CHECK-NEXT: vpunpckldq {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
554 ; CHECK-NEXT: vpunpckldq {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
555555 ; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm0
556556 ; CHECK-NEXT: retq
557557 %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
5858 ; X32: # BB#0:
5959 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
6060 ; X32-NEXT: kmovq (%eax), %k1
61 ; X32-NEXT: vpunpckhbw {{.*#+}} zmm0 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
61 ; X32-NEXT: vpunpckhbw {{.*#+}} zmm0 {%k1} = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
6262 ; X32-NEXT: retl
6363 ;
6464 ; X64-LABEL: test_mm512_mask_unpackhi_epi8:
6565 ; X64: # BB#0:
6666 ; X64-NEXT: kmovq (%rdi), %k1
67 ; X64-NEXT: vpunpckhbw {{.*#+}} zmm0 = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
67 ; X64-NEXT: vpunpckhbw {{.*#+}} zmm0 {%k1} = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63]
6868 ; X64-NEXT: retq
6969 %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
7070 %arg1 = bitcast i64* %a1 to <64 x i1>*
8282 ; X32: # BB#0:
8383 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
8484 ; X32-NEXT: kmovq (%eax), %k1
85 ; X32-NEXT: vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
85 ; X32-NEXT: vpunpckhbw {{.*#+}} zmm0 {%k1} {z} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
8686 ; X32-NEXT: retl
8787 ;
8888 ; X64-LABEL: test_mm512_maskz_unpackhi_epi8:
8989 ; X64: # BB#0:
9090 ; X64-NEXT: kmovq (%rdi), %k1
91 ; X64-NEXT: vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
91 ; X64-NEXT: vpunpckhbw {{.*#+}} zmm0 {%k1} {z} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
9292 ; X64-NEXT: retq
9393 %arg0 = bitcast i64* %a0 to <64 x i1>*
9494 %sel0 = load <64 x i1>, <64 x i1>* %arg0
122122 ; X32: # BB#0:
123123 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
124124 ; X32-NEXT: kmovd %eax, %k1
125 ; X32-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31]
125 ; X32-NEXT: vpunpckhwd {{.*#+}} zmm0 {%k1} = zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31]
126126 ; X32-NEXT: retl
127127 ;
128128 ; X64-LABEL: test_mm512_mask_unpackhi_epi16:
129129 ; X64: # BB#0:
130130 ; X64-NEXT: kmovd %edi, %k1
131 ; X64-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31]
131 ; X64-NEXT: vpunpckhwd {{.*#+}} zmm0 {%k1} = zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31]
132132 ; X64-NEXT: retq
133133 %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
134134 %arg1 = bitcast i32 %a1 to <32 x i1>
145145 ; X32: # BB#0:
146146 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
147147 ; X32-NEXT: kmovd %eax, %k1
148 ; X32-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
148 ; X32-NEXT: vpunpckhwd {{.*#+}} zmm0 {%k1} {z} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
149149 ; X32-NEXT: retl
150150 ;
151151 ; X64-LABEL: test_mm512_maskz_unpackhi_epi16:
152152 ; X64: # BB#0:
153153 ; X64-NEXT: kmovd %edi, %k1
154 ; X64-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
154 ; X64-NEXT: vpunpckhwd {{.*#+}} zmm0 {%k1} {z} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
155155 ; X64-NEXT: retq
156156 %arg0 = bitcast i32 %a0 to <32 x i1>
157157 %arg1 = bitcast <8 x i64> %a1 to <32 x i16>
184184 ; X32: # BB#0:
185185 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
186186 ; X32-NEXT: kmovq (%eax), %k1
187 ; X32-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[32],zmm2[32],zmm1[33],zmm2[33],zmm1[34],zmm2[34],zmm1[35],zmm2[35],zmm1[36],zmm2[36],zmm1[37],zmm2[37],zmm1[38],zmm2[38],zmm1[39],zmm2[39],zmm1[48],zmm2[48],zmm1[49],zmm2[49],zmm1[50],zmm2[50],zmm1[51],zmm2[51],zmm1[52],zmm2[52],zmm1[53],zmm2[53],zmm1[54],zmm2[54],zmm1[55],zmm2[55]
187 ; X32-NEXT: vpunpcklbw {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[32],zmm2[32],zmm1[33],zmm2[33],zmm1[34],zmm2[34],zmm1[35],zmm2[35],zmm1[36],zmm2[36],zmm1[37],zmm2[37],zmm1[38],zmm2[38],zmm1[39],zmm2[39],zmm1[48],zmm2[48],zmm1[49],zmm2[49],zmm1[50],zmm2[50],zmm1[51],zmm2[51],zmm1[52],zmm2[52],zmm1[53],zmm2[53],zmm1[54],zmm2[54],zmm1[55],zmm2[55]
188188 ; X32-NEXT: retl
189189 ;
190190 ; X64-LABEL: test_mm512_mask_unpacklo_epi8:
191191 ; X64: # BB#0:
192192 ; X64-NEXT: kmovq (%rdi), %k1
193 ; X64-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[32],zmm2[32],zmm1[33],zmm2[33],zmm1[34],zmm2[34],zmm1[35],zmm2[35],zmm1[36],zmm2[36],zmm1[37],zmm2[37],zmm1[38],zmm2[38],zmm1[39],zmm2[39],zmm1[48],zmm2[48],zmm1[49],zmm2[49],zmm1[50],zmm2[50],zmm1[51],zmm2[51],zmm1[52],zmm2[52],zmm1[53],zmm2[53],zmm1[54],zmm2[54],zmm1[55],zmm2[55]
193 ; X64-NEXT: vpunpcklbw {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[32],zmm2[32],zmm1[33],zmm2[33],zmm1[34],zmm2[34],zmm1[35],zmm2[35],zmm1[36],zmm2[36],zmm1[37],zmm2[37],zmm1[38],zmm2[38],zmm1[39],zmm2[39],zmm1[48],zmm2[48],zmm1[49],zmm2[49],zmm1[50],zmm2[50],zmm1[51],zmm2[51],zmm1[52],zmm2[52],zmm1[53],zmm2[53],zmm1[54],zmm2[54],zmm1[55],zmm2[55]
194194 ; X64-NEXT: retq
195195 %arg0 = bitcast <8 x i64> %a0 to <64 x i8>
196196 %arg1 = bitcast i64* %a1 to <64 x i1>*
208208 ; X32: # BB#0:
209209 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
210210 ; X32-NEXT: kmovq (%eax), %k1
211 ; X32-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
211 ; X32-NEXT: vpunpcklbw {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
212212 ; X32-NEXT: retl
213213 ;
214214 ; X64-LABEL: test_mm512_maskz_unpacklo_epi8:
215215 ; X64: # BB#0:
216216 ; X64-NEXT: kmovq (%rdi), %k1
217 ; X64-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
217 ; X64-NEXT: vpunpcklbw {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
218218 ; X64-NEXT: retq
219219 %arg0 = bitcast i64* %a0 to <64 x i1>*
220220 %sel0 = load <64 x i1>, <64 x i1>* %arg0
248248 ; X32: # BB#0:
249249 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
250250 ; X32-NEXT: kmovd %eax, %k1
251 ; X32-NEXT: vpunpcklwd {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27]
251 ; X32-NEXT: vpunpcklwd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27]
252252 ; X32-NEXT: retl
253253 ;
254254 ; X64-LABEL: test_mm512_mask_unpacklo_epi16:
255255 ; X64: # BB#0:
256256 ; X64-NEXT: kmovd %edi, %k1
257 ; X64-NEXT: vpunpcklwd {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27]
257 ; X64-NEXT: vpunpcklwd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27]
258258 ; X64-NEXT: retq
259259 %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
260260 %arg1 = bitcast i32 %a1 to <32 x i1>
271271 ; X32: # BB#0:
272272 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
273273 ; X32-NEXT: kmovd %eax, %k1
274 ; X32-NEXT: vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
274 ; X32-NEXT: vpunpcklwd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
275275 ; X32-NEXT: retl
276276 ;
277277 ; X64-LABEL: test_mm512_maskz_unpacklo_epi16:
278278 ; X64: # BB#0:
279279 ; X64-NEXT: kmovd %edi, %k1
280 ; X64-NEXT: vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
280 ; X64-NEXT: vpunpcklwd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
281281 ; X64-NEXT: retq
282282 %arg0 = bitcast i32 %a0 to <32 x i1>
283283 %arg1 = bitcast <8 x i64> %a1 to <32 x i16>
446446 ; AVX512BW: ## BB#0:
447447 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
448448 ; AVX512BW-NEXT: kmovq %rdi, %k1
449 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
449 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 {%k1} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
450450 ; AVX512BW-NEXT: vpaddb %zmm3, %zmm2, %zmm0
451451 ; AVX512BW-NEXT: retq
452452 ;
454454 ; AVX512F-32: # BB#0:
455455 ; AVX512F-32-NEXT: vpunpckhbw {{.*#+}} zmm3 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
456456 ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
457 ; AVX512F-32-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
457 ; AVX512F-32-NEXT: vpunpckhbw {{.*#+}} zmm2 {%k1} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
458458 ; AVX512F-32-NEXT: vpaddb %zmm3, %zmm2, %zmm0
459459 ; AVX512F-32-NEXT: retl
460460 %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
470470 ; AVX512BW: ## BB#0:
471471 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
472472 ; AVX512BW-NEXT: kmovq %rdi, %k1
473 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
473 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
474474 ; AVX512BW-NEXT: vpaddb %zmm3, %zmm2, %zmm0
475475 ; AVX512BW-NEXT: retq
476476 ;
478478 ; AVX512F-32: # BB#0:
479479 ; AVX512F-32-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
480480 ; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
481 ; AVX512F-32-NEXT: vpunpcklbw {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
481 ; AVX512F-32-NEXT: vpunpcklbw {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
482482 ; AVX512F-32-NEXT: vpaddb %zmm3, %zmm2, %zmm0
483483 ; AVX512F-32-NEXT: retl
484484 %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
494494 ; AVX512BW: ## BB#0:
495495 ; AVX512BW-NEXT: vpunpckhwd {{.*#+}} zmm3 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
496496 ; AVX512BW-NEXT: kmovd %edi, %k1
497 ; AVX512BW-NEXT: vpunpckhwd {{.*#+}} zmm2 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
497 ; AVX512BW-NEXT: vpunpckhwd {{.*#+}} zmm2 {%k1} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
498498 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm0
499499 ; AVX512BW-NEXT: retq
500500 ;
502502 ; AVX512F-32: # BB#0:
503503 ; AVX512F-32-NEXT: vpunpckhwd {{.*#+}} zmm3 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
504504 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
505 ; AVX512F-32-NEXT: vpunpckhwd {{.*#+}} zmm2 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
505 ; AVX512F-32-NEXT: vpunpckhwd {{.*#+}} zmm2 {%k1} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
506506 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm0
507507 ; AVX512F-32-NEXT: retl
508508 %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
518518 ; AVX512BW: ## BB#0:
519519 ; AVX512BW-NEXT: vpunpcklwd {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
520520 ; AVX512BW-NEXT: kmovd %edi, %k1
521 ; AVX512BW-NEXT: vpunpcklwd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
521 ; AVX512BW-NEXT: vpunpcklwd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
522522 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm0
523523 ; AVX512BW-NEXT: retq
524524 ;
526526 ; AVX512F-32: # BB#0:
527527 ; AVX512F-32-NEXT: vpunpcklwd {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
528528 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
529 ; AVX512F-32-NEXT: vpunpcklwd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
529 ; AVX512F-32-NEXT: vpunpcklwd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
530530 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm0
531531 ; AVX512F-32-NEXT: retl
532532 %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
None ; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
11 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s
22
33 declare void @llvm.x86.avx512.mask.storeu.b.128(i8*, <16 x i8>, i16)
367367 ; CHECK-NEXT: ## xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
368368 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
369369 ; CHECK-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x68,0xd1]
370 ; CHECK-NEXT: ## xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
370 ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
371371 ; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc3]
372372 ; CHECK-NEXT: retq ## encoding: [0xc3]
373373 %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
385385 ; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
386386 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
387387 ; CHECK-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x60,0xd1]
388 ; CHECK-NEXT: ## xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
388 ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
389389 ; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc3]
390390 ; CHECK-NEXT: retq ## encoding: [0xc3]
391391 %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
403403 ; CHECK-NEXT: ## ymm3 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
404404 ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
405405 ; CHECK-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x68,0xd1]
406 ; CHECK-NEXT: ## ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
406 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
407407 ; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc3]
408408 ; CHECK-NEXT: retq ## encoding: [0xc3]
409409 %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
421421 ; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
422422 ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
423423 ; CHECK-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x60,0xd1]
424 ; CHECK-NEXT: ## ymm2 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
424 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
425425 ; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc3]
426426 ; CHECK-NEXT: retq ## encoding: [0xc3]
427427 %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
439439 ; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
440440 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
441441 ; CHECK-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x61,0xd1]
442 ; CHECK-NEXT: ## xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
442 ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
443443 ; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc3]
444444 ; CHECK-NEXT: retq ## encoding: [0xc3]
445445 %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
457457 ; CHECK-NEXT: ## xmm3 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
458458 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
459459 ; CHECK-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x69,0xd1]
460 ; CHECK-NEXT: ## xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
460 ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
461461 ; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc3]
462462 ; CHECK-NEXT: retq ## encoding: [0xc3]
463463 %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
475475 ; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
476476 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
477477 ; CHECK-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x61,0xd1]
478 ; CHECK-NEXT: ## ymm2 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
478 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
479479 ; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc3]
480480 ; CHECK-NEXT: retq ## encoding: [0xc3]
481481 %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
493493 ; CHECK-NEXT: ## ymm3 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
494494 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
495495 ; CHECK-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x69,0xd1]
496 ; CHECK-NEXT: ## ymm2 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
496 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
497497 ; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc3]
498498 ; CHECK-NEXT: retq ## encoding: [0xc3]
499499 %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
888888 ; CHECK-NEXT: ## xmm3 = xmm0[1],xmm1[1]
889889 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
890890 ; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x15,0xd1]
891 ; CHECK-NEXT: ## xmm2 = xmm0[1],xmm1[1]
891 ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[1],xmm1[1]
892892 ; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc3]
893893 ; CHECK-NEXT: retq ## encoding: [0xc3]
894894 %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
906906 ; CHECK-NEXT: ## ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
907907 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
908908 ; CHECK-NEXT: vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x15,0xd1]
909 ; CHECK-NEXT: ## ymm2 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
909 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
910910 ; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc3]
911911 ; CHECK-NEXT: retq ## encoding: [0xc3]
912912 %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
924924 ; CHECK-NEXT: ## xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
925925 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
926926 ; CHECK-NEXT: vunpckhps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x15,0xd1]
927 ; CHECK-NEXT: ## xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
927 ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
928928 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc3]
929929 ; CHECK-NEXT: retq ## encoding: [0xc3]
930930 %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
942942 ; CHECK-NEXT: ## ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
943943 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
944944 ; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x15,0xd1]
945 ; CHECK-NEXT: ## ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
945 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
946946 ; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc3]
947947 ; CHECK-NEXT: retq ## encoding: [0xc3]
948948 %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
960960 ; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0]
961961 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
962962 ; CHECK-NEXT: vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x14,0xd1]
963 ; CHECK-NEXT: ## xmm2 = xmm0[0],xmm1[0]
963 ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0]
964964 ; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc3]
965965 ; CHECK-NEXT: retq ## encoding: [0xc3]
966966 %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
978978 ; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
979979 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
980980 ; CHECK-NEXT: vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x14,0xd1]
981 ; CHECK-NEXT: ## ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
981 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
982982 ; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc3]
983983 ; CHECK-NEXT: retq ## encoding: [0xc3]
984984 %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
996996 ; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
997997 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
998998 ; CHECK-NEXT: vunpcklps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x14,0xd1]
999 ; CHECK-NEXT: ## xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
999 ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
10001000 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc3]
10011001 ; CHECK-NEXT: retq ## encoding: [0xc3]
10021002 %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
10141014 ; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
10151015 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
10161016 ; CHECK-NEXT: vunpcklps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x14,0xd1]
1017 ; CHECK-NEXT: ## ymm2 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1017 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
10181018 ; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc3]
10191019 ; CHECK-NEXT: retq ## encoding: [0xc3]
10201020 %res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
10321032 ; CHECK-NEXT: ## xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10331033 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
10341034 ; CHECK-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6a,0xd1]
1035 ; CHECK-NEXT: ## xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1035 ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10361036 ; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc3]
10371037 ; CHECK-NEXT: retq ## encoding: [0xc3]
10381038 %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
10501050 ; CHECK-NEXT: ## xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
10511051 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
10521052 ; CHECK-NEXT: vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x62,0xd1]
1053 ; CHECK-NEXT: ## xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1053 ; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
10541054 ; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc3]
10551055 ; CHECK-NEXT: retq ## encoding: [0xc3]
10561056 %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
10681068 ; CHECK-NEXT: ## ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
10691069 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
10701070 ; CHECK-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6a,0xd1]
1071 ; CHECK-NEXT: ## ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1071 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
10721072 ; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc3]
10731073 ; CHECK-NEXT: retq ## encoding: [0xc3]
10741074 %res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
10861086 ; CHECK-NEXT: ## ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
10871087 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
10881088 ; CHECK-NEXT: vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x62,0xd1]
1089 ; CHECK-NEXT: ## ymm2 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
1089 ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
10901090 ; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc3]
10911091 ; CHECK-NEXT: retq ## encoding: [0xc3]
10921092 %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)