llvm.org GIT mirror llvm / d0ebabd
[Utils][X86] Help update_llc_test_checks.py to recognise retl/retq to reduce CHECK duplication (PR35003)

This patch replaces the --x86_extra_scrub command line argument with automatic support for a second level of regex scrubbing, applied whenever it improves the matching of nearly-identical code patterns. A new '--extra_scrub' argument is available to force the extra matching if required.

This is mostly useful for sharing 32-bit/64-bit x86 vector tests that differ only by retl/retq instructions, but any scrubber can now technically support it, meaning test checks don't have to be needlessly obfuscated.

I've updated some of the existing checks that had previously been generated by manually running the script with --x86_extra_scrub, to demonstrate that the extra "ret{{[l|q]}}" scrub now only happens when it is useful, and re-run the sse42-intrinsics file to show the extra matches - most sse/avx intrinsics files should now be able to share their 32/64-bit checks.

Tested with the opt/analysis scripts as well, which share the common code - AFAICT the other update scripts use their own versions.

Differential Revision: https://reviews.llvm.org/D47485

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@333749 91177308-0d34-0410-b5e6-96231b3b80d8

Simon Pilgrim, 2 years ago
8 changed files with 215 additions and 245 deletions.
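To make the new behaviour concrete before the diff itself: the second scrub level rewrites return instructions in the assembly body so that the 32-bit and 64-bit outputs of a test can become textually identical and share one block of CHECK lines. A minimal sketch of that substitution follows; the exact SCRUB_X86_RET_RE pattern lives elsewhere in the script and is an assumption here, and the sample body is taken from the pcmpgtq test updated below.

    # Illustrative sketch only; the real SCRUB_X86_RET_RE is defined in the
    # update script and its exact pattern is assumed here.
    import re

    SCRUB_X86_RET_RE = re.compile(r'ret[l|q]')

    asm_32 = 'pcmpgtq %xmm1, %xmm0\nretl'
    asm_64 = 'pcmpgtq %xmm1, %xmm0\nretq'

    scrubbed_32 = SCRUB_X86_RET_RE.sub(r'ret{{[l|q]}}', asm_32)
    scrubbed_64 = SCRUB_X86_RET_RE.sub(r'ret{{[l|q]}}', asm_64)
    assert scrubbed_32 == scrubbed_64  # identical bodies -> one shared CHECK block

With this patch the substitution is no longer applied unconditionally; it is kept only when it actually lets two RUN-line prefixes share their checks, which is what the sse42-intrinsics hunks below demonstrate.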
100100 ; X86: # %bb.0:
101101 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
102102 ; X86-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
103 ; X86-NEXT: ret{{[l|q]}}
103 ; X86-NEXT: retl
104104 ;
105105 ; X64-LABEL: test_x86_avx_vbroadcastf128_pd_256:
106106 ; X64: # %bb.0:
107107 ; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
108 ; X64-NEXT: ret{{[l|q]}}
108 ; X64-NEXT: retq
109109 %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
110110 ret <4 x double> %res
111111 }
117117 ; X86: # %bb.0:
118118 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
119119 ; X86-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
120 ; X86-NEXT: ret{{[l|q]}}
120 ; X86-NEXT: retl
121121 ;
122122 ; X64-LABEL: test_x86_avx_vbroadcastf128_ps_256:
123123 ; X64: # %bb.0:
124124 ; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
125 ; X64-NEXT: ret{{[l|q]}}
125 ; X64-NEXT: retq
126126 %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
127127 ret <8 x float> %res
128128 }
401401 ; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
402402 ; X86-NEXT: vpsubb %xmm1, %xmm0, %xmm0
403403 ; X86-NEXT: vmovdqu %xmm0, (%eax)
404 ; X86-NEXT: ret{{[l|q]}}
404 ; X86-NEXT: retl
405405 ;
406406 ; X64-LABEL: test_x86_sse2_storeu_dq:
407407 ; X64: # %bb.0:
408408 ; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
409409 ; X64-NEXT: vpsubb %xmm1, %xmm0, %xmm0
410410 ; X64-NEXT: vmovdqu %xmm0, (%rdi)
411 ; X64-NEXT: ret{{[l|q]}}
411 ; X64-NEXT: retq
412412 %a2 = add <16 x i8> %a1,
413413 call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
414414 ret void
425425 ; X86-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
426426 ; X86-NEXT: vaddpd %xmm1, %xmm0, %xmm0
427427 ; X86-NEXT: vmovupd %xmm0, (%eax)
428 ; X86-NEXT: ret{{[l|q]}}
428 ; X86-NEXT: retl
429429 ;
430430 ; X64-LABEL: test_x86_sse2_storeu_pd:
431431 ; X64: # %bb.0:
433433 ; X64-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
434434 ; X64-NEXT: vaddpd %xmm1, %xmm0, %xmm0
435435 ; X64-NEXT: vmovupd %xmm0, (%rdi)
436 ; X64-NEXT: ret{{[l|q]}}
436 ; X64-NEXT: retq
437437 %a2 = fadd <2 x double> %a1,
438438 call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
439439 ret void
446446 ; X86: # %bb.0:
447447 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
448448 ; X86-NEXT: vmovups %xmm0, (%eax)
449 ; X86-NEXT: ret{{[l|q]}}
449 ; X86-NEXT: retl
450450 ;
451451 ; X64-LABEL: test_x86_sse_storeu_ps:
452452 ; X64: # %bb.0:
453453 ; X64-NEXT: vmovups %xmm0, (%rdi)
454 ; X64-NEXT: ret{{[l|q]}}
454 ; X64-NEXT: retq
455455 call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
456456 ret void
457457 }
471471 ; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
472472 ; X86-NEXT: vmovups %ymm0, (%eax)
473473 ; X86-NEXT: vzeroupper
474 ; X86-NEXT: ret{{[l|q]}}
474 ; X86-NEXT: retl
475475 ;
476476 ; X64-LABEL: test_x86_avx_storeu_dq_256:
477477 ; X64: # %bb.0:
482482 ; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
483483 ; X64-NEXT: vmovups %ymm0, (%rdi)
484484 ; X64-NEXT: vzeroupper
485 ; X64-NEXT: ret{{[l|q]}}
485 ; X64-NEXT: retq
486486 %a2 = add <32 x i8> %a1,
487487 call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
488488 ret void
499499 ; X86-NEXT: vaddpd %ymm1, %ymm0, %ymm0
500500 ; X86-NEXT: vmovupd %ymm0, (%eax)
501501 ; X86-NEXT: vzeroupper
502 ; X86-NEXT: ret{{[l|q]}}
502 ; X86-NEXT: retl
503503 ;
504504 ; X64-LABEL: test_x86_avx_storeu_pd_256:
505505 ; X64: # %bb.0:
507507 ; X64-NEXT: vaddpd %ymm1, %ymm0, %ymm0
508508 ; X64-NEXT: vmovupd %ymm0, (%rdi)
509509 ; X64-NEXT: vzeroupper
510 ; X64-NEXT: ret{{[l|q]}}
510 ; X64-NEXT: retq
511511 %a2 = fadd <4 x double> %a1,
512512 call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
513513 ret void
521521 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
522522 ; X86-NEXT: vmovups %ymm0, (%eax)
523523 ; X86-NEXT: vzeroupper
524 ; X86-NEXT: ret{{[l|q]}}
524 ; X86-NEXT: retl
525525 ;
526526 ; X64-LABEL: test_x86_avx_storeu_ps_256:
527527 ; X64: # %bb.0:
528528 ; X64-NEXT: vmovups %ymm0, (%rdi)
529529 ; X64-NEXT: vzeroupper
530 ; X64-NEXT: ret{{[l|q]}}
530 ; X64-NEXT: retq
531531 call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
532532 ret void
533533 }
286286 ; X86: # %bb.0:
287287 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
288288 ; X86-NEXT: vlddqu (%eax), %ymm0 # encoding: [0xc5,0xff,0xf0,0x00]
289 ; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
289 ; X86-NEXT: retl # encoding: [0xc3]
290290 ;
291291 ; X64-LABEL: test_x86_avx_ldu_dq_256:
292292 ; X64: # %bb.0:
293293 ; X64-NEXT: vlddqu (%rdi), %ymm0 # encoding: [0xc5,0xff,0xf0,0x07]
294 ; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
294 ; X64-NEXT: retq # encoding: [0xc3]
295295 %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
296296 ret <32 x i8> %res
297297 }
303303 ; X86: # %bb.0:
304304 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
305305 ; X86-NEXT: vmaskmovpd (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2d,0x00]
306 ; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
306 ; X86-NEXT: retl # encoding: [0xc3]
307307 ;
308308 ; X64-LABEL: test_x86_avx_maskload_pd:
309309 ; X64: # %bb.0:
310310 ; X64-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2d,0x07]
311 ; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
311 ; X64-NEXT: retq # encoding: [0xc3]
312312 %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1]
313313 ret <2 x double> %res
314314 }
320320 ; X86: # %bb.0:
321321 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
322322 ; X86-NEXT: vmaskmovpd (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2d,0x00]
323 ; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
323 ; X86-NEXT: retl # encoding: [0xc3]
324324 ;
325325 ; X64-LABEL: test_x86_avx_maskload_pd_256:
326326 ; X64: # %bb.0:
327327 ; X64-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2d,0x07]
328 ; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
328 ; X64-NEXT: retq # encoding: [0xc3]
329329 %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1]
330330 ret <4 x double> %res
331331 }
337337 ; X86: # %bb.0:
338338 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
339339 ; X86-NEXT: vmaskmovps (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2c,0x00]
340 ; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
340 ; X86-NEXT: retl # encoding: [0xc3]
341341 ;
342342 ; X64-LABEL: test_x86_avx_maskload_ps:
343343 ; X64: # %bb.0:
344344 ; X64-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2c,0x07]
345 ; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
345 ; X64-NEXT: retq # encoding: [0xc3]
346346 %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1]
347347 ret <4 x float> %res
348348 }
354354 ; X86: # %bb.0:
355355 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
356356 ; X86-NEXT: vmaskmovps (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2c,0x00]
357 ; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
357 ; X86-NEXT: retl # encoding: [0xc3]
358358 ;
359359 ; X64-LABEL: test_x86_avx_maskload_ps_256:
360360 ; X64: # %bb.0:
361361 ; X64-NEXT: vmaskmovps (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2c,0x07]
362 ; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
362 ; X64-NEXT: retq # encoding: [0xc3]
363363 %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1]
364364 ret <8 x float> %res
365365 }
371371 ; X86: # %bb.0:
372372 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
373373 ; X86-NEXT: vmaskmovpd %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x2f,0x08]
374 ; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
374 ; X86-NEXT: retl # encoding: [0xc3]
375375 ;
376376 ; X64-LABEL: test_x86_avx_maskstore_pd:
377377 ; X64: # %bb.0:
378378 ; X64-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x2f,0x0f]
379 ; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
379 ; X64-NEXT: retq # encoding: [0xc3]
380380 call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2)
381381 ret void
382382 }
389389 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
390390 ; X86-NEXT: vmaskmovpd %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x2f,0x08]
391391 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
392 ; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
392 ; X86-NEXT: retl # encoding: [0xc3]
393393 ;
394394 ; X64-LABEL: test_x86_avx_maskstore_pd_256:
395395 ; X64: # %bb.0:
396396 ; X64-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0x7d,0x2f,0x0f]
397397 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
398 ; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
398 ; X64-NEXT: retq # encoding: [0xc3]
399399 call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %mask, <4 x double> %a2)
400400 ret void
401401 }
407407 ; X86: # %bb.0:
408408 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
409409 ; X86-NEXT: vmaskmovps %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x2e,0x08]
410 ; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
410 ; X86-NEXT: retl # encoding: [0xc3]
411411 ;
412412 ; X64-LABEL: test_x86_avx_maskstore_ps:
413413 ; X64: # %bb.0:
414414 ; X64-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x2e,0x0f]
415 ; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
415 ; X64-NEXT: retq # encoding: [0xc3]
416416 call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2)
417417 ret void
418418 }
425425 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
426426 ; X86-NEXT: vmaskmovps %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x2e,0x08]
427427 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
428 ; X86-NEXT: ret{{[l|q]}} # encoding: [0xc3]
428 ; X86-NEXT: retl # encoding: [0xc3]
429429 ;
430430 ; X64-LABEL: test_x86_avx_maskstore_ps_256:
431431 ; X64: # %bb.0:
432432 ; X64-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0x7d,0x2e,0x0f]
433433 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
434 ; X64-NEXT: ret{{[l|q]}} # encoding: [0xc3]
434 ; X64-NEXT: retq # encoding: [0xc3]
435435 call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %mask, <8 x float> %a2)
436436 ret void
437437 }
719719 ; X86-AVX: # %bb.0:
720720 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
721721 ; X86-AVX-NEXT: vpermilps (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0x00]
722 ; X86-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
722 ; X86-AVX-NEXT: retl # encoding: [0xc3]
723723 ;
724724 ; X86-AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_load:
725725 ; X86-AVX512VL: # %bb.0:
726726 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
727727 ; X86-AVX512VL-NEXT: vpermilps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0x00]
728 ; X86-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
728 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
729729 ;
730730 ; X64-AVX-LABEL: test_x86_avx_vpermilvar_ps_load:
731731 ; X64-AVX: # %bb.0:
732732 ; X64-AVX-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0x07]
733 ; X64-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
733 ; X64-AVX-NEXT: retq # encoding: [0xc3]
734734 ;
735735 ; X64-AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_load:
736736 ; X64-AVX512VL: # %bb.0:
737737 ; X64-AVX512VL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0x07]
738 ; X64-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
738 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
739739 %a2 = load <4 x i32>, <4 x i32>* %a1
740740 %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
741741 ret <4 x float> %res
950950 ; X86-AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
951951 ; X86-AVX-NEXT: vmovntdq %ymm0, (%eax) # encoding: [0xc5,0xfd,0xe7,0x00]
952952 ; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
953 ; X86-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
953 ; X86-AVX-NEXT: retl # encoding: [0xc3]
954954 ;
955955 ; X86-AVX512VL-LABEL: movnt_dq:
956956 ; X86-AVX512VL: # %bb.0:
959959 ; X86-AVX512VL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
960960 ; X86-AVX512VL-NEXT: vmovntdq %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x00]
961961 ; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
962 ; X86-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
962 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
963963 ;
964964 ; X64-AVX-LABEL: movnt_dq:
965965 ; X64-AVX: # %bb.0:
967967 ; X64-AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
968968 ; X64-AVX-NEXT: vmovntdq %ymm0, (%rdi) # encoding: [0xc5,0xfd,0xe7,0x07]
969969 ; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
970 ; X64-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
970 ; X64-AVX-NEXT: retq # encoding: [0xc3]
971971 ;
972972 ; X64-AVX512VL-LABEL: movnt_dq:
973973 ; X64-AVX512VL: # %bb.0:
975975 ; X64-AVX512VL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
976976 ; X64-AVX512VL-NEXT: vmovntdq %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x07]
977977 ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
978 ; X64-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
978 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
979979 %a2 = add <2 x i64> %a1,
980980 %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32>
981981 tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind
989989 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
990990 ; X86-AVX-NEXT: vmovntps %ymm0, (%eax) # encoding: [0xc5,0xfc,0x2b,0x00]
991991 ; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
992 ; X86-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
992 ; X86-AVX-NEXT: retl # encoding: [0xc3]
993993 ;
994994 ; X86-AVX512VL-LABEL: movnt_ps:
995995 ; X86-AVX512VL: # %bb.0:
996996 ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
997997 ; X86-AVX512VL-NEXT: vmovntps %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x00]
998998 ; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
999 ; X86-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
999 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
10001000 ;
10011001 ; X64-AVX-LABEL: movnt_ps:
10021002 ; X64-AVX: # %bb.0:
10031003 ; X64-AVX-NEXT: vmovntps %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x2b,0x07]
10041004 ; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1005 ; X64-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1005 ; X64-AVX-NEXT: retq # encoding: [0xc3]
10061006 ;
10071007 ; X64-AVX512VL-LABEL: movnt_ps:
10081008 ; X64-AVX512VL: # %bb.0:
10091009 ; X64-AVX512VL-NEXT: vmovntps %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x07]
10101010 ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1011 ; X64-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1011 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
10121012 tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
10131013 ret void
10141014 }
10231023 ; X86-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
10241024 ; X86-AVX-NEXT: vmovntpd %ymm0, (%eax) # encoding: [0xc5,0xfd,0x2b,0x00]
10251025 ; X86-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1026 ; X86-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1026 ; X86-AVX-NEXT: retl # encoding: [0xc3]
10271027 ;
10281028 ; X86-AVX512VL-LABEL: movnt_pd:
10291029 ; X86-AVX512VL: # %bb.0:
10321032 ; X86-AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
10331033 ; X86-AVX512VL-NEXT: vmovntpd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x00]
10341034 ; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1035 ; X86-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1035 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
10361036 ;
10371037 ; X64-AVX-LABEL: movnt_pd:
10381038 ; X64-AVX: # %bb.0:
10401040 ; X64-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
10411041 ; X64-AVX-NEXT: vmovntpd %ymm0, (%rdi) # encoding: [0xc5,0xfd,0x2b,0x07]
10421042 ; X64-AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1043 ; X64-AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1043 ; X64-AVX-NEXT: retq # encoding: [0xc3]
10441044 ;
10451045 ; X64-AVX512VL-LABEL: movnt_pd:
10461046 ; X64-AVX512VL: # %bb.0:
10481048 ; X64-AVX512VL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
10491049 ; X64-AVX512VL-NEXT: vmovntpd %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x07]
10501050 ; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1051 ; X64-AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1051 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
10521052 %a2 = fadd <4 x double> %a1,
10531053 tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
10541054 ret void
366366 ; X86: # %bb.0:
367367 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
368368 ; X86-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
369 ; X86-NEXT: ret{{[l|q]}}
369 ; X86-NEXT: retl
370370 ;
371371 ; X64-LABEL: test_mm256_broadcastsi128_si256_mem:
372372 ; X64: # %bb.0:
373373 ; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
374 ; X64-NEXT: ret{{[l|q]}}
374 ; X64-NEXT: retq
375375 %a0 = load <2 x i64>, <2 x i64>* %p0
376376 %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32>
377377 ret <4 x i64> %res
765765 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
766766 ; X86-NEXT: vpgatherdd %xmm2, (%eax,%xmm0,2), %xmm1
767767 ; X86-NEXT: vmovdqa %xmm1, %xmm0
768 ; X86-NEXT: ret{{[l|q]}}
768 ; X86-NEXT: retl
769769 ;
770770 ; X64-LABEL: test_mm_i32gather_epi32:
771771 ; X64: # %bb.0:
773773 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
774774 ; X64-NEXT: vpgatherdd %xmm2, (%rdi,%xmm0,2), %xmm1
775775 ; X64-NEXT: vmovdqa %xmm1, %xmm0
776 ; X64-NEXT: ret{{[l|q]}}
776 ; X64-NEXT: retq
777777 %arg0 = bitcast i32 *%a0 to i8*
778778 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
779779 %mask = bitcast <2 x i64> to <4 x i32>
788788 ; X86: # %bb.0:
789789 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
790790 ; X86-NEXT: vpgatherdd %xmm2, (%eax,%xmm1,2), %xmm0
791 ; X86-NEXT: ret{{[l|q]}}
791 ; X86-NEXT: retl
792792 ;
793793 ; X64-LABEL: test_mm_mask_i32gather_epi32:
794794 ; X64: # %bb.0:
795795 ; X64-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0
796 ; X64-NEXT: ret{{[l|q]}}
796 ; X64-NEXT: retq
797797 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
798798 %arg1 = bitcast i32 *%a1 to i8*
799799 %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
811811 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
812812 ; X86-NEXT: vpgatherdd %ymm2, (%eax,%ymm0,2), %ymm1
813813 ; X86-NEXT: vmovdqa %ymm1, %ymm0
814 ; X86-NEXT: ret{{[l|q]}}
814 ; X86-NEXT: retl
815815 ;
816816 ; X64-LABEL: test_mm256_i32gather_epi32:
817817 ; X64: # %bb.0:
819819 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
820820 ; X64-NEXT: vpgatherdd %ymm2, (%rdi,%ymm0,2), %ymm1
821821 ; X64-NEXT: vmovdqa %ymm1, %ymm0
822 ; X64-NEXT: ret{{[l|q]}}
822 ; X64-NEXT: retq
823823 %arg0 = bitcast i32 *%a0 to i8*
824824 %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
825825 %mask = bitcast <4 x i64> to <8 x i32>
834834 ; X86: # %bb.0:
835835 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
836836 ; X86-NEXT: vpgatherdd %ymm2, (%eax,%ymm1,2), %ymm0
837 ; X86-NEXT: ret{{[l|q]}}
837 ; X86-NEXT: retl
838838 ;
839839 ; X64-LABEL: test_mm256_mask_i32gather_epi32:
840840 ; X64: # %bb.0:
841841 ; X64-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0
842 ; X64-NEXT: ret{{[l|q]}}
842 ; X64-NEXT: retq
843843 %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
844844 %arg1 = bitcast i32 *%a1 to i8*
845845 %arg2 = bitcast <4 x i64> %a2 to <8 x i32>
857857 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
858858 ; X86-NEXT: vpgatherdq %xmm2, (%eax,%xmm0,2), %xmm1
859859 ; X86-NEXT: vmovdqa %xmm1, %xmm0
860 ; X86-NEXT: ret{{[l|q]}}
860 ; X86-NEXT: retl
861861 ;
862862 ; X64-LABEL: test_mm_i32gather_epi64:
863863 ; X64: # %bb.0:
865865 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
866866 ; X64-NEXT: vpgatherdq %xmm2, (%rdi,%xmm0,2), %xmm1
867867 ; X64-NEXT: vmovdqa %xmm1, %xmm0
868 ; X64-NEXT: ret{{[l|q]}}
868 ; X64-NEXT: retq
869869 %arg0 = bitcast i64 *%a0 to i8*
870870 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
871871 %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> undef, i8* %arg0, <4 x i32> %arg1, <2 x i64> , i8 2)
878878 ; X86: # %bb.0:
879879 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
880880 ; X86-NEXT: vpgatherdq %xmm2, (%eax,%xmm1,2), %xmm0
881 ; X86-NEXT: ret{{[l|q]}}
881 ; X86-NEXT: retl
882882 ;
883883 ; X64-LABEL: test_mm_mask_i32gather_epi64:
884884 ; X64: # %bb.0:
885885 ; X64-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0
886 ; X64-NEXT: ret{{[l|q]}}
886 ; X64-NEXT: retq
887887 %arg1 = bitcast i64 *%a1 to i8*
888888 %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
889889 %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, i8* %arg1, <4 x i32> %arg2, <2 x i64> %a3, i8 2)
898898 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
899899 ; X86-NEXT: vpgatherdq %ymm2, (%eax,%xmm0,2), %ymm1
900900 ; X86-NEXT: vmovdqa %ymm1, %ymm0
901 ; X86-NEXT: ret{{[l|q]}}
901 ; X86-NEXT: retl
902902 ;
903903 ; X64-LABEL: test_mm256_i32gather_epi64:
904904 ; X64: # %bb.0:
906906 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
907907 ; X64-NEXT: vpgatherdq %ymm2, (%rdi,%xmm0,2), %ymm1
908908 ; X64-NEXT: vmovdqa %ymm1, %ymm0
909 ; X64-NEXT: ret{{[l|q]}}
909 ; X64-NEXT: retq
910910 %arg0 = bitcast i64 *%a0 to i8*
911911 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
912912 %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> undef, i8* %arg0, <4 x i32> %arg1, <4 x i64> , i8 2)
919919 ; X86: # %bb.0:
920920 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
921921 ; X86-NEXT: vpgatherdq %ymm2, (%eax,%xmm1,2), %ymm0
922 ; X86-NEXT: ret{{[l|q]}}
922 ; X86-NEXT: retl
923923 ;
924924 ; X64-LABEL: test_mm256_mask_i32gather_epi64:
925925 ; X64: # %bb.0:
926926 ; X64-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0
927 ; X64-NEXT: ret{{[l|q]}}
927 ; X64-NEXT: retq
928928 %arg1 = bitcast i64 *%a1 to i8*
929929 %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
930930 %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, i8* %arg1, <4 x i32> %arg2, <4 x i64> %a3, i8 2)
939939 ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
940940 ; X86-NEXT: vgatherdpd %xmm2, (%eax,%xmm0,2), %xmm1
941941 ; X86-NEXT: vmovapd %xmm1, %xmm0
942 ; X86-NEXT: ret{{[l|q]}}
942 ; X86-NEXT: retl
943943 ;
944944 ; X64-LABEL: test_mm_i32gather_pd:
945945 ; X64: # %bb.0:
947947 ; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
948948 ; X64-NEXT: vgatherdpd %xmm2, (%rdi,%xmm0,2), %xmm1
949949 ; X64-NEXT: vmovapd %xmm1, %xmm0
950 ; X64-NEXT: ret{{[l|q]}}
950 ; X64-NEXT: retq
951951 %arg0 = bitcast double *%a0 to i8*
952952 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
953953 %cmp = fcmp oeq <2 x double> zeroinitializer, zeroinitializer
963963 ; X86: # %bb.0:
964964 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
965965 ; X86-NEXT: vgatherdpd %xmm2, (%eax,%xmm1,2), %xmm0
966 ; X86-NEXT: ret{{[l|q]}}
966 ; X86-NEXT: retl
967967 ;
968968 ; X64-LABEL: test_mm_mask_i32gather_pd:
969969 ; X64: # %bb.0:
970970 ; X64-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0
971 ; X64-NEXT: ret{{[l|q]}}
971 ; X64-NEXT: retq
972972 %arg1 = bitcast double *%a1 to i8*
973973 %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
974974 %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, i8* %arg1, <4 x i32> %arg2, <2 x double> %a3, i8 2)
983983 ; X86-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2
984984 ; X86-NEXT: vgatherdpd %ymm2, (%eax,%xmm0,2), %ymm1
985985 ; X86-NEXT: vmovapd %ymm1, %ymm0
986 ; X86-NEXT: ret{{[l|q]}}
986 ; X86-NEXT: retl
987987 ;
988988 ; X64-LABEL: test_mm256_i32gather_pd:
989989 ; X64: # %bb.0:
991991 ; X64-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2
992992 ; X64-NEXT: vgatherdpd %ymm2, (%rdi,%xmm0,2), %ymm1
993993 ; X64-NEXT: vmovapd %ymm1, %ymm0
994 ; X64-NEXT: ret{{[l|q]}}
994 ; X64-NEXT: retq
995995 %arg0 = bitcast double *%a0 to i8*
996996 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
997997 %mask = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i8 0)
10051005 ; X86: # %bb.0:
10061006 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
10071007 ; X86-NEXT: vgatherdpd %ymm2, (%eax,%xmm1,2), %ymm0
1008 ; X86-NEXT: ret{{[l|q]}}
1008 ; X86-NEXT: retl
10091009 ;
10101010 ; X64-LABEL: test_mm256_mask_i32gather_pd:
10111011 ; X64: # %bb.0:
10121012 ; X64-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,2), %ymm0
1013 ; X64-NEXT: ret{{[l|q]}}
1013 ; X64-NEXT: retq
10141014 %arg1 = bitcast double *%a1 to i8*
10151015 %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
10161016 %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, i8* %arg1, <4 x i32> %arg2, <4 x double> %a3, i8 2)
10251025 ; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
10261026 ; X86-NEXT: vgatherdps %xmm2, (%eax,%xmm0,2), %xmm1
10271027 ; X86-NEXT: vmovaps %xmm1, %xmm0
1028 ; X86-NEXT: ret{{[l|q]}}
1028 ; X86-NEXT: retl
10291029 ;
10301030 ; X64-LABEL: test_mm_i32gather_ps:
10311031 ; X64: # %bb.0:
10331033 ; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
10341034 ; X64-NEXT: vgatherdps %xmm2, (%rdi,%xmm0,2), %xmm1
10351035 ; X64-NEXT: vmovaps %xmm1, %xmm0
1036 ; X64-NEXT: ret{{[l|q]}}
1036 ; X64-NEXT: retq
10371037 %arg0 = bitcast float *%a0 to i8*
10381038 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
10391039 %cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer
10491049 ; X86: # %bb.0:
10501050 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
10511051 ; X86-NEXT: vgatherdps %xmm2, (%eax,%xmm1,2), %xmm0
1052 ; X86-NEXT: ret{{[l|q]}}
1052 ; X86-NEXT: retl
10531053 ;
10541054 ; X64-LABEL: test_mm_mask_i32gather_ps:
10551055 ; X64: # %bb.0:
10561056 ; X64-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0
1057 ; X64-NEXT: ret{{[l|q]}}
1057 ; X64-NEXT: retq
10581058 %arg1 = bitcast float *%a1 to i8*
10591059 %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
10601060 %call = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, i8* %arg1, <4 x i32> %arg2, <4 x float> %a3, i8 2)
10691069 ; X86-NEXT: vcmpeqps %ymm1, %ymm1, %ymm2
10701070 ; X86-NEXT: vgatherdps %ymm2, (%eax,%ymm0,2), %ymm1
10711071 ; X86-NEXT: vmovaps %ymm1, %ymm0
1072 ; X86-NEXT: ret{{[l|q]}}
1072 ; X86-NEXT: retl
10731073 ;
10741074 ; X64-LABEL: test_mm256_i32gather_ps:
10751075 ; X64: # %bb.0:
10771077 ; X64-NEXT: vcmpeqps %ymm1, %ymm1, %ymm2
10781078 ; X64-NEXT: vgatherdps %ymm2, (%rdi,%ymm0,2), %ymm1
10791079 ; X64-NEXT: vmovaps %ymm1, %ymm0
1080 ; X64-NEXT: ret{{[l|q]}}
1080 ; X64-NEXT: retq
10811081 %arg0 = bitcast float *%a0 to i8*
10821082 %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
10831083 %mask = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> zeroinitializer, <8 x float> zeroinitializer, i8 0)
10911091 ; X86: # %bb.0:
10921092 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
10931093 ; X86-NEXT: vgatherdps %ymm2, (%eax,%ymm1,2), %ymm0
1094 ; X86-NEXT: ret{{[l|q]}}
1094 ; X86-NEXT: retl
10951095 ;
10961096 ; X64-LABEL: test_mm256_mask_i32gather_ps:
10971097 ; X64: # %bb.0:
10981098 ; X64-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,2), %ymm0
1099 ; X64-NEXT: ret{{[l|q]}}
1099 ; X64-NEXT: retq
11001100 %arg1 = bitcast float *%a1 to i8*
11011101 %arg2 = bitcast <4 x i64> %a2 to <8 x i32>
11021102 %call = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %arg1, <8 x i32> %arg2, <8 x float> %a3, i8 2)
11111111 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
11121112 ; X86-NEXT: vpgatherqd %xmm2, (%eax,%xmm0,2), %xmm1
11131113 ; X86-NEXT: vmovdqa %xmm1, %xmm0
1114 ; X86-NEXT: ret{{[l|q]}}
1114 ; X86-NEXT: retl
11151115 ;
11161116 ; X64-LABEL: test_mm_i64gather_epi32:
11171117 ; X64: # %bb.0:
11191119 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
11201120 ; X64-NEXT: vpgatherqd %xmm2, (%rdi,%xmm0,2), %xmm1
11211121 ; X64-NEXT: vmovdqa %xmm1, %xmm0
1122 ; X64-NEXT: ret{{[l|q]}}
1122 ; X64-NEXT: retq
11231123 %arg0 = bitcast i32 *%a0 to i8*
11241124 %mask = bitcast <2 x i64> to <4 x i32>
11251125 %call = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> undef, i8* %arg0, <2 x i64> %a1, <4 x i32> %mask, i8 2)
11331133 ; X86: # %bb.0:
11341134 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
11351135 ; X86-NEXT: vpgatherqd %xmm2, (%eax,%xmm1,2), %xmm0
1136 ; X86-NEXT: ret{{[l|q]}}
1136 ; X86-NEXT: retl
11371137 ;
11381138 ; X64-LABEL: test_mm_mask_i64gather_epi32:
11391139 ; X64: # %bb.0:
11401140 ; X64-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0
1141 ; X64-NEXT: ret{{[l|q]}}
1141 ; X64-NEXT: retq
11421142 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
11431143 %arg1 = bitcast i32 *%a1 to i8*
11441144 %arg3 = bitcast <2 x i64> %a3 to <4 x i32>
11561156 ; X86-NEXT: vpgatherqd %xmm2, (%eax,%ymm0,2), %xmm1
11571157 ; X86-NEXT: vmovdqa %xmm1, %xmm0
11581158 ; X86-NEXT: vzeroupper
1159 ; X86-NEXT: ret{{[l|q]}}
1159 ; X86-NEXT: retl
11601160 ;
11611161 ; X64-LABEL: test_mm256_i64gather_epi32:
11621162 ; X64: # %bb.0:
11651165 ; X64-NEXT: vpgatherqd %xmm2, (%rdi,%ymm0,2), %xmm1
11661166 ; X64-NEXT: vmovdqa %xmm1, %xmm0
11671167 ; X64-NEXT: vzeroupper
1168 ; X64-NEXT: ret{{[l|q]}}
1168 ; X64-NEXT: retq
11691169 %arg0 = bitcast i32 *%a0 to i8*
11701170 %mask = bitcast <2 x i64> to <4 x i32>
11711171 %call = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> undef, i8* %arg0, <4 x i64> %a1, <4 x i32> %mask, i8 2)
11801180 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
11811181 ; X86-NEXT: vpgatherqd %xmm2, (%eax,%ymm1,2), %xmm0
11821182 ; X86-NEXT: vzeroupper
1183 ; X86-NEXT: ret{{[l|q]}}
1183 ; X86-NEXT: retl
11841184 ;
11851185 ; X64-LABEL: test_mm256_mask_i64gather_epi32:
11861186 ; X64: # %bb.0:
11871187 ; X64-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0
11881188 ; X64-NEXT: vzeroupper
1189 ; X64-NEXT: ret{{[l|q]}}
1189 ; X64-NEXT: retq
11901190 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
11911191 %arg1 = bitcast i32 *%a1 to i8*
11921192 %arg3 = bitcast <2 x i64> %a3 to <4 x i32>
12031203 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
12041204 ; X86-NEXT: vpgatherqq %xmm2, (%eax,%xmm0,2), %xmm1
12051205 ; X86-NEXT: vmovdqa %xmm1, %xmm0
1206 ; X86-NEXT: ret{{[l|q]}}
1206 ; X86-NEXT: retl
12071207 ;
12081208 ; X64-LABEL: test_mm_i64gather_epi64:
12091209 ; X64: # %bb.0:
12111211 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
12121212 ; X64-NEXT: vpgatherqq %xmm2, (%rdi,%xmm0,2), %xmm1
12131213 ; X64-NEXT: vmovdqa %xmm1, %xmm0
1214 ; X64-NEXT: ret{{[l|q]}}
1214 ; X64-NEXT: retq
12151215 %arg0 = bitcast i64 *%a0 to i8*
12161216 %call = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> undef, i8* %arg0, <2 x i64> %a1, <2 x i64> , i8 2)
12171217 ret <2 x i64> %call
12231223 ; X86: # %bb.0:
12241224 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
12251225 ; X86-NEXT: vpgatherqq %xmm2, (%eax,%xmm1,2), %xmm0
1226 ; X86-NEXT: ret{{[l|q]}}
1226 ; X86-NEXT: retl
12271227 ;
12281228 ; X64-LABEL: test_mm_mask_i64gather_epi64:
12291229 ; X64: # %bb.0:
12301230 ; X64-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0
1231 ; X64-NEXT: ret{{[l|q]}}
1231 ; X64-NEXT: retq
12321232 %arg1 = bitcast i64 *%a1 to i8*
12331233 %call = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, i8* %arg1, <2 x i64> %a2, <2 x i64> %a3, i8 2)
12341234 ret <2 x i64> %call
12421242 ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
12431243 ; X86-NEXT: vpgatherqq %ymm2, (%eax,%ymm0,2), %ymm1
12441244 ; X86-NEXT: vmovdqa %ymm1, %ymm0
1245 ; X86-NEXT: ret{{[l|q]}}
1245 ; X86-NEXT: retl
12461246 ;
12471247 ; X64-LABEL: test_mm256_i64gather_epi64:
12481248 ; X64: # %bb.0:
12501250 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
12511251 ; X64-NEXT: vpgatherqq %ymm2, (%rdi,%ymm0,2), %ymm1
12521252 ; X64-NEXT: vmovdqa %ymm1, %ymm0
1253 ; X64-NEXT: ret{{[l|q]}}
1253 ; X64-NEXT: retq
12541254 %arg0 = bitcast i64 *%a0 to i8*
12551255 %call = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> undef, i8* %arg0, <4 x i64> %a1, <4 x i64> , i8 2)
12561256 ret <4 x i64> %call
12621262 ; X86: # %bb.0:
12631263 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
12641264 ; X86-NEXT: vpgatherqq %ymm2, (%eax,%ymm1,2), %ymm0
1265 ; X86-NEXT: ret{{[l|q]}}
1265 ; X86-NEXT: retl
12661266 ;
12671267 ; X64-LABEL: test_mm256_mask_i64gather_epi64:
12681268 ; X64: # %bb.0:
12691269 ; X64-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0
1270 ; X64-NEXT: ret{{[l|q]}}
1270 ; X64-NEXT: retq
12711271 %arg1 = bitcast i64 *%a1 to i8*
12721272 %call = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, i8* %arg1, <4 x i64> %a2, <4 x i64> %a3, i8 2)
12731273 ret <4 x i64> %call
12811281 ; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
12821282 ; X86-NEXT: vgatherqpd %xmm2, (%eax,%xmm0,2), %xmm1
12831283 ; X86-NEXT: vmovapd %xmm1, %xmm0
1284 ; X86-NEXT: ret{{[l|q]}}
1284 ; X86-NEXT: retl
12851285 ;
12861286 ; X64-LABEL: test_mm_i64gather_pd:
12871287 ; X64: # %bb.0:
12891289 ; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
12901290 ; X64-NEXT: vgatherqpd %xmm2, (%rdi,%xmm0,2), %xmm1
12911291 ; X64-NEXT: vmovapd %xmm1, %xmm0
1292 ; X64-NEXT: ret{{[l|q]}}
1292 ; X64-NEXT: retq
12931293 %arg0 = bitcast double *%a0 to i8*
12941294 %cmp = fcmp oeq <2 x double> zeroinitializer, zeroinitializer
12951295 %sext = sext <2 x i1> %cmp to <2 x i64>
13041304 ; X86: # %bb.0:
13051305 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
13061306 ; X86-NEXT: vgatherqpd %xmm2, (%eax,%xmm1,2), %xmm0
1307 ; X86-NEXT: ret{{[l|q]}}
1307 ; X86-NEXT: retl
13081308 ;
13091309 ; X64-LABEL: test_mm_mask_i64gather_pd:
13101310 ; X64: # %bb.0:
13111311 ; X64-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0
1312 ; X64-NEXT: ret{{[l|q]}}
1312 ; X64-NEXT: retq
13131313 %arg1 = bitcast double *%a1 to i8*
13141314 %call = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, i8* %arg1, <2 x i64> %a2, <2 x double> %a3, i8 2)
13151315 ret <2 x double> %call
13231323 ; X86-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2
13241324 ; X86-NEXT: vgatherqpd %ymm2, (%eax,%ymm0,2), %ymm1
13251325 ; X86-NEXT: vmovapd %ymm1, %ymm0
1326 ; X86-NEXT: ret{{[l|q]}}
1326 ; X86-NEXT: retl
13271327 ;
13281328 ; X64-LABEL: test_mm256_i64gather_pd:
13291329 ; X64: # %bb.0:
13311331 ; X64-NEXT: vcmpeqpd %ymm1, %ymm1, %ymm2
13321332 ; X64-NEXT: vgatherqpd %ymm2, (%rdi,%ymm0,2), %ymm1
13331333 ; X64-NEXT: vmovapd %ymm1, %ymm0
1334 ; X64-NEXT: ret{{[l|q]}}
1334 ; X64-NEXT: retq
13351335 %arg0 = bitcast double *%a0 to i8*
13361336 %mask = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i8 0)
13371337 %call = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> undef, i8* %arg0, <4 x i64> %a1, <4 x double> %mask, i8 2)
13441344 ; X86: # %bb.0:
13451345 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
13461346 ; X86-NEXT: vgatherqpd %ymm2, (%eax,%ymm1,2), %ymm0
1347 ; X86-NEXT: ret{{[l|q]}}
1347 ; X86-NEXT: retl
13481348 ;
13491349 ; X64-LABEL: test_mm256_mask_i64gather_pd:
13501350 ; X64: # %bb.0:
13511351 ; X64-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,2), %ymm0
1352 ; X64-NEXT: ret{{[l|q]}}
1352 ; X64-NEXT: retq
13531353 %arg1 = bitcast i64 *%a1 to i8*
13541354 %call = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, i8* %arg1, <4 x i64> %a2, <4 x double> %a3, i8 2)
13551355 ret <4 x double> %call
13631363 ; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
13641364 ; X86-NEXT: vgatherqps %xmm2, (%eax,%xmm0,2), %xmm1
13651365 ; X86-NEXT: vmovaps %xmm1, %xmm0
1366 ; X86-NEXT: ret{{[l|q]}}
1366 ; X86-NEXT: retl
13671367 ;
13681368 ; X64-LABEL: test_mm_i64gather_ps:
13691369 ; X64: # %bb.0:
13711371 ; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
13721372 ; X64-NEXT: vgatherqps %xmm2, (%rdi,%xmm0,2), %xmm1
13731373 ; X64-NEXT: vmovaps %xmm1, %xmm0
1374 ; X64-NEXT: ret{{[l|q]}}
1374 ; X64-NEXT: retq
13751375 %arg0 = bitcast float *%a0 to i8*
13761376 %cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer
13771377 %sext = sext <4 x i1> %cmp to <4 x i32>
13861386 ; X86: # %bb.0:
13871387 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
13881388 ; X86-NEXT: vgatherqps %xmm2, (%eax,%xmm1,2), %xmm0
1389 ; X86-NEXT: ret{{[l|q]}}
1389 ; X86-NEXT: retl
13901390 ;
13911391 ; X64-LABEL: test_mm_mask_i64gather_ps:
13921392 ; X64: # %bb.0:
13931393 ; X64-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0
1394 ; X64-NEXT: ret{{[l|q]}}
1394 ; X64-NEXT: retq
13951395 %arg1 = bitcast float *%a1 to i8*
13961396 %call = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, i8* %arg1, <2 x i64> %a2, <4 x float> %a3, i8 2)
13971397 ret <4 x float> %call
14061406 ; X86-NEXT: vgatherqps %xmm2, (%eax,%ymm0,2), %xmm1
14071407 ; X86-NEXT: vmovaps %xmm1, %xmm0
14081408 ; X86-NEXT: vzeroupper
1409 ; X86-NEXT: ret{{[l|q]}}
1409 ; X86-NEXT: retl
14101410 ;
14111411 ; X64-LABEL: test_mm256_i64gather_ps:
14121412 ; X64: # %bb.0:
14151415 ; X64-NEXT: vgatherqps %xmm2, (%rdi,%ymm0,2), %xmm1
14161416 ; X64-NEXT: vmovaps %xmm1, %xmm0
14171417 ; X64-NEXT: vzeroupper
1418 ; X64-NEXT: ret{{[l|q]}}
1418 ; X64-NEXT: retq
14191419 %arg0 = bitcast float *%a0 to i8*
14201420 %cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer
14211421 %sext = sext <4 x i1> %cmp to <4 x i32>
14311431 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
14321432 ; X86-NEXT: vgatherqps %xmm2, (%eax,%ymm1,2), %xmm0
14331433 ; X86-NEXT: vzeroupper
1434 ; X86-NEXT: ret{{[l|q]}}
1434 ; X86-NEXT: retl
14351435 ;
14361436 ; X64-LABEL: test_mm256_mask_i64gather_ps:
14371437 ; X64: # %bb.0:
14381438 ; X64-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,2), %xmm0
14391439 ; X64-NEXT: vzeroupper
1440 ; X64-NEXT: ret{{[l|q]}}
1440 ; X64-NEXT: retq
14411441 %arg1 = bitcast float *%a1 to i8*
14421442 %call = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, i8* %arg1, <4 x i64> %a2, <4 x float> %a3, i8 2)
14431443 ret <4 x float> %call
14951495 ; X86: # %bb.0:
14961496 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
14971497 ; X86-NEXT: vpmaskmovd (%eax), %xmm0, %xmm0
1498 ; X86-NEXT: ret{{[l|q]}}
1498 ; X86-NEXT: retl
14991499 ;
15001500 ; X64-LABEL: test_mm_maskload_epi32:
15011501 ; X64: # %bb.0:
15021502 ; X64-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm0
1503 ; X64-NEXT: ret{{[l|q]}}
1503 ; X64-NEXT: retq
15041504 %arg0 = bitcast i32* %a0 to i8*
15051505 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
15061506 %call = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %arg0, <4 x i32> %arg1)
15141514 ; X86: # %bb.0:
15151515 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
15161516 ; X86-NEXT: vpmaskmovd (%eax), %ymm0, %ymm0
1517 ; X86-NEXT: ret{{[l|q]}}
1517 ; X86-NEXT: retl
15181518 ;
15191519 ; X64-LABEL: test_mm256_maskload_epi32:
15201520 ; X64: # %bb.0:
15211521 ; X64-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm0
1522 ; X64-NEXT: ret{{[l|q]}}
1522 ; X64-NEXT: retq
15231523 %arg0 = bitcast i32* %a0 to i8*
15241524 %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
15251525 %call = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %arg0, <8 x i32> %arg1)
15331533 ; X86: # %bb.0:
15341534 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
15351535 ; X86-NEXT: vpmaskmovq (%eax), %xmm0, %xmm0
1536 ; X86-NEXT: ret{{[l|q]}}
1536 ; X86-NEXT: retl
15371537 ;
15381538 ; X64-LABEL: test_mm_maskload_epi64:
15391539 ; X64: # %bb.0:
15401540 ; X64-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm0
1541 ; X64-NEXT: ret{{[l|q]}}
1541 ; X64-NEXT: retq
15421542 %arg0 = bitcast i64* %a0 to i8*
15431543 %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %arg0, <2 x i64> %a1)
15441544 ret <2 x i64> %res
15501550 ; X86: # %bb.0:
15511551 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
15521552 ; X86-NEXT: vpmaskmovq (%eax), %ymm0, %ymm0
1553 ; X86-NEXT: ret{{[l|q]}}
1553 ; X86-NEXT: retl
15541554 ;
15551555 ; X64-LABEL: test_mm256_maskload_epi64:
15561556 ; X64: # %bb.0:
15571557 ; X64-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm0
1558 ; X64-NEXT: ret{{[l|q]}}
1558 ; X64-NEXT: retq
15591559 %arg0 = bitcast i64* %a0 to i8*
15601560 %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %arg0, <4 x i64> %a1)
15611561 ret <4 x i64> %res
15671567 ; X86: # %bb.0:
15681568 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
15691569 ; X86-NEXT: vpmaskmovd %xmm1, %xmm0, (%eax)
1570 ; X86-NEXT: ret{{[l|q]}}
1570 ; X86-NEXT: retl
15711571 ;
15721572 ; X64-LABEL: test_mm_maskstore_epi32:
15731573 ; X64: # %bb.0:
15741574 ; X64-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi)
1575 ; X64-NEXT: ret{{[l|q]}}
1575 ; X64-NEXT: retq
15761576 %arg0 = bitcast float* %a0 to i8*
15771577 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
15781578 %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
15871587 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
15881588 ; X86-NEXT: vpmaskmovd %ymm1, %ymm0, (%eax)
15891589 ; X86-NEXT: vzeroupper
1590 ; X86-NEXT: ret{{[l|q]}}
1590 ; X86-NEXT: retl
15911591 ;
15921592 ; X64-LABEL: test_mm256_maskstore_epi32:
15931593 ; X64: # %bb.0:
15941594 ; X64-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi)
15951595 ; X64-NEXT: vzeroupper
1596 ; X64-NEXT: ret{{[l|q]}}
1596 ; X64-NEXT: retq
15971597 %arg0 = bitcast float* %a0 to i8*
15981598 %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
15991599 %arg2 = bitcast <4 x i64> %a2 to <8 x i32>
16071607 ; X86: # %bb.0:
16081608 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
16091609 ; X86-NEXT: vpmaskmovq %xmm1, %xmm0, (%eax)
1610 ; X86-NEXT: ret{{[l|q]}}
1610 ; X86-NEXT: retl
16111611 ;
16121612 ; X64-LABEL: test_mm_maskstore_epi64:
16131613 ; X64: # %bb.0:
16141614 ; X64-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi)
1615 ; X64-NEXT: ret{{[l|q]}}
1615 ; X64-NEXT: retq
16161616 %arg0 = bitcast i64* %a0 to i8*
16171617 call void @llvm.x86.avx2.maskstore.q(i8* %arg0, <2 x i64> %a1, <2 x i64> %a2)
16181618 ret void
16251625 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
16261626 ; X86-NEXT: vpmaskmovq %ymm1, %ymm0, (%eax)
16271627 ; X86-NEXT: vzeroupper
1628 ; X86-NEXT: ret{{[l|q]}}
1628 ; X86-NEXT: retl
16291629 ;
16301630 ; X64-LABEL: test_mm256_maskstore_epi64:
16311631 ; X64: # %bb.0:
16321632 ; X64-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi)
16331633 ; X64-NEXT: vzeroupper
1634 ; X64-NEXT: ret{{[l|q]}}
1634 ; X64-NEXT: retq
16351635 %arg0 = bitcast i64* %a0 to i8*
16361636 call void @llvm.x86.avx2.maskstore.q.256(i8* %arg0, <4 x i64> %a1, <4 x i64> %a2)
16371637 ret void
24642464 ; X86: # %bb.0:
24652465 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
24662466 ; X86-NEXT: vmovntdqa (%eax), %ymm0
2467 ; X86-NEXT: ret{{[l|q]}}
2467 ; X86-NEXT: retl
24682468 ;
24692469 ; X64-LABEL: test_mm256_stream_load_si256:
24702470 ; X64: # %bb.0:
24712471 ; X64-NEXT: vmovntdqa (%rdi), %ymm0
2472 ; X64-NEXT: ret{{[l|q]}}
2472 ; X64-NEXT: retq
24732473 %arg0 = bitcast <4 x i64> *%a0 to i8*
24742474 %res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %arg0)
24752475 ret <4 x i64> %res
7373 ; X86-SSE-NEXT: psrad $31, %xmm0
7474 ; X86-SSE-NEXT: pcmpgtd {{\.LCPI.*}}, %xmm1
7575 ; X86-SSE-NEXT: packssdw %xmm1, %xmm0
76 ; X86-SSE-NEXT: ret{{[l|q]}}
76 ; X86-SSE-NEXT: retl
7777 ;
7878 ; X86-AVX-LABEL: trunc_ashr_v4i32_icmp_v4i32:
7979 ; X86-AVX: # %bb.0:
8080 ; X86-AVX-NEXT: vpsrad $31, %xmm0, %xmm0
8181 ; X86-AVX-NEXT: vpcmpgtd {{\.LCPI.*}}, %xmm1, %xmm1
8282 ; X86-AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
83 ; X86-AVX-NEXT: ret{{[l|q]}}
83 ; X86-AVX-NEXT: retl
8484 ;
8585 ; X64-SSE-LABEL: trunc_ashr_v4i32_icmp_v4i32:
8686 ; X64-SSE: # %bb.0:
8787 ; X64-SSE-NEXT: psrad $31, %xmm0
8888 ; X64-SSE-NEXT: pcmpgtd {{.*}}(%rip), %xmm1
8989 ; X64-SSE-NEXT: packssdw %xmm1, %xmm0
90 ; X64-SSE-NEXT: ret{{[l|q]}}
90 ; X64-SSE-NEXT: retq
9191 ;
9292 ; X64-AVX-LABEL: trunc_ashr_v4i32_icmp_v4i32:
9393 ; X64-AVX: # %bb.0:
9494 ; X64-AVX-NEXT: vpsrad $31, %xmm0, %xmm0
9595 ; X64-AVX-NEXT: vpcmpgtd {{.*}}(%rip), %xmm1, %xmm1
9696 ; X64-AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
97 ; X64-AVX-NEXT: ret{{[l|q]}}
97 ; X64-AVX-NEXT: retq
9898 %1 = ashr <4 x i32> %a,
9999 %2 = icmp sgt <4 x i32> %b,
100100 %3 = sext <4 x i1> %2 to <4 x i32>
194194 declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
195195
196196 define <2 x i64> @test_mm_cmpgt_epi64(<2 x i64> %a0, <2 x i64> %a1) {
197 ; X32-LABEL: test_mm_cmpgt_epi64:
198 ; X32: # %bb.0:
199 ; X32-NEXT: pcmpgtq %xmm1, %xmm0
200 ; X32-NEXT: retl
201 ;
202 ; X64-LABEL: test_mm_cmpgt_epi64:
203 ; X64: # %bb.0:
204 ; X64-NEXT: pcmpgtq %xmm1, %xmm0
205 ; X64-NEXT: retq
197 ; ALL-LABEL: test_mm_cmpgt_epi64:
198 ; ALL: # %bb.0:
199 ; ALL-NEXT: pcmpgtq %xmm1, %xmm0
200 ; ALL-NEXT: ret{{[l|q]}}
206201 %cmp = icmp sgt <2 x i64> %a0, %a1
207202 %res = sext <2 x i1> %cmp to <2 x i64>
208203 ret <2 x i64> %res
209204 }
210205
211206 define i32 @test_mm_cmpistra(<2 x i64> %a0, <2 x i64> %a1) {
212 ; X32-LABEL: test_mm_cmpistra:
213 ; X32: # %bb.0:
214 ; X32-NEXT: xorl %eax, %eax
215 ; X32-NEXT: pcmpistri $7, %xmm1, %xmm0
216 ; X32-NEXT: seta %al
217 ; X32-NEXT: retl
218 ;
219 ; X64-LABEL: test_mm_cmpistra:
220 ; X64: # %bb.0:
221 ; X64-NEXT: xorl %eax, %eax
222 ; X64-NEXT: pcmpistri $7, %xmm1, %xmm0
223 ; X64-NEXT: seta %al
224 ; X64-NEXT: retq
207 ; ALL-LABEL: test_mm_cmpistra:
208 ; ALL: # %bb.0:
209 ; ALL-NEXT: xorl %eax, %eax
210 ; ALL-NEXT: pcmpistri $7, %xmm1, %xmm0
211 ; ALL-NEXT: seta %al
212 ; ALL-NEXT: ret{{[l|q]}}
225213 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
226214 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
227215 %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
230218 declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone
231219
232220 define i32 @test_mm_cmpistrc(<2 x i64> %a0, <2 x i64> %a1) {
233 ; X32-LABEL: test_mm_cmpistrc:
234 ; X32: # %bb.0:
235 ; X32-NEXT: xorl %eax, %eax
236 ; X32-NEXT: pcmpistri $7, %xmm1, %xmm0
237 ; X32-NEXT: setb %al
238 ; X32-NEXT: retl
239 ;
240 ; X64-LABEL: test_mm_cmpistrc:
241 ; X64: # %bb.0:
242 ; X64-NEXT: xorl %eax, %eax
243 ; X64-NEXT: pcmpistri $7, %xmm1, %xmm0
244 ; X64-NEXT: setb %al
245 ; X64-NEXT: retq
221 ; ALL-LABEL: test_mm_cmpistrc:
222 ; ALL: # %bb.0:
223 ; ALL-NEXT: xorl %eax, %eax
224 ; ALL-NEXT: pcmpistri $7, %xmm1, %xmm0
225 ; ALL-NEXT: setb %al
226 ; ALL-NEXT: ret{{[l|q]}}
246227 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
247228 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
248229 %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
251232 declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone
252233
253234 define i32 @test_mm_cmpistri(<2 x i64> %a0, <2 x i64> %a1) {
254 ; X32-LABEL: test_mm_cmpistri:
255 ; X32: # %bb.0:
256 ; X32-NEXT: pcmpistri $7, %xmm1, %xmm0
257 ; X32-NEXT: movl %ecx, %eax
258 ; X32-NEXT: retl
259 ;
260 ; X64-LABEL: test_mm_cmpistri:
261 ; X64: # %bb.0:
262 ; X64-NEXT: pcmpistri $7, %xmm1, %xmm0
263 ; X64-NEXT: movl %ecx, %eax
264 ; X64-NEXT: retq
235 ; ALL-LABEL: test_mm_cmpistri:
236 ; ALL: # %bb.0:
237 ; ALL-NEXT: pcmpistri $7, %xmm1, %xmm0
238 ; ALL-NEXT: movl %ecx, %eax
239 ; ALL-NEXT: ret{{[l|q]}}
265240 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
266241 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
267242 %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
270245 declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
271246
272247 define <2 x i64> @test_mm_cmpistrm(<2 x i64> %a0, <2 x i64> %a1) {
273 ; X32-LABEL: test_mm_cmpistrm:
274 ; X32: # %bb.0:
275 ; X32-NEXT: pcmpistrm $7, %xmm1, %xmm0
276 ; X32-NEXT: retl
277 ;
278 ; X64-LABEL: test_mm_cmpistrm:
279 ; X64: # %bb.0:
280 ; X64-NEXT: pcmpistrm $7, %xmm1, %xmm0
281 ; X64-NEXT: retq
248 ; ALL-LABEL: test_mm_cmpistrm:
249 ; ALL: # %bb.0:
250 ; ALL-NEXT: pcmpistrm $7, %xmm1, %xmm0
251 ; ALL-NEXT: ret{{[l|q]}}
282252 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
283253 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
284254 %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
288258 declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
289259
290260 define i32 @test_mm_cmpistro(<2 x i64> %a0, <2 x i64> %a1) {
291 ; X32-LABEL: test_mm_cmpistro:
292 ; X32: # %bb.0:
293 ; X32-NEXT: xorl %eax, %eax
294 ; X32-NEXT: pcmpistri $7, %xmm1, %xmm0
295 ; X32-NEXT: seto %al
296 ; X32-NEXT: retl
297 ;
298 ; X64-LABEL: test_mm_cmpistro:
299 ; X64: # %bb.0:
300 ; X64-NEXT: xorl %eax, %eax
301 ; X64-NEXT: pcmpistri $7, %xmm1, %xmm0
302 ; X64-NEXT: seto %al
303 ; X64-NEXT: retq
261 ; ALL-LABEL: test_mm_cmpistro:
262 ; ALL: # %bb.0:
263 ; ALL-NEXT: xorl %eax, %eax
264 ; ALL-NEXT: pcmpistri $7, %xmm1, %xmm0
265 ; ALL-NEXT: seto %al
266 ; ALL-NEXT: ret{{[l|q]}}
304267 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
305268 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
306269 %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
309272 declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone
310273
311274 define i32 @test_mm_cmpistrs(<2 x i64> %a0, <2 x i64> %a1) {
312 ; X32-LABEL: test_mm_cmpistrs:
313 ; X32: # %bb.0:
314 ; X32-NEXT: xorl %eax, %eax
315 ; X32-NEXT: pcmpistri $7, %xmm1, %xmm0
316 ; X32-NEXT: sets %al
317 ; X32-NEXT: retl
318 ;
319 ; X64-LABEL: test_mm_cmpistrs:
320 ; X64: # %bb.0:
321 ; X64-NEXT: xorl %eax, %eax
322 ; X64-NEXT: pcmpistri $7, %xmm1, %xmm0
323 ; X64-NEXT: sets %al
324 ; X64-NEXT: retq
275 ; ALL-LABEL: test_mm_cmpistrs:
276 ; ALL: # %bb.0:
277 ; ALL-NEXT: xorl %eax, %eax
278 ; ALL-NEXT: pcmpistri $7, %xmm1, %xmm0
279 ; ALL-NEXT: sets %al
280 ; ALL-NEXT: ret{{[l|q]}}
325281 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
326282 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
327283 %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
330286 declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone
331287
332288 define i32 @test_mm_cmpistrz(<2 x i64> %a0, <2 x i64> %a1) {
333 ; X32-LABEL: test_mm_cmpistrz:
334 ; X32: # %bb.0:
335 ; X32-NEXT: xorl %eax, %eax
336 ; X32-NEXT: pcmpistri $7, %xmm1, %xmm0
337 ; X32-NEXT: sete %al
338 ; X32-NEXT: retl
339 ;
340 ; X64-LABEL: test_mm_cmpistrz:
341 ; X64: # %bb.0:
342 ; X64-NEXT: xorl %eax, %eax
343 ; X64-NEXT: pcmpistri $7, %xmm1, %xmm0
344 ; X64-NEXT: sete %al
345 ; X64-NEXT: retq
289 ; ALL-LABEL: test_mm_cmpistrz:
290 ; ALL: # %bb.0:
291 ; ALL-NEXT: xorl %eax, %eax
292 ; ALL-NEXT: pcmpistri $7, %xmm1, %xmm0
293 ; ALL-NEXT: sete %al
294 ; ALL-NEXT: ret{{[l|q]}}
346295 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
347296 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
348297 %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %arg0, <16 x i8> %arg1, i8 7)
106106 asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm)
107107 # Generically match a LCP symbol.
108108 asm = SCRUB_X86_LCP_RE.sub(r'{{\.LCPI.*}}', asm)
109 if getattr(args, 'x86_extra_scrub', False):
109 if getattr(args, 'extra_scrub', False):
110110 # Avoid generating different checks for 32- and 64-bit because of 'retl' vs 'retq'.
111111 asm = SCRUB_X86_RET_RE.sub(r'ret{{[l|q]}}', asm)
112112 # Strip kill operands inserted into the asm.
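This hunk only renames the attribute the x86 scrubber consults; whether the extra level actually runs is decided per call by the do_scrub helper added to the common code further below, which toggles the flag on a deep copy of the parsed arguments. The sketch below shows that hand-off; the scrubber's name, its full body and the regex pattern are assumptions kept only to illustrate the getattr-with-default design, which stays safe for callers that never define the flag at all.

    # Sketch of how common code drives the scrubber twice per function body
    # (mirrors do_scrub below); names and patterns are illustrative.
    import argparse
    import copy
    import re

    SCRUB_X86_RET_RE = re.compile(r'ret[l|q]')

    def scrub_asm_x86(asm, args):
      # ...generic scrubbing elided...
      if getattr(args, 'extra_scrub', False):
        # Avoid different 32/64-bit checks purely because of retl vs retq.
        asm = SCRUB_X86_RET_RE.sub(r'ret{{[l|q]}}', asm)
      return asm

    cli_args = argparse.Namespace(extra_scrub=False)  # as parsed from the command line
    body = 'vmovntdqa (%rdi), %ymm0\nretq'

    plain = scrub_asm_x86(body, cli_args)             # keeps 'retq'
    forced = copy.deepcopy(cli_args)
    forced.extra_scrub = True                         # what do_scrub(..., extra=True) does
    extra = scrub_asm_x86(body, forced)               # ends with 'ret{{[l|q]}}'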
22 import string
33 import subprocess
44 import sys
5 import copy
56
67 if sys.version_info[0] > 2:
78 class string:
7980 body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
8081 return body
8182
83 def do_scrub(body, scrubber, scrubber_args, extra):
84 if scrubber_args:
85 local_args = copy.deepcopy(scrubber_args)
86 local_args[0].extra_scrub = extra
87 return scrubber(body, *local_args)
88 return scrubber(body, *scrubber_args)
89
8290 # Build up a dictionary of all the function bodies.
91 class function_body(object):
92 def __init__(self, string, extra):
93 self.scrub = string
94 self.extrascrub = extra
95 def __str__(self):
96 return self.scrub
97
8398 def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose):
8499 for m in function_re.finditer(raw_tool_output):
85100 if not m:
86101 continue
87102 func = m.group('func')
88 scrubbed_body = scrubber(m.group('body'), *scrubber_args)
103 body = m.group('body')
104 scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra = False)
105 scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra = True)
89106 if m.groupdict().has_key('analysis'):
90107 analysis = m.group('analysis')
91108 if analysis.lower() != 'cost model analysis':
98115 for l in scrubbed_body.splitlines():
99116 print(' ' + l, file=sys.stderr)
100117 for prefix in prefixes:
101 if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
102 if prefix == prefixes[-1]:
103 print('WARNING: Found conflicting asm under the '
104 'same prefix: %r!' % (prefix,), file=sys.stderr)
118 if func in func_dict[prefix] and str(func_dict[prefix][func]) != scrubbed_body:
119 if func_dict[prefix][func] and func_dict[prefix][func].extrascrub == scrubbed_extra:
120 func_dict[prefix][func].scrub = scrubbed_extra
121 continue
105122 else:
106 func_dict[prefix][func] = None
107 continue
108
109 func_dict[prefix][func] = scrubbed_body
123 if prefix == prefixes[-1]:
124 print('WARNING: Found conflicting asm under the '
125 'same prefix: %r!' % (prefix,), file=sys.stderr)
126 else:
127 func_dict[prefix][func] = None
128 continue
129
130 func_dict[prefix][func] = function_body(scrubbed_body, scrubbed_extra)
110131
111132 ##### Generator of LLVM IR CHECK lines
112133
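Taken together, the common.py changes above work like this: each function body is scrubbed twice (do_scrub runs the scrubber with extra_scrub forced off and then on, via a deep-copied argument list), both results are stored in a small function_body object, and a conflict between RUN-line prefixes is resolved by promoting the stored checks to the extra-scrubbed text when that is the only difference. A condensed, illustrative restatement of that decision, not a drop-in replacement for build_function_body_dictionary:

    # Condensed restatement of the conflict handling added above; illustrative only.
    class function_body(object):
      def __init__(self, string, extra):
        self.scrub = string        # checks emitted by default
        self.extrascrub = extra    # same body after the extra ret{{[l|q]}} scrub
      def __str__(self):
        return self.scrub

    def record(func_dict, prefix, func, scrubbed_body, scrubbed_extra, is_last_prefix):
      if func in func_dict[prefix] and str(func_dict[prefix][func]) != scrubbed_body:
        existing = func_dict[prefix][func]
        if existing and existing.extrascrub == scrubbed_extra:
          # The bodies differ only where the extra scrub hides it (retl vs retq),
          # so promote the stored checks and keep sharing this prefix.
          existing.scrub = scrubbed_extra
          return
        if not is_last_prefix:
          func_dict[prefix][func] = None  # this prefix can no longer be used for func
          return
        print('WARNING: Found conflicting asm under the same prefix: %r!' % (prefix,))
      func_dict[prefix][func] = function_body(scrubbed_body, scrubbed_extra)

Because __str__ returns the currently selected scrub, downstream consumers only need str(func_dict[prefix][func]) - as the add-checks hunk further below now does - and automatically emit whichever level was chosen.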
187208
188209 printed_prefixes.append(checkprefix)
189210 output_lines.append(check_label_format % (checkprefix, func_name))
190 func_body = func_dict[checkprefix][func_name].splitlines()
211 func_body = str(func_dict[checkprefix][func_name]).splitlines()
191212
192213 # For ASM output, just emit the check lines.
193214 if is_asm == True:
2727 parser.add_argument(
2828 '--function', help='The function in the test file to update')
2929 parser.add_argument(
30 '--x86_extra_scrub', action='store_true',
31 help='Use more regex for x86 matching to reduce diffs between various subtargets')
30 '--extra_scrub', action='store_true',
31 help='Always use additional regex to further reduce diffs between various subtargets')
3232 parser.add_argument('tests', nargs='+')
3333 args = parser.parse_args()
3434