[x86] Fix AVX maskload/store intrinsic prototypes.

The mask value type for the maskload/maskstore GCC builtins is never a vector of
packed floats/doubles. This patch fixes the following issues:

1. The mask argument for __builtin_ia32_maskloadpd and __builtin_ia32_maskstorepd
   should be of type llvm_v2i64_ty and not llvm_v2f64_ty.
2. The mask argument for __builtin_ia32_maskloadpd256 and __builtin_ia32_maskstorepd256
   should be of type llvm_v4i64_ty and not llvm_v4f64_ty.
3. The mask argument for __builtin_ia32_maskloadps and __builtin_ia32_maskstoreps
   should be of type llvm_v4i32_ty and not llvm_v4f32_ty.
4. The mask argument for __builtin_ia32_maskloadps256 and __builtin_ia32_maskstoreps256
   should be of type llvm_v8i32_ty and not llvm_v8f32_ty.

Differential Revision: http://reviews.llvm.org/D13776

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@250817 91177308-0d34-0410-b5e6-96231b3b80d8

Andrea Di Biagio, 4 years ago
4 changed files with 48 additions and 50 deletions.
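For context, the C-level AVX intrinsics that wrap these GCC builtins already take integer mask vectors (__m128i/__m256i), which is what the corrected llvm_v*i*_ty prototypes mirror. A minimal illustrative sketch, not part of this patch (the function names are made up; compile with AVX enabled, e.g. -mavx):

#include <immintrin.h>

/* Illustrative only: the mask operands are integer vectors, matching the
 * corrected intrinsic prototypes (v2i64 for *_pd, v8i32 for *_ps_256, etc.). */
__m128d load_pd_lane0_only(const double *p) {
  __m128i mask = _mm_set_epi64x(0, -1);   /* enable element 0, disable element 1 */
  return _mm_maskload_pd(p, mask);        /* _mm_maskload_pd(double const *, __m128i) */
}

__m256 load_ps256_low_half(const float *p) {
  __m256i mask = _mm256_set_epi32(0, 0, 0, 0, -1, -1, -1, -1);
  return _mm256_maskload_ps(p, mask);     /* _mm256_maskload_ps(float const *, __m256i) */
}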
@@ -1759,16 +1759,16 @@
 // Conditional load ops
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
   def int_x86_avx_maskload_pd : GCCBuiltin<"__builtin_ia32_maskloadpd">,
-      Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2f64_ty],
+      Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2i64_ty],
                 [IntrReadArgMem]>;
   def int_x86_avx_maskload_ps : GCCBuiltin<"__builtin_ia32_maskloadps">,
-      Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4f32_ty],
+      Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4i32_ty],
                 [IntrReadArgMem]>;
   def int_x86_avx_maskload_pd_256 : GCCBuiltin<"__builtin_ia32_maskloadpd256">,
-      Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4f64_ty],
+      Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4i64_ty],
                 [IntrReadArgMem]>;
   def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">,
-      Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty],
+      Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8i32_ty],
                 [IntrReadArgMem]>;
   def int_x86_avx512_mask_loadu_ps_512 : GCCBuiltin<"__builtin_ia32_loadups512_mask">,
       Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty],
@@ -1788,18 +1788,18 @@
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
   def int_x86_avx_maskstore_pd : GCCBuiltin<"__builtin_ia32_maskstorepd">,
       Intrinsic<[], [llvm_ptr_ty,
-                llvm_v2f64_ty, llvm_v2f64_ty], [IntrReadWriteArgMem]>;
+                llvm_v2i64_ty, llvm_v2f64_ty], [IntrReadWriteArgMem]>;
   def int_x86_avx_maskstore_ps : GCCBuiltin<"__builtin_ia32_maskstoreps">,
       Intrinsic<[], [llvm_ptr_ty,
-                llvm_v4f32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
+                llvm_v4i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>;
   def int_x86_avx_maskstore_pd_256 :
       GCCBuiltin<"__builtin_ia32_maskstorepd256">,
       Intrinsic<[], [llvm_ptr_ty,
-                llvm_v4f64_ty, llvm_v4f64_ty], [IntrReadWriteArgMem]>;
+                llvm_v4i64_ty, llvm_v4f64_ty], [IntrReadWriteArgMem]>;
   def int_x86_avx_maskstore_ps_256 :
       GCCBuiltin<"__builtin_ia32_maskstoreps256">,
       Intrinsic<[], [llvm_ptr_ty,
-                llvm_v8f32_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>;
+                llvm_v8i32_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>;
   def int_x86_avx512_mask_storeu_ps_512 :
       GCCBuiltin<"__builtin_ia32_storeups512_mask">,
       Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty],
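At the instruction level, VMASKMOVPS/VMASKMOVPD consult only the most significant bit of each mask element: a set sign bit selects the lane, a clear one suppresses the load or store (suppressed load lanes read back as zero, suppressed store lanes leave memory untouched). A small sketch of a masked store via the C intrinsic, illustrative only (the function name is made up):

#include <immintrin.h>

/* Store only the low two floats of v; the upper two memory locations are
 * left untouched because their mask elements have a clear sign bit. */
void store_low_two_floats(float *p, __m128 v) {
  __m128i mask = _mm_set_epi32(0, 0, -1, -1);   /* elements 0 and 1 enabled */
  _mm_maskstore_ps(p, mask, v);                 /* _mm_maskstore_ps(float *, __m128i, __m128) */
}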
@@ -2535,102 +2535,102 @@
 declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly


-define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
+define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x i64> %mask) {
 ; CHECK-LABEL: test_x86_avx_maskload_pd:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: vmaskmovpd (%eax), %xmm0, %xmm0
 ; CHECK-NEXT: retl
-  %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+  %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
 }
-declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly
+declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readonly


-define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) {
+define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x i64> %mask) {
 ; CHECK-LABEL: test_x86_avx_maskload_pd_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: vmaskmovpd (%eax), %ymm0, %ymm0
 ; CHECK-NEXT: retl
-  %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
+  %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1]
   ret <4 x double> %res
 }
-declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly
+declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readonly


-define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) {
+define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x i32> %mask) {
 ; CHECK-LABEL: test_x86_avx_maskload_ps:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: vmaskmovps (%eax), %xmm0, %xmm0
 ; CHECK-NEXT: retl
-  %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+  %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
-declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly
+declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readonly


-define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) {
+define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x i32> %mask) {
 ; CHECK-LABEL: test_x86_avx_maskload_ps_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: vmaskmovps (%eax), %ymm0, %ymm0
 ; CHECK-NEXT: retl
-  %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+  %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1]
   ret <8 x float> %res
 }
-declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly
+declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readonly


-define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) {
+define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2) {
 ; CHECK-LABEL: test_x86_avx_maskstore_pd:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: vmaskmovpd %xmm1, %xmm0, (%eax)
 ; CHECK-NEXT: retl
-  call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2)
+  call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2)
   ret void
 }
-declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind
+declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind


-define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) {
+define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x i64> %mask, <4 x double> %a2) {
 ; CHECK-LABEL: test_x86_avx_maskstore_pd_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: vmaskmovpd %ymm1, %ymm0, (%eax)
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retl
-  call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2)
+  call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %mask, <4 x double> %a2)
   ret void
 }
-declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind
+declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind


-define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) {
+define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2) {
 ; CHECK-LABEL: test_x86_avx_maskstore_ps:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: vmaskmovps %xmm1, %xmm0, (%eax)
 ; CHECK-NEXT: retl
-  call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2)
+  call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2)
   ret void
 }
-declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind
+declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind


-define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) {
+define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x i32> %mask, <8 x float> %a2) {
 ; CHECK-LABEL: test_x86_avx_maskstore_ps_256:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: vmaskmovps %ymm1, %ymm0, (%eax)
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retl
-  call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2)
+  call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %mask, <8 x float> %a2)
   ret void
 }
-declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
+declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind


 define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
@@ -87,7 +87,7 @@
   ret void
 }

-declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
+declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind

 ; CHECK_O0: _f_f
 ; CHECK-O0: vmovss LCPI
@@ -104,7 +104,7 @@
   br i1 undef, label %cif_mixed_test_all, label %cif_mixed_test_any_check

 cif_mixed_test_all: ; preds = %cif_mask_mixed
-  call void @llvm.x86.avx.maskstore.ps.256(i8* undef, <8 x float> 0>, <8 x float> undef) nounwind
+  call void @llvm.x86.avx.maskstore.ps.256(i8* undef, <8 x i32> 0>, <8 x float> undef) nounwind
   unreachable

 cif_mixed_test_any_check: ; preds = %cif_mask_mixed
@@ -41,6 +41,4 @@
 }

 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
-declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly
-declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone