llvm / commit 1dcb078

[X86][AVX512] Converted the MOVDDUP/MOVSLDUP/MOVSHDUP masked intrinsics to generic IR

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274443 91177308-0d34-0410-b5e6-96231b3b80d8
Simon Pilgrim, 4 years ago

3 changed files with 156 additions and 72 deletions.
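Each of the masked intrinsics removed below is expressible as a plain shufflevector followed by a select on the write-mask bits, which is the generic-IR form the updated tests check for. A minimal sketch of the 512-bit movddup case (the function and value names here are illustrative, not part of the patch):

define <8 x double> @movddup_mask_sketch(<8 x double> %a, <8 x double> %passthru, i8 %mask) {
  ; duplicate each even double lane into the odd lane above it
  %dup = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ; merge-masking: keep a shuffled lane where its mask bit is set, else the passthru lane
  %m = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %m, <8 x double> %dup, <8 x double> %passthru
  ret <8 x double> %res
}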
include/llvm/IR/IntrinsicsX86.td
@@ -1525,60 +1525,6 @@
           GCCBuiltin<"__builtin_ia32_shufps512_mask">,
           Intrinsic<[llvm_v16f32_ty],
                     [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
-                    [IntrNoMem]>;
-
-  def int_x86_avx512_mask_movshdup_128 :
-          GCCBuiltin<"__builtin_ia32_movshdup128_mask">,
-          Intrinsic<[llvm_v4f32_ty],
-                    [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
-
-  def int_x86_avx512_mask_movshdup_256 :
-          GCCBuiltin<"__builtin_ia32_movshdup256_mask">,
-          Intrinsic<[llvm_v8f32_ty],
-                    [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
-
-  def int_x86_avx512_mask_movshdup_512 :
-          GCCBuiltin<"__builtin_ia32_movshdup512_mask">,
-          Intrinsic<[llvm_v16f32_ty],
-                    [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty],
-                    [IntrNoMem]>;
-
-  def int_x86_avx512_mask_movsldup_128 :
-          GCCBuiltin<"__builtin_ia32_movsldup128_mask">,
-          Intrinsic<[llvm_v4f32_ty],
-                    [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
-
-  def int_x86_avx512_mask_movsldup_256 :
-          GCCBuiltin<"__builtin_ia32_movsldup256_mask">,
-          Intrinsic<[llvm_v8f32_ty],
-                    [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
-
-  def int_x86_avx512_mask_movsldup_512 :
-          GCCBuiltin<"__builtin_ia32_movsldup512_mask">,
-          Intrinsic<[llvm_v16f32_ty],
-                    [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty],
-                    [IntrNoMem]>;
-
-  def int_x86_avx512_mask_movddup_128 :
-          GCCBuiltin<"__builtin_ia32_movddup128_mask">,
-          Intrinsic<[llvm_v2f64_ty],
-                    [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
-
-  def int_x86_avx512_mask_movddup_256 :
-          GCCBuiltin<"__builtin_ia32_movddup256_mask">,
-          Intrinsic<[llvm_v4f64_ty],
-                    [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
-
-  def int_x86_avx512_mask_movddup_512 :
-          GCCBuiltin<"__builtin_ia32_movddup512_mask">,
-          Intrinsic<[llvm_v8f64_ty],
-                    [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty],
                     [IntrNoMem]>;
 }
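Note that for the 128-bit variants the i8 mask carries more bits than the vector has lanes, so the generic form first bitcasts the mask to <8 x i1> and then extracts the low four bits with a second shufflevector, mirroring what clang emits for the corresponding builtins (again a sketch with assumed names):

define <4 x float> @movshdup_mask_128_sketch(<4 x float> %a, <4 x float> %passthru, i8 %mask) {
  ; movshdup: duplicate the odd float lanes downward
  %dup = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ; only the low 4 of the 8 mask bits are meaningful for a 4-lane vector
  %m8 = bitcast i8 %mask to <8 x i1>
  %m4 = shufflevector <8 x i1> %m8, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %m4, <4 x float> %dup, <4 x float> %passthru
  ret <4 x float> %res
}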
lib/Target/X86/X86IntrinsicsInfo.h
@@ -817,28 +817,10 @@
                      X86ISD::FMIN, X86ISD::FMIN_RND),
   X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM,
                      X86ISD::FMIN, X86ISD::FMIN_RND),
-  X86_INTRINSIC_DATA(avx512_mask_movddup_128, INTR_TYPE_1OP_MASK,
-                     X86ISD::MOVDDUP, 0),
-  X86_INTRINSIC_DATA(avx512_mask_movddup_256, INTR_TYPE_1OP_MASK,
-                     X86ISD::MOVDDUP, 0),
-  X86_INTRINSIC_DATA(avx512_mask_movddup_512, INTR_TYPE_1OP_MASK,
-                     X86ISD::MOVDDUP, 0),
   X86_INTRINSIC_DATA(avx512_mask_move_sd, INTR_TYPE_SCALAR_MASK,
                      X86ISD::MOVSD, 0),
   X86_INTRINSIC_DATA(avx512_mask_move_ss, INTR_TYPE_SCALAR_MASK,
                      X86ISD::MOVSS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_movshdup_128, INTR_TYPE_1OP_MASK,
-                     X86ISD::MOVSHDUP, 0),
-  X86_INTRINSIC_DATA(avx512_mask_movshdup_256, INTR_TYPE_1OP_MASK,
-                     X86ISD::MOVSHDUP, 0),
-  X86_INTRINSIC_DATA(avx512_mask_movshdup_512, INTR_TYPE_1OP_MASK,
-                     X86ISD::MOVSHDUP, 0),
-  X86_INTRINSIC_DATA(avx512_mask_movsldup_128, INTR_TYPE_1OP_MASK,
-                     X86ISD::MOVSLDUP, 0),
-  X86_INTRINSIC_DATA(avx512_mask_movsldup_256, INTR_TYPE_1OP_MASK,
-                     X86ISD::MOVSLDUP, 0),
-  X86_INTRINSIC_DATA(avx512_mask_movsldup_512, INTR_TYPE_1OP_MASK,
-                     X86ISD::MOVSLDUP, 0),
   X86_INTRINSIC_DATA(avx512_mask_mul_pd_128, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
   X86_INTRINSIC_DATA(avx512_mask_mul_pd_256, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
   X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
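With these table entries gone, the intrinsics no longer reach the INTR_TYPE_1OP_MASK lowering path; the shuffle-plus-select form is handled by the normal shuffle lowering instead, as the tests below demonstrate. Zero-masking follows the same shape with a select against zero, e.g. (sketch, assumed names):

define <8 x double> @movddup_maskz_sketch(<8 x double> %a, i8 %mask) {
  %dup = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  %m = bitcast i8 %mask to <8 x i1>
  ; zero-masking: inactive lanes are cleared instead of merged with a passthru value
  %res = select <8 x i1> %m, <8 x double> %dup, <8 x double> zeroinitializer
  ret <8 x double> %res
}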
test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -2,6 +2,162 @@
 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=X64
 
 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512f-builtins.c
+
+define <8 x double> @test_mm512_movddup_pd(<8 x double> %a0) {
+; X32-LABEL: test_mm512_movddup_pd:
+; X32:       # BB#0:
+; X32-NEXT:    vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_movddup_pd:
+; X64:       # BB#0:
+; X64-NEXT:    vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
+; X64-NEXT:    retq
+  %res = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  ret <8 x double> %res
+}
+
+define <8 x double> @test_mm512_mask_movddup_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
+; X32-LABEL: test_mm512_mask_movddup_pd:
+; X32:       # BB#0:
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    kmovw %eax, %k1
+; X32-NEXT:    vmovddup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6]
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_mask_movddup_pd:
+; X64:       # BB#0:
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vmovddup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6]
+; X64-NEXT:    retq
+  %arg1 = bitcast i8 %a1 to <8 x i1>
+  %res0 = shufflevector <8 x double> %a2, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
+  ret <8 x double> %res1
+}
+
+define <8 x double> @test_mm512_maskz_movddup_pd(i8 %a0, <8 x double> %a1) {
+; X32-LABEL: test_mm512_maskz_movddup_pd:
+; X32:       # BB#0:
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    kmovw %eax, %k1
+; X32-NEXT:    vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_maskz_movddup_pd:
+; X64:       # BB#0:
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
+; X64-NEXT:    retq
+  %arg0 = bitcast i8 %a0 to <8 x i1>
+  %res0 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
+  ret <8 x double> %res1
+}
+
+define <16 x float> @test_mm512_movehdup_ps(<16 x float> %a0) {
+; X32-LABEL: test_mm512_movehdup_ps:
+; X32:       # BB#0:
+; X32-NEXT:    vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_movehdup_ps:
+; X64:       # BB#0:
+; X64-NEXT:    vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; X64-NEXT:    retq
+  %res = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+  ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_mask_movehdup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
+; X32-LABEL: test_mm512_mask_movehdup_ps:
+; X32:       # BB#0:
+; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
+; X32-NEXT:    kmovw %eax, %k1
+; X32-NEXT:    vmovshdup {{.*#+}} zmm0 {%k1} = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_mask_movehdup_ps:
+; X64:       # BB#0:
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vmovshdup {{.*#+}} zmm0 {%k1} = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; X64-NEXT:    retq
+  %arg1 = bitcast i16 %a1 to <16 x i1>
+  %res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+  %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
+  ret <16 x float> %res1
+}
+
+define <16 x float> @test_mm512_maskz_movehdup_ps(i16 %a0, <16 x float> %a1) {
+; X32-LABEL: test_mm512_maskz_movehdup_ps:
+; X32:       # BB#0:
+; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
+; X32-NEXT:    kmovw %eax, %k1
+; X32-NEXT:    vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_maskz_movehdup_ps:
+; X64:       # BB#0:
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; X64-NEXT:    retq
+  %arg0 = bitcast i16 %a0 to <16 x i1>
+  %res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+  %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
+  ret <16 x float> %res1
+}
+
+define <16 x float> @test_mm512_moveldup_ps(<16 x float> %a0) {
+; X32-LABEL: test_mm512_moveldup_ps:
+; X32:       # BB#0:
+; X32-NEXT:    vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_moveldup_ps:
+; X64:       # BB#0:
+; X64-NEXT:    vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; X64-NEXT:    retq
+  %res = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+  ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_mask_moveldup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
+; X32-LABEL: test_mm512_mask_moveldup_ps:
+; X32:       # BB#0:
+; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
+; X32-NEXT:    kmovw %eax, %k1
+; X32-NEXT:    vmovsldup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_mask_moveldup_ps:
+; X64:       # BB#0:
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vmovsldup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; X64-NEXT:    retq
+  %arg1 = bitcast i16 %a1 to <16 x i1>
+  %res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+  %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
+  ret <16 x float> %res1
+}
+
+define <16 x float> @test_mm512_maskz_moveldup_ps(i16 %a0, <16 x float> %a1) {
+; X32-LABEL: test_mm512_maskz_moveldup_ps:
+; X32:       # BB#0:
+; X32-NEXT:    movw {{[0-9]+}}(%esp), %ax
+; X32-NEXT:    kmovw %eax, %k1
+; X32-NEXT:    vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_mm512_maskz_moveldup_ps:
+; X64:       # BB#0:
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; X64-NEXT:    retq
+  %arg0 = bitcast i16 %a0 to <16 x i1>
+  %res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+  %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
+  ret <16 x float> %res1
+}
 
 define <8 x i64> @test_mm512_unpackhi_epi32(<8 x i64> %a0, <8 x i64> %a1) {
 ; X32-LABEL: test_mm512_unpackhi_epi32: