llvm.org GIT mirror: llvm / commit a0486c6
[X86] Remove and autoupgrade vpconflict intrinsics that take a mask and passthru argument.

We have unmasked versions as of r352172.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352270 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Craig Topper, 1 year, 4 months ago
7 changed files with 379 additions and 98 deletions.
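In IR terms, the upgrade rewrites each call to a removed masked intrinsic as a call to the surviving unmasked intrinsic, followed by a bitcast of the integer mask to a vector of i1 and a select against the passthru operand. A minimal before/after sketch for the 512-bit dword case (value names are illustrative; the pattern itself is the one the regenerated tests below check):

; before: old bitcode calls the masked intrinsic with passthru and mask operands
  %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> %passthru, i16 %mask)

; after autoupgrade: unmasked intrinsic plus an explicit mask select
  %c = call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %a)
  %m = bitcast i16 %mask to <16 x i1>
  %res = select <16 x i1> %m, <16 x i32> %c, <16 x i32> %passthru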
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ ... @@
   def int_x86_avx512_conflict_q_512 :
       GCCBuiltin<"__builtin_ia32_vpconflictdi_512">,
       Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty], [IntrNoMem]>;
-
-  def int_x86_avx512_mask_conflict_d_128 : // FIXME: remove
-          Intrinsic<[llvm_v4i32_ty],
-                    [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
-  def int_x86_avx512_mask_conflict_d_256 : // FIXME: remove
-          Intrinsic<[llvm_v8i32_ty],
-                    [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
-  def int_x86_avx512_mask_conflict_d_512 : // FIXME: remove
-          Intrinsic<[llvm_v16i32_ty],
-                    [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
-                    [IntrNoMem]>;
-
-  def int_x86_avx512_mask_conflict_q_128 : // FIXME: remove
-          Intrinsic<[llvm_v2i64_ty],
-                    [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
-  def int_x86_avx512_mask_conflict_q_256 : // FIXME: remove
-          Intrinsic<[llvm_v4i64_ty],
-                    [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
-  def int_x86_avx512_mask_conflict_q_512 : // FIXME: remove
-          Intrinsic<[llvm_v8i64_ty],
-                    [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
-                    [IntrNoMem]>;
 }

 // Compares
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ ... @@
          Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
          Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
          Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
+         Name.startswith("avx512.mask.conflict.") || // Added in 9.0
          Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
          Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
          Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
@@ ... @@
         IID = Intrinsic::x86_avx512_pmultishift_qb_512;
       else
         llvm_unreachable("Unexpected intrinsic");
+    } else if (Name.startswith("conflict.")) {
+      if (Name[9] == 'd' && VecWidth == 128)
+        IID = Intrinsic::x86_avx512_conflict_d_128;
+      else if (Name[9] == 'd' && VecWidth == 256)
+        IID = Intrinsic::x86_avx512_conflict_d_256;
+      else if (Name[9] == 'd' && VecWidth == 512)
+        IID = Intrinsic::x86_avx512_conflict_d_512;
+      else if (Name[9] == 'q' && VecWidth == 128)
+        IID = Intrinsic::x86_avx512_conflict_q_128;
+      else if (Name[9] == 'q' && VecWidth == 256)
+        IID = Intrinsic::x86_avx512_conflict_q_256;
+      else if (Name[9] == 'q' && VecWidth == 512)
+        IID = Intrinsic::x86_avx512_conflict_q_512;
+      else
+        llvm_unreachable("Unexpected intrinsic");
     } else
       return false;

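The 128-bit and 256-bit variants keep their i8 mask argument even though they have fewer than eight lanes, so the upgrader has to narrow the mask: after the bitcast to <8 x i1>, a shufflevector extracts the low lanes before the select. A sketch of the IR this produces for the <4 x i32> case, matching the avx512vlcd tests further down (value names are illustrative):

  %c = call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %a)
  %m8 = bitcast i8 %mask to <8 x i1>
  ; only the low 4 of the 8 mask bits are meaningful for a 4-element vector
  %m4 = shufflevector <8 x i1> %m8, <8 x i1> %m8, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %m4, <4 x i32> %c, <4 x i32> %passthru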
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ ... @@
                      X86ISD::COMPRESS, 0),
   X86_INTRINSIC_DATA(avx512_mask_compress_w_512, COMPRESS_EXPAND_IN_REG,
                      X86ISD::COMPRESS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_conflict_d_128, INTR_TYPE_1OP_MASK,
-                     X86ISD::CONFLICT, 0),
-  X86_INTRINSIC_DATA(avx512_mask_conflict_d_256, INTR_TYPE_1OP_MASK,
-                     X86ISD::CONFLICT, 0),
-  X86_INTRINSIC_DATA(avx512_mask_conflict_d_512, INTR_TYPE_1OP_MASK,
-                     X86ISD::CONFLICT, 0),
-  X86_INTRINSIC_DATA(avx512_mask_conflict_q_128, INTR_TYPE_1OP_MASK,
-                     X86ISD::CONFLICT, 0),
-  X86_INTRINSIC_DATA(avx512_mask_conflict_q_256, INTR_TYPE_1OP_MASK,
-                     X86ISD::CONFLICT, 0),
-  X86_INTRINSIC_DATA(avx512_mask_conflict_q_512, INTR_TYPE_1OP_MASK,
-                     X86ISD::CONFLICT, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, CVTPD2DQ_MASK,
                      X86ISD::CVTP2SI, X86ISD::MCVTP2SI),
   X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_512, INTR_TYPE_1OP_MASK,
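Deleting these lowering-table entries is safe because the masked intrinsics no longer reach instruction selection: the select emitted by the autoupgrade folds into the masked instruction encodings during ISel, so the {%k1} and {%k1} {z} forms are still generated, as the regenerated CHECK lines below verify. A minimal zero-masking sketch (illustrative function name; the expected lowering is taken from the tests):

define <16 x i32> @maskz_conflict_example(<16 x i32> %a, i16 %mask) {
  ; expected lowering: vpconflictd %zmm0, %zmm0 {%k1} {z}
  %c = call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %a)
  %m = bitcast i16 %mask to <16 x i1>
  %z = select <16 x i1> %m, <16 x i32> %c, <16 x i32> zeroinitializer
  ret <16 x i32> %z
}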
--- a/test/CodeGen/X86/avx512cd-intrinsics-upgrade.ll
+++ b/test/CodeGen/X86/avx512cd-intrinsics-upgrade.ll
@@ ... @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512cd | FileCheck %s --check-prefixes=CHECK,X86
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512cd | FileCheck %s --check-prefixes=CHECK,X64
+
+declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
+
+define <16 x i32> @test_conflict_d(<16 x i32> %a) {
+; CHECK-LABEL: test_conflict_d:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpconflictd %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> undef, i16 -1)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_mask_conflict_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
+; X86-LABEL: test_mask_conflict_d:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: vpconflictd %zmm0, %zmm1 {%k1}
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mask_conflict_d:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpconflictd %zmm0, %zmm1 {%k1}
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0
+; X64-NEXT: retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
+; X86-LABEL: test_maskz_conflict_d:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: vpconflictd %zmm0, %zmm0 {%k1} {z}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_maskz_conflict_d:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpconflictd %zmm0, %zmm0 {%k1} {z}
+; X64-NEXT: retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
+  ret <16 x i32> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
+
+define <8 x i64> @test_conflict_q(<8 x i64> %a) {
+; CHECK-LABEL: test_conflict_q:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpconflictq %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> undef, i8 -1)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
+; X86-LABEL: test_mask_conflict_q:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vpconflictq %zmm0, %zmm1 {%k1}
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mask_conflict_q:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpconflictq %zmm0, %zmm1 {%k1}
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0
+; X64-NEXT: retq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_maskz_conflict_q(<8 x i64> %a, i8 %mask) {
+; X86-LABEL: test_maskz_conflict_q:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vpconflictq %zmm0, %zmm0 {%k1} {z}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_maskz_conflict_q:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpconflictq %zmm0, %zmm0 {%k1} {z}
+; X64-NEXT: retq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 %mask)
+  ret <8 x i64> %res
+}

 define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
 ; CHECK-LABEL: test_lzcnt_d:
--- a/test/CodeGen/X86/avx512cd-intrinsics.ll
+++ b/test/CodeGen/X86/avx512cd-intrinsics.ll
@@ ... @@
 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512cd | FileCheck %s --check-prefixes=CHECK,X86
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512cd | FileCheck %s --check-prefixes=CHECK,X64

-declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
-
-define <8 x i64> @test_conflict_q(<8 x i64> %a) {
-; CHECK-LABEL: test_conflict_q:
+define <16 x i32> @test_conflict_d(<16 x i32> %a) {
+; CHECK-LABEL: test_conflict_d:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vpconflictq %zmm0, %zmm0
+; CHECK-NEXT: vpconflictd %zmm0, %zmm0
 ; CHECK-NEXT: ret{{[l|q]}}
-  %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
-  ret <8 x i64> %res
+  %1 = call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %a)
+  ret <16 x i32> %1
 }

-declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
+define <16 x i32> @test_mask_conflict_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
+; X86-LABEL: test_mask_conflict_d:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: vpconflictd %zmm0, %zmm1 {%k1}
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mask_conflict_d:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpconflictd %zmm0, %zmm1 {%k1}
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0
+; X64-NEXT: retq
+  %1 = call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %a)
+  %2 = bitcast i16 %mask to <16 x i1>
+  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %b
+  ret <16 x i32> %3
+}

 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
 ; X86-LABEL: test_maskz_conflict_d:
@@ ... @@
 ; X64-NEXT: kmovw %edi, %k1
 ; X64-NEXT: vpconflictd %zmm0, %zmm0 {%k1} {z}
 ; X64-NEXT: retq
-  %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
-  ret <16 x i32> %res
+  %1 = call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %a)
+  %2 = bitcast i16 %mask to <16 x i1>
+  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+  ret <16 x i32> %3
+}
+
+define <8 x i64> @test_conflict_q(<8 x i64> %a) {
+; CHECK-LABEL: test_conflict_q:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpconflictq %zmm0, %zmm0
+; CHECK-NEXT: ret{{[l|q]}}
+  %1 = call <8 x i64> @llvm.x86.avx512.conflict.q.512(<8 x i64> %a)
+  ret <8 x i64> %1
 }

 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
@@ ... @@
 ; X64-NEXT: vpconflictq %zmm0, %zmm1 {%k1}
 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0
 ; X64-NEXT: retq
-  %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
-  ret <8 x i64> %res
+  %1 = call <8 x i64> @llvm.x86.avx512.conflict.q.512(<8 x i64> %a)
+  %2 = bitcast i8 %mask to <8 x i1>
+  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %b
+  ret <8 x i64> %3
+}
+
+define <8 x i64> @test_maskz_conflict_q(<8 x i64> %a, i8 %mask) {
+; X86-LABEL: test_maskz_conflict_q:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vpconflictq %zmm0, %zmm0 {%k1} {z}
+; X86-NEXT: retl
+;
+; X64-LABEL: test_maskz_conflict_q:
+; X64: # %bb.0:
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpconflictq %zmm0, %zmm0 {%k1} {z}
+; X64-NEXT: retq
+  %1 = call <8 x i64> @llvm.x86.avx512.conflict.q.512(<8 x i64> %a)
+  %2 = bitcast i8 %mask to <8 x i1>
+  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
+  ret <8 x i64> %3
 }

 define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
@@ ... @@
   %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %b
   ret <8 x i64> %3
 }
+
+declare <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32>)
+declare <8 x i64> @llvm.x86.avx512.conflict.q.512(<8 x i64>)
--- a/test/CodeGen/X86/avx512vlcd-intrinsics-upgrade.ll
+++ b/test/CodeGen/X86/avx512vlcd-intrinsics-upgrade.ll
@@ ... @@
 }
 declare <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8)

+declare <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32>, <4 x i32>, i8)
+
+define <4 x i32> @test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
+; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
+; X86: # %bb.0:
+; X86-NEXT: vpconflictd %xmm0, %xmm2
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vpconflictd %xmm0, %xmm1 {%k1}
+; X86-NEXT: vpconflictd %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
+; X64: # %bb.0:
+; X64-NEXT: vpconflictd %xmm0, %xmm2
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpconflictd %xmm0, %xmm1 {%k1}
+; X64-NEXT: vpconflictd %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; X64-NEXT: retq
+  %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
+  %res1 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
+  %res3 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
+  %res2 = add <4 x i32> %res, %res1
+  %res4 = add <4 x i32> %res2, %res3
+  ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32>, <8 x i32>, i8)
+
+define <8 x i32> @test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
+; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
+; X86: # %bb.0:
+; X86-NEXT: vpconflictd %ymm0, %ymm2
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vpconflictd %ymm0, %ymm1 {%k1}
+; X86-NEXT: vpconflictd %ymm0, %ymm0 {%k1} {z}
+; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
+; X64: # %bb.0:
+; X64-NEXT: vpconflictd %ymm0, %ymm2
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpconflictd %ymm0, %ymm1 {%k1}
+; X64-NEXT: vpconflictd %ymm0, %ymm0 {%k1} {z}
+; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; X64-NEXT: retq
+  %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
+  %res2 = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> zeroinitializer, i8 %x2)
+  %res3 = add <8 x i32> %res, %res1
+  %res4 = add <8 x i32> %res2, %res3
+  ret <8 x i32> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64>, <2 x i64>, i8)
+
+define <2 x i64> @test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
+; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
+; X86: # %bb.0:
+; X86-NEXT: vpconflictq %xmm0, %xmm2
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vpconflictq %xmm0, %xmm1 {%k1}
+; X86-NEXT: vpconflictq %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0
+; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
+; X64: # %bb.0:
+; X64-NEXT: vpconflictq %xmm0, %xmm2
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpconflictq %xmm0, %xmm1 {%k1}
+; X64-NEXT: vpconflictq %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0
+; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; X64-NEXT: retq
+  %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
+  %res1 = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
+  %res2 = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> zeroinitializer, i8 %x2)
+  %res3 = add <2 x i64> %res, %res1
+  %res4 = add <2 x i64> %res2, %res3
+  ret <2 x i64> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64>, <4 x i64>, i8)
+
+define <4 x i64> @test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
+; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
+; X86: # %bb.0:
+; X86-NEXT: vpconflictq %ymm0, %ymm2
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vpconflictq %ymm0, %ymm1 {%k1}
+; X86-NEXT: vpconflictq %ymm0, %ymm0 {%k1} {z}
+; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0
+; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
+; X64: # %bb.0:
+; X64-NEXT: vpconflictq %ymm0, %ymm2
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vpconflictq %ymm0, %ymm1 {%k1}
+; X64-NEXT: vpconflictq %ymm0, %ymm0 {%k1} {z}
+; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0
+; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; X64-NEXT: retq
+  %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
+  %res1 = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
+  %res2 = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> zeroinitializer, i8 %x2)
+  %res3 = add <4 x i64> %res, %res1
+  %res4 = add <4 x i64> %res2, %res3
+  ret <4 x i64> %res4
+}
+
--- a/test/CodeGen/X86/avx512vlcd-intrinsics.ll
+++ b/test/CodeGen/X86/avx512vlcd-intrinsics.ll
@@ ... @@
 }
 declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) #0

-declare <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32>, <4 x i32>, i8)
-
-define <4 x i32>@test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
+define <4 x i32> @test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
 ; X86: # %bb.0:
+; X86-NEXT: vpconflictd %xmm0, %xmm2
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: kmovw %eax, %k1
 ; X86-NEXT: vpconflictd %xmm0, %xmm1 {%k1}
-; X86-NEXT: vpconflictd %xmm0, %xmm2 {%k1} {z}
-; X86-NEXT: vpconflictd %xmm0, %xmm0
-; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; X86-NEXT: vpconflictd %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0
 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
 ; X64: # %bb.0:
-; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vpconflictd %xmm0, %xmm2 {%k1} {z}
+; X64-NEXT: vpconflictd %xmm0, %xmm2
+; X64-NEXT: kmovw %edi, %k1
 ; X64-NEXT: vpconflictd %xmm0, %xmm1 {%k1}
-; X64-NEXT: vpconflictd %xmm0, %xmm0
-; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; X64-NEXT: vpconflictd %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0
 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0
 ; X64-NEXT: retq
-  %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
-  %res1 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
-  %res3 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
-  %res2 = add <4 x i32> %res, %res1
-  %res4 = add <4 x i32> %res2, %res3
+  %1 = call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %x0)
+  %2 = bitcast i8 %x2 to <8 x i1>
+  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x1
+  %4 = call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %x0)
+  %5 = call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %x0)
+  %6 = bitcast i8 %x2 to <8 x i1>
+  %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
+  %res2 = add <4 x i32> %3, %4
+  %res4 = add <4 x i32> %res2, %7
   ret <4 x i32> %res4
 }

-declare <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32>, <8 x i32>, i8)
-
-define <8 x i32>@test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
+define <8 x i32> @test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
 ; X86: # %bb.0:
+; X86-NEXT: vpconflictd %ymm0, %ymm2
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: kmovw %eax, %k1
 ; X86-NEXT: vpconflictd %ymm0, %ymm1 {%k1}
-; X86-NEXT: vpconflictd %ymm0, %ymm0
+; X86-NEXT: vpconflictd %ymm0, %ymm0 {%k1} {z}
+; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0
 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
 ; X64: # %bb.0:
+; X64-NEXT: vpconflictd %ymm0, %ymm2
 ; X64-NEXT: kmovw %edi, %k1
 ; X64-NEXT: vpconflictd %ymm0, %ymm1 {%k1}
-; X64-NEXT: vpconflictd %ymm0, %ymm0
+; X64-NEXT: vpconflictd %ymm0, %ymm0 {%k1} {z}
+; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0
 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0
 ; X64-NEXT: retq
-  %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
-  %res1 = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
-  %res2 = add <8 x i32> %res, %res1
-  ret <8 x i32> %res2
-}
-
-declare <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64>, <2 x i64>, i8)
-
-define <2 x i64>@test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
+  %1 = call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %x0)
+  %2 = bitcast i8 %x2 to <8 x i1>
+  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
+  %4 = call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %x0)
+  %5 = call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %x0)
+  %6 = bitcast i8 %x2 to <8 x i1>
+  %7 = select <8 x i1> %6, <8 x i32> %5, <8 x i32> zeroinitializer
+  %res3 = add <8 x i32> %3, %4
+  %res4 = add <8 x i32> %7, %res3
+  ret <8 x i32> %res4
+}
+
+define <2 x i64> @test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
 ; X86: # %bb.0:
+; X86-NEXT: vpconflictq %xmm0, %xmm2
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: kmovw %eax, %k1
 ; X86-NEXT: vpconflictq %xmm0, %xmm1 {%k1}
-; X86-NEXT: vpconflictq %xmm0, %xmm0
+; X86-NEXT: vpconflictq %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0
 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
 ; X64: # %bb.0:
+; X64-NEXT: vpconflictq %xmm0, %xmm2
 ; X64-NEXT: kmovw %edi, %k1
 ; X64-NEXT: vpconflictq %xmm0, %xmm1 {%k1}
-; X64-NEXT: vpconflictq %xmm0, %xmm0
+; X64-NEXT: vpconflictq %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0
 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0
 ; X64-NEXT: retq
-  %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
-  %res1 = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
-  %res2 = add <2 x i64> %res, %res1
-  ret <2 x i64> %res2
-}
-
-declare <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64>, <4 x i64>, i8)
-
-define <4 x i64>@test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
+  %1 = call <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %x0)
+  %2 = bitcast i8 %x2 to <8 x i1>
+  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
+  %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x1
+  %4 = call <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %x0)
+  %5 = call <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %x0)
+  %6 = bitcast i8 %x2 to <8 x i1>
+  %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <2 x i32> <i32 0, i32 1>
+  %7 = select <2 x i1> %extract, <2 x i64> %5, <2 x i64> zeroinitializer
+  %res3 = add <2 x i64> %3, %4
+  %res4 = add <2 x i64> %7, %res3
+  ret <2 x i64> %res4
+}
+
+define <4 x i64> @test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
 ; X86: # %bb.0:
+; X86-NEXT: vpconflictq %ymm0, %ymm2
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: kmovw %eax, %k1
 ; X86-NEXT: vpconflictq %ymm0, %ymm1 {%k1}
-; X86-NEXT: vpconflictq %ymm0, %ymm0
+; X86-NEXT: vpconflictq %ymm0, %ymm0 {%k1} {z}
+; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0
 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
 ; X64: # %bb.0:
+; X64-NEXT: vpconflictq %ymm0, %ymm2
 ; X64-NEXT: kmovw %edi, %k1
 ; X64-NEXT: vpconflictq %ymm0, %ymm1 {%k1}
-; X64-NEXT: vpconflictq %ymm0, %ymm0
+; X64-NEXT: vpconflictq %ymm0, %ymm0 {%k1} {z}
+; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0
 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0
 ; X64-NEXT: retq
-  %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
-  %res1 = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
-  %res2 = add <4 x i64> %res, %res1
-  ret <4 x i64> %res2
-}
-
+  %1 = call <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %x0)
+  %2 = bitcast i8 %x2 to <8 x i1>
+  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x1
+  %4 = call <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %x0)
+  %5 = call <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %x0)
+  %6 = bitcast i8 %x2 to <8 x i1>
+  %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %7 = select <4 x i1> %extract, <4 x i64> %5, <4 x i64> zeroinitializer
+  %res3 = add <4 x i64> %3, %4
+  %res4 = add <4 x i64> %7, %res3
+  ret <4 x i64> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32>)
+declare <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32>)
+declare <2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64>)
+declare <4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64>)