llvm.org GIT mirror llvm / 60a0ef0
[X86] Legalize v32i1 without BWI via splitting to v16i1 rather than the default of promoting to v32i8. Summary: For the most part it's better to keep v32i1 as a mask type of a narrower width than trying to promote it to a ymm register. I had to add some overrides to the methods that get the types for the calling convention so that we still use v32i8 for argument/return purposes. There are still some regressions in here. I definitely saw some around shuffles. I think we probably should move vXi1 shuffle from lowering to a DAG combine where I think the extend and truncate we have to emit would be better combined. I think we also need a DAG combine to remove trunc from (extract_vector_elt (trunc)). Overall this removes something like 13000 CHECK lines from lit tests. Reviewers: zvi, RKSimon, delena, spatel Reviewed By: RKSimon Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D42031 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323201 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 2 years ago
16 changed file(s) with 16413 addition(s) and 29748 deletion(s). Raw diff Collapse all Expand all
17191719
/// Choose the legalization strategy for a vector type on X86.
///
/// Three outcomes are possible, checked in this order:
///  1. v32i1 on a subtarget that has AVX512 but not BWI is split
///     (TypeSplitVector). Per the commit description this yields v16i1 halves
///     instead of the default promotion to v32i8.
///  2. When ExperimentalVectorWideningLegalization is set, a vector whose
///     element count is not 1 and whose element type is not i1 is widened
///     (TypeWidenVector).
///  3. Anything else falls back to the TargetLoweringBase default.
17201720 TargetLoweringBase::LegalizeTypeAction
17211721 X86TargetLowering::getPreferredVectorAction(EVT VT) const {
// Keep v32i1 as (narrower) mask vectors when AVX512 is present without BWI.
1722 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1723 return TypeSplitVector;
1724
// Experimental widening path; i1 vectors and single-element vectors are
// excluded by the conditions below.
17221725 if (ExperimentalVectorWideningLegalization &&
17231726 VT.getVectorNumElements() != 1 &&
17241727 VT.getVectorElementType().getSimpleVT() != MVT::i1)
17251728 return TypeWidenVector;
17261729
17271730 return TargetLoweringBase::getPreferredVectorAction(VT);
1731 }
1732
/// Register type used for \p VT by the calling convention (MVT overload).
///
/// v32i1 on a subtarget with AVX512 but without BWI is reported as v32i8, so
/// arguments and return values keep using v32i8 even though
/// getPreferredVectorAction splits that type. Every other type defers to
/// TargetLowering.
1733 MVT X86TargetLowering::getRegisterTypeForCallingConv(MVT VT) const {
1734 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1735 return MVT::v32i8;
1736 return TargetLowering::getRegisterTypeForCallingConv(VT);
1737 }
1738
/// Register type used for \p VT by the calling convention (EVT overload).
///
/// Applies the same special case as the MVT overload: v32i1 on a subtarget
/// with AVX512 but without BWI is reported as v32i8. Every other type is
/// forwarded, together with \p Context, to TargetLowering.
1739 MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1740 EVT VT) const {
1741 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1742 return MVT::v32i8;
1743 return TargetLowering::getRegisterTypeForCallingConv(Context, VT);
1744 }
1745
/// Number of registers the calling convention uses for \p VT.
///
/// v32i1 on a subtarget with AVX512 but without BWI reports a single register
/// (presumably the one v32i8 register returned by
/// getRegisterTypeForCallingConv for the same condition -- confirm against
/// the calling-convention lowering). Every other type is forwarded, together
/// with \p Context, to TargetLowering.
1746 unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1747 EVT VT) const {
1748 if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
1749 return 1;
1750 return TargetLowering::getNumRegistersForCallingConv(Context, VT);
17281751 }
17291752
17301753 EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
10831083 /// \brief Customize the preferred legalization strategy for certain types.
10841084 LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
10851085
/// Override of the register type reported for \p VT under the calling
/// convention (MVT overload).
1086 MVT getRegisterTypeForCallingConv(MVT VT) const override;
1087
/// Override of the register type reported for \p VT under the calling
/// convention (EVT overload taking an LLVMContext).
1088 MVT getRegisterTypeForCallingConv(LLVMContext &Context,
1089 EVT VT) const override;
1090
/// Override of the number of registers reported for \p VT under the calling
/// convention.
1091 unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1092 EVT VT) const override;
1093
10861094 bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
10871095
10881096 bool supportSwiftError() const override;
5959 define <32 x i8> @avg_v32i8_mask(<32 x i8> %a, <32 x i8> %b, <32 x i8> %src, i32 %mask) nounwind {
6060 ; AVX512F-LABEL: avg_v32i8_mask:
6161 ; AVX512F: # %bb.0:
62 ; AVX512F-NEXT: pushq %rbp
63 ; AVX512F-NEXT: movq %rsp, %rbp
64 ; AVX512F-NEXT: andq $-32, %rsp
65 ; AVX512F-NEXT: subq $32, %rsp
66 ; AVX512F-NEXT: movl %edi, (%rsp)
62 ; AVX512F-NEXT: kmovw %edi, %k1
63 ; AVX512F-NEXT: shrl $16, %edi
6764 ; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0
68 ; AVX512F-NEXT: kmovw (%rsp), %k1
69 ; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
65 ; AVX512F-NEXT: kmovw %edi, %k2
7066 ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
7167 ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
7268 ; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
7369 ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
7470 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
7571 ; AVX512F-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
76 ; AVX512F-NEXT: movq %rbp, %rsp
77 ; AVX512F-NEXT: popq %rbp
7872 ; AVX512F-NEXT: retq
7973 ;
8074 ; AVX512BWVL-LABEL: avg_v32i8_mask:
9791 define <32 x i8> @avg_v32i8_maskz(<32 x i8> %a, <32 x i8> %b, i32 %mask) nounwind {
9892 ; AVX512F-LABEL: avg_v32i8_maskz:
9993 ; AVX512F: # %bb.0:
100 ; AVX512F-NEXT: pushq %rbp
101 ; AVX512F-NEXT: movq %rsp, %rbp
102 ; AVX512F-NEXT: andq $-32, %rsp
103 ; AVX512F-NEXT: subq $32, %rsp
104 ; AVX512F-NEXT: movl %edi, (%rsp)
94 ; AVX512F-NEXT: kmovw %edi, %k1
95 ; AVX512F-NEXT: shrl $16, %edi
10596 ; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0
106 ; AVX512F-NEXT: kmovw (%rsp), %k1
107 ; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
97 ; AVX512F-NEXT: kmovw %edi, %k2
10898 ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
10999 ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
110100 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
111101 ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
112102 ; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
113103 ; AVX512F-NEXT: vpand %ymm0, %ymm1, %ymm0
114 ; AVX512F-NEXT: movq %rbp, %rsp
115 ; AVX512F-NEXT: popq %rbp
116104 ; AVX512F-NEXT: retq
117105 ;
118106 ; AVX512BWVL-LABEL: avg_v32i8_maskz:
134122 define <64 x i8> @avg_v64i8_mask(<64 x i8> %a, <64 x i8> %b, <64 x i8> %src, i64 %mask) nounwind {
135123 ; AVX512F-LABEL: avg_v64i8_mask:
136124 ; AVX512F: # %bb.0:
137 ; AVX512F-NEXT: pushq %rbp
138 ; AVX512F-NEXT: movq %rsp, %rbp
139 ; AVX512F-NEXT: andq $-32, %rsp
140 ; AVX512F-NEXT: subq $64, %rsp
141 ; AVX512F-NEXT: movl %edi, (%rsp)
142 ; AVX512F-NEXT: shrq $32, %rdi
143 ; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp)
125 ; AVX512F-NEXT: movq %rdi, %rax
126 ; AVX512F-NEXT: movq %rdi, %rcx
127 ; AVX512F-NEXT: kmovw %edi, %k1
128 ; AVX512F-NEXT: movl %edi, %edx
129 ; AVX512F-NEXT: shrl $16, %edx
130 ; AVX512F-NEXT: shrq $32, %rax
131 ; AVX512F-NEXT: shrq $48, %rcx
144132 ; AVX512F-NEXT: vpavgb %ymm3, %ymm1, %ymm1
145133 ; AVX512F-NEXT: vpavgb %ymm2, %ymm0, %ymm0
146 ; AVX512F-NEXT: kmovw (%rsp), %k1
147 ; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
134 ; AVX512F-NEXT: kmovw %ecx, %k2
135 ; AVX512F-NEXT: kmovw %eax, %k3
136 ; AVX512F-NEXT: kmovw %edx, %k4
148137 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
149138 ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
150 ; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
139 ; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k4} {z}
151140 ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
152141 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
153142 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm4, %ymm0
154 ; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
155 ; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
156 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
143 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k3} {z}
157144 ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
158145 ; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
159146 ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
160147 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
161148 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm5, %ymm1
162 ; AVX512F-NEXT: movq %rbp, %rsp
163 ; AVX512F-NEXT: popq %rbp
164149 ; AVX512F-NEXT: retq
165150 ;
166151 ; AVX512BWVL-LABEL: avg_v64i8_mask:
183168 define <64 x i8> @avg_v64i8_maskz(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwind {
184169 ; AVX512F-LABEL: avg_v64i8_maskz:
185170 ; AVX512F: # %bb.0:
186 ; AVX512F-NEXT: pushq %rbp
187 ; AVX512F-NEXT: movq %rsp, %rbp
188 ; AVX512F-NEXT: andq $-32, %rsp
189 ; AVX512F-NEXT: subq $64, %rsp
190 ; AVX512F-NEXT: movl %edi, (%rsp)
191 ; AVX512F-NEXT: shrq $32, %rdi
192 ; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp)
171 ; AVX512F-NEXT: movq %rdi, %rax
172 ; AVX512F-NEXT: movq %rdi, %rcx
173 ; AVX512F-NEXT: kmovw %edi, %k1
174 ; AVX512F-NEXT: movl %edi, %edx
175 ; AVX512F-NEXT: shrl $16, %edx
176 ; AVX512F-NEXT: shrq $32, %rax
177 ; AVX512F-NEXT: shrq $48, %rcx
193178 ; AVX512F-NEXT: vpavgb %ymm3, %ymm1, %ymm1
194179 ; AVX512F-NEXT: vpavgb %ymm2, %ymm0, %ymm0
195 ; AVX512F-NEXT: kmovw (%rsp), %k1
196 ; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
180 ; AVX512F-NEXT: kmovw %ecx, %k2
181 ; AVX512F-NEXT: kmovw %eax, %k3
182 ; AVX512F-NEXT: kmovw %edx, %k4
197183 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
198184 ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
199 ; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
185 ; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k4} {z}
200186 ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
201187 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
202188 ; AVX512F-NEXT: vpand %ymm0, %ymm2, %ymm0
203 ; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
204 ; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
205 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
189 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k3} {z}
206190 ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
207191 ; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
208192 ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
209193 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
210194 ; AVX512F-NEXT: vpand %ymm1, %ymm2, %ymm1
211 ; AVX512F-NEXT: movq %rbp, %rsp
212 ; AVX512F-NEXT: popq %rbp
213195 ; AVX512F-NEXT: retq
214196 ;
215197 ; AVX512BWVL-LABEL: avg_v64i8_maskz:
339321 define <32 x i16> @avg_v32i16_mask(<32 x i16> %a, <32 x i16> %b, <32 x i16> %src, i32 %mask) nounwind {
340322 ; AVX512F-LABEL: avg_v32i16_mask:
341323 ; AVX512F: # %bb.0:
342 ; AVX512F-NEXT: pushq %rbp
343 ; AVX512F-NEXT: movq %rsp, %rbp
344 ; AVX512F-NEXT: andq $-32, %rsp
345 ; AVX512F-NEXT: subq $32, %rsp
346 ; AVX512F-NEXT: movl %edi, (%rsp)
347 ; AVX512F-NEXT: kmovw (%rsp), %k1
348 ; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
349 ; AVX512F-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z}
350 ; AVX512F-NEXT: vpmovdb %zmm6, %xmm6
351 ; AVX512F-NEXT: vpternlogd $255, %zmm7, %zmm7, %zmm7 {%k1} {z}
352 ; AVX512F-NEXT: vpmovdb %zmm7, %xmm7
324 ; AVX512F-NEXT: kmovw %edi, %k1
325 ; AVX512F-NEXT: shrl $16, %edi
353326 ; AVX512F-NEXT: vpavgw %ymm3, %ymm1, %ymm1
354327 ; AVX512F-NEXT: vpavgw %ymm2, %ymm0, %ymm0
355 ; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero,xmm7[8],zero,xmm7[9],zero,xmm7[10],zero,xmm7[11],zero,xmm7[12],zero,xmm7[13],zero,xmm7[14],zero,xmm7[15],zero
356 ; AVX512F-NEXT: vpsllw $15, %ymm2, %ymm2
357 ; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2
328 ; AVX512F-NEXT: kmovw %edi, %k2
329 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
330 ; AVX512F-NEXT: vpmovdw %zmm2, %ymm2
358331 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm4, %ymm0
359 ; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero
360 ; AVX512F-NEXT: vpsllw $15, %ymm2, %ymm2
361 ; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2
332 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
333 ; AVX512F-NEXT: vpmovdw %zmm2, %ymm2
362334 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm5, %ymm1
363 ; AVX512F-NEXT: movq %rbp, %rsp
364 ; AVX512F-NEXT: popq %rbp
365335 ; AVX512F-NEXT: retq
366336 ;
367337 ; AVX512BWVL-LABEL: avg_v32i16_mask:
384354 define <32 x i16> @avg_v32i16_maskz(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind {
385355 ; AVX512F-LABEL: avg_v32i16_maskz:
386356 ; AVX512F: # %bb.0:
387 ; AVX512F-NEXT: pushq %rbp
388 ; AVX512F-NEXT: movq %rsp, %rbp
389 ; AVX512F-NEXT: andq $-32, %rsp
390 ; AVX512F-NEXT: subq $32, %rsp
391 ; AVX512F-NEXT: movl %edi, (%rsp)
392 ; AVX512F-NEXT: kmovw (%rsp), %k1
393 ; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
394 ; AVX512F-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z}
395 ; AVX512F-NEXT: vpmovdb %zmm4, %xmm4
396 ; AVX512F-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z}
397 ; AVX512F-NEXT: vpmovdb %zmm5, %xmm5
357 ; AVX512F-NEXT: kmovw %edi, %k1
358 ; AVX512F-NEXT: shrl $16, %edi
398359 ; AVX512F-NEXT: vpavgw %ymm3, %ymm1, %ymm1
399360 ; AVX512F-NEXT: vpavgw %ymm2, %ymm0, %ymm0
400 ; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero
401 ; AVX512F-NEXT: vpsllw $15, %ymm2, %ymm2
402 ; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2
361 ; AVX512F-NEXT: kmovw %edi, %k2
362 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
363 ; AVX512F-NEXT: vpmovdw %zmm2, %ymm2
403364 ; AVX512F-NEXT: vpand %ymm0, %ymm2, %ymm0
404 ; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
405 ; AVX512F-NEXT: vpsllw $15, %ymm2, %ymm2
406 ; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2
365 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
366 ; AVX512F-NEXT: vpmovdw %zmm2, %ymm2
407367 ; AVX512F-NEXT: vpand %ymm1, %ymm2, %ymm1
408 ; AVX512F-NEXT: movq %rbp, %rsp
409 ; AVX512F-NEXT: popq %rbp
410368 ; AVX512F-NEXT: retq
411369 ;
412370 ; AVX512BWVL-LABEL: avg_v32i16_maskz:
781781 define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32> %y) {
782782 ; KNL-LABEL: test_insertelement_v32i1:
783783 ; KNL: ## %bb.0:
784 ; KNL-NEXT: pushq %rbp
785 ; KNL-NEXT: .cfi_def_cfa_offset 16
786 ; KNL-NEXT: .cfi_offset %rbp, -16
787 ; KNL-NEXT: movq %rsp, %rbp
788 ; KNL-NEXT: .cfi_def_cfa_register %rbp
789 ; KNL-NEXT: andq $-32, %rsp
790 ; KNL-NEXT: subq $32, %rsp
791 ; KNL-NEXT: xorl %eax, %eax
792784 ; KNL-NEXT: cmpl %esi, %edi
793785 ; KNL-NEXT: setb %al
794 ; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k1
795 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
796 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
797 ; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k1
798 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
799 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
800 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
801 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
802 ; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
803 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
804 ; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
805 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
806 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
807 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
808 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
809 ; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
810 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
811 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
812 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
813 ; KNL-NEXT: kmovw %k0, (%rsp)
814 ; KNL-NEXT: movl (%rsp), %eax
815 ; KNL-NEXT: movq %rbp, %rsp
816 ; KNL-NEXT: popq %rbp
786 ; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k0
787 ; KNL-NEXT: kmovw %k0, %ecx
788 ; KNL-NEXT: shll $16, %ecx
789 ; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k0
790 ; KNL-NEXT: kshiftrw $4, %k0, %k1
791 ; KNL-NEXT: kmovw %eax, %k2
792 ; KNL-NEXT: kxorw %k2, %k1, %k1
793 ; KNL-NEXT: kshiftlw $15, %k1, %k1
794 ; KNL-NEXT: kshiftrw $11, %k1, %k1
795 ; KNL-NEXT: kxorw %k0, %k1, %k0
796 ; KNL-NEXT: kmovw %k0, %eax
797 ; KNL-NEXT: orl %ecx, %eax
817798 ; KNL-NEXT: vzeroupper
818799 ; KNL-NEXT: retq
819800 ;
1013994 ; KNL-NEXT: vpxor %ymm2, %ymm1, %ymm1
1014995 ; KNL-NEXT: vpxor %ymm2, %ymm0, %ymm0
1015996 ; KNL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
1016 ; KNL-NEXT: vpextrb $2, %xmm0, %eax
997 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
998 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
999 ; KNL-NEXT: kshiftrw $2, %k0, %k0
1000 ; KNL-NEXT: kmovw %k0, %eax
10171001 ; KNL-NEXT: andl $1, %eax
10181002 ; KNL-NEXT: vzeroupper
10191003 ; KNL-NEXT: retq
10401024 ; KNL-NEXT: vpxor %ymm0, %ymm1, %ymm0
10411025 ; KNL-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
10421026 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
1043 ; KNL-NEXT: vpextrb $15, %xmm0, %eax
1027 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1028 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1029 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1030 ; KNL-NEXT: kmovw %k0, %eax
10441031 ; KNL-NEXT: andb $1, %al
10451032 ; KNL-NEXT: movb $4, %cl
10461033 ; KNL-NEXT: subb %al, %cl
10731060 ; KNL-NEXT: vpxor %ymm0, %ymm1, %ymm0
10741061 ; KNL-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
10751062 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
1076 ; KNL-NEXT: vpextrb $15, %xmm0, %eax
1063 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1064 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1065 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1066 ; KNL-NEXT: kmovw %k0, %eax
10771067 ; KNL-NEXT: andb $1, %al
10781068 ; KNL-NEXT: movb $4, %cl
10791069 ; KNL-NEXT: subb %al, %cl
17161706 ; KNL-NEXT: movq %rsp, %rbp
17171707 ; KNL-NEXT: .cfi_def_cfa_register %rbp
17181708 ; KNL-NEXT: andq $-32, %rsp
1719 ; KNL-NEXT: subq $96, %rsp
1709 ; KNL-NEXT: subq $64, %rsp
17201710 ; KNL-NEXT: ## kill: def %esi killed %esi def %rsi
17211711 ; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
17221712 ; KNL-NEXT: vpxor %ymm1, %ymm0, %ymm0
17231713 ; KNL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
17241714 ; KNL-NEXT: andl $31, %esi
17251715 ; KNL-NEXT: testb %dil, %dil
1726 ; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
1727 ; KNL-NEXT: setne 32(%rsp,%rsi)
1728 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0
1729 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
1730 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
1731 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
1732 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
1733 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1734 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
1716 ; KNL-NEXT: vmovdqa %ymm0, (%rsp)
1717 ; KNL-NEXT: setne (%rsp,%rsi)
1718 ; KNL-NEXT: vpmovsxbd (%rsp), %zmm0
17351719 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
17361720 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1737 ; KNL-NEXT: kmovw %k0, (%rsp)
1738 ; KNL-NEXT: movl (%rsp), %eax
1721 ; KNL-NEXT: kmovw %k0, %ecx
1722 ; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0
1723 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1724 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1725 ; KNL-NEXT: kmovw %k0, %eax
1726 ; KNL-NEXT: shll $16, %eax
1727 ; KNL-NEXT: orl %ecx, %eax
17391728 ; KNL-NEXT: movq %rbp, %rsp
17401729 ; KNL-NEXT: popq %rbp
17411730 ; KNL-NEXT: vzeroupper
17811770 ; KNL-NEXT: movq %rsp, %rbp
17821771 ; KNL-NEXT: .cfi_def_cfa_register %rbp
17831772 ; KNL-NEXT: andq $-64, %rsp
1784 ; KNL-NEXT: subq $192, %rsp
1773 ; KNL-NEXT: subq $128, %rsp
17851774 ; KNL-NEXT: ## kill: def %esi killed %esi def %rsi
17861775 ; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
17871776 ; KNL-NEXT: vpxor %ymm2, %ymm0, %ymm0
17911780 ; KNL-NEXT: andl $63, %esi
17921781 ; KNL-NEXT: testb %dil, %dil
17931782 ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
1794 ; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
1795 ; KNL-NEXT: setne 64(%rsp,%rsi)
1796 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0
1783 ; KNL-NEXT: vmovdqa %ymm0, (%rsp)
1784 ; KNL-NEXT: setne (%rsp,%rsi)
1785 ; KNL-NEXT: vmovdqa (%rsp), %ymm0
17971786 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
1798 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm2
1799 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
1787 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm2
18001788 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
18011789 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
1802 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1790 ; KNL-NEXT: kmovw %k0, %eax
1791 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
18031792 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
18041793 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
18051794 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1806 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1795 ; KNL-NEXT: kmovw %k0, %ecx
1796 ; KNL-NEXT: shll $16, %ecx
1797 ; KNL-NEXT: orl %eax, %ecx
1798 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
1799 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1800 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1801 ; KNL-NEXT: kmovw %k0, %edx
18071802 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm0
18081803 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
18091804 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
18101805 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1811 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1812 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
1813 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
1814 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1815 ; KNL-NEXT: kmovw %k0, (%rsp)
1816 ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %ecx
1817 ; KNL-NEXT: movl (%rsp), %eax
1806 ; KNL-NEXT: kmovw %k0, %eax
1807 ; KNL-NEXT: shll $16, %eax
1808 ; KNL-NEXT: orl %edx, %eax
18181809 ; KNL-NEXT: shlq $32, %rax
18191810 ; KNL-NEXT: orq %rcx, %rax
18201811 ; KNL-NEXT: movq %rbp, %rsp
18621853 ; KNL-NEXT: movq %rsp, %rbp
18631854 ; KNL-NEXT: .cfi_def_cfa_register %rbp
18641855 ; KNL-NEXT: andq $-128, %rsp
1865 ; KNL-NEXT: subq $384, %rsp ## imm = 0x180
1856 ; KNL-NEXT: subq $256, %rsp ## imm = 0x100
18661857 ; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
18671858 ; KNL-NEXT: vpinsrb $1, 488(%rbp), %xmm0, %xmm0
18681859 ; KNL-NEXT: vpinsrb $2, 496(%rbp), %xmm0, %xmm0
19761967 ; KNL-NEXT: vmovdqa %ymm3, {{[0-9]+}}(%rsp)
19771968 ; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
19781969 ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
1979 ; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp)
1980 ; KNL-NEXT: setne 128(%rsp,%rax)
1970 ; KNL-NEXT: vmovdqa %ymm2, (%rsp)
1971 ; KNL-NEXT: setne (%rsp,%rax)
1972 ; KNL-NEXT: vmovdqa (%rsp), %ymm2
1973 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3
19811974 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
1982 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm2
1983 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3
19841975 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0
1985 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm4
1986 ; KNL-NEXT: vpmovsxbd %xmm4, %zmm4
1976 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm4
19871977 ; KNL-NEXT: vpslld $31, %zmm4, %zmm4
19881978 ; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0
1989 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1979 ; KNL-NEXT: kmovw %k0, %eax
1980 ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
1981 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
1982 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
1983 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
1984 ; KNL-NEXT: kmovw %k0, %ecx
1985 ; KNL-NEXT: shll $16, %ecx
1986 ; KNL-NEXT: orl %eax, %ecx
1987 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm2
1988 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
1989 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
1990 ; KNL-NEXT: kmovw %k0, %edx
1991 ; KNL-NEXT: vextracti128 $1, %ymm3, %xmm2
1992 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
1993 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
1994 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
1995 ; KNL-NEXT: kmovw %k0, %eax
1996 ; KNL-NEXT: shll $16, %eax
1997 ; KNL-NEXT: orl %edx, %eax
1998 ; KNL-NEXT: shlq $32, %rax
1999 ; KNL-NEXT: orq %rcx, %rax
2000 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm2
2001 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
2002 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
2003 ; KNL-NEXT: kmovw %k0, %ecx
2004 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
19902005 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
19912006 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
19922007 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
1993 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1994 ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm1
1995 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
2008 ; KNL-NEXT: kmovw %k0, %esi
2009 ; KNL-NEXT: shll $16, %esi
2010 ; KNL-NEXT: orl %ecx, %esi
2011 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
19962012 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
19972013 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
1998 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1999 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm1
2000 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
2001 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2002 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2003 ; KNL-NEXT: vextracti128 $1, %ymm3, %xmm1
2004 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
2005 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
2006 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2007 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2008 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm1
2009 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
2010 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2011 ; KNL-NEXT: kmovw %k0, (%rsp)
2012 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
2013 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
2014 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
2015 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2016 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2014 ; KNL-NEXT: kmovw %k0, %ecx
2015 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
20172016 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
20182017 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
20192018 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2020 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2021 ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %ecx
2022 ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax
2023 ; KNL-NEXT: shlq $32, %rax
2024 ; KNL-NEXT: orq %rcx, %rax
2025 ; KNL-NEXT: movl (%rsp), %ecx
2026 ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %edx
2019 ; KNL-NEXT: kmovw %k0, %edx
2020 ; KNL-NEXT: shll $16, %edx
2021 ; KNL-NEXT: orl %ecx, %edx
20272022 ; KNL-NEXT: shlq $32, %rdx
2028 ; KNL-NEXT: orq %rcx, %rdx
2023 ; KNL-NEXT: orq %rsi, %rdx
20292024 ; KNL-NEXT: movq %rbp, %rsp
20302025 ; KNL-NEXT: popq %rbp
20312026 ; KNL-NEXT: vzeroupper
21772172 ; KNL-NEXT: movq %rsp, %rbp
21782173 ; KNL-NEXT: .cfi_def_cfa_register %rbp
21792174 ; KNL-NEXT: andq $-128, %rsp
2180 ; KNL-NEXT: subq $384, %rsp ## imm = 0x180
2175 ; KNL-NEXT: subq $256, %rsp ## imm = 0x100
21812176 ; KNL-NEXT: ## kill: def %esi killed %esi def %rsi
21822177 ; KNL-NEXT: vmovdqa {{.*#+}} ymm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
21832178 ; KNL-NEXT: vpxor %ymm4, %ymm0, %ymm0
21932188 ; KNL-NEXT: vmovdqa %ymm3, {{[0-9]+}}(%rsp)
21942189 ; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp)
21952190 ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
2196 ; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
2197 ; KNL-NEXT: setne 128(%rsp,%rsi)
2191 ; KNL-NEXT: vmovdqa %ymm0, (%rsp)
2192 ; KNL-NEXT: setne (%rsp,%rsi)
2193 ; KNL-NEXT: vmovdqa (%rsp), %ymm2
2194 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3
21982195 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
2199 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm2
2200 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3
22012196 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0
2202 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm4
2203 ; KNL-NEXT: vpmovsxbd %xmm4, %zmm4
2197 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm4
22042198 ; KNL-NEXT: vpslld $31, %zmm4, %zmm4
22052199 ; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0
2206 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2200 ; KNL-NEXT: kmovw %k0, %eax
2201 ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
2202 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
2203 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
2204 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
2205 ; KNL-NEXT: kmovw %k0, %ecx
2206 ; KNL-NEXT: shll $16, %ecx
2207 ; KNL-NEXT: orl %eax, %ecx
2208 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm2
2209 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
2210 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
2211 ; KNL-NEXT: kmovw %k0, %edx
2212 ; KNL-NEXT: vextracti128 $1, %ymm3, %xmm2
2213 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
2214 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
2215 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
2216 ; KNL-NEXT: kmovw %k0, %eax
2217 ; KNL-NEXT: shll $16, %eax
2218 ; KNL-NEXT: orl %edx, %eax
2219 ; KNL-NEXT: shlq $32, %rax
2220 ; KNL-NEXT: orq %rcx, %rax
2221 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm2
2222 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
2223 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
2224 ; KNL-NEXT: kmovw %k0, %ecx
2225 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
22072226 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
22082227 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
22092228 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2210 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2211 ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm1
2212 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
2229 ; KNL-NEXT: kmovw %k0, %esi
2230 ; KNL-NEXT: shll $16, %esi
2231 ; KNL-NEXT: orl %ecx, %esi
2232 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
22132233 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
22142234 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2215 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2216 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm1
2217 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
2218 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2219 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2220 ; KNL-NEXT: vextracti128 $1, %ymm3, %xmm1
2221 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
2222 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
2223 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2224 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2225 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm1
2226 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
2227 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2228 ; KNL-NEXT: kmovw %k0, (%rsp)
2229 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
2230 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
2231 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
2232 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2233 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2235 ; KNL-NEXT: kmovw %k0, %ecx
2236 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
22342237 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
22352238 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
22362239 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2237 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2238 ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %ecx
2239 ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax
2240 ; KNL-NEXT: shlq $32, %rax
2241 ; KNL-NEXT: orq %rcx, %rax
2242 ; KNL-NEXT: movl (%rsp), %ecx
2243 ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %edx
2240 ; KNL-NEXT: kmovw %k0, %edx
2241 ; KNL-NEXT: shll $16, %edx
2242 ; KNL-NEXT: orl %ecx, %edx
22442243 ; KNL-NEXT: shlq $32, %rdx
2245 ; KNL-NEXT: orq %rcx, %rdx
2244 ; KNL-NEXT: orq %rsi, %rdx
22462245 ; KNL-NEXT: movq %rbp, %rsp
22472246 ; KNL-NEXT: popq %rbp
22482247 ; KNL-NEXT: vzeroupper
974974 ;
975975 ; KNL-LABEL: test16:
976976 ; KNL: ## %bb.0:
977 ; KNL-NEXT: pushq %rbp
978 ; KNL-NEXT: .cfi_def_cfa_offset 16
979 ; KNL-NEXT: .cfi_offset %rbp, -16
980 ; KNL-NEXT: movq %rsp, %rbp
981 ; KNL-NEXT: .cfi_def_cfa_register %rbp
982 ; KNL-NEXT: andq $-32, %rsp
983 ; KNL-NEXT: subq $64, %rsp
984 ; KNL-NEXT: movl %edi, (%rsp)
977 ; KNL-NEXT: movq %rdi, %rax
978 ; KNL-NEXT: movl %edi, %ecx
979 ; KNL-NEXT: kmovw %edi, %k0
985980 ; KNL-NEXT: shrq $32, %rdi
986 ; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
987 ; KNL-NEXT: kmovw (%rsp), %k1
988 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
989 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
981 ; KNL-NEXT: shrq $48, %rax
982 ; KNL-NEXT: shrl $16, %ecx
983 ; KNL-NEXT: kmovw %ecx, %k1
984 ; KNL-NEXT: kmovw %eax, %k2
985 ; KNL-NEXT: kmovw %edi, %k3
986 ; KNL-NEXT: movb $1, %al
987 ; KNL-NEXT: kmovw %eax, %k4
988 ; KNL-NEXT: kshiftrw $5, %k0, %k5
989 ; KNL-NEXT: kxorw %k4, %k5, %k4
990 ; KNL-NEXT: kshiftlw $15, %k4, %k4
991 ; KNL-NEXT: kshiftrw $10, %k4, %k4
992 ; KNL-NEXT: kxorw %k0, %k4, %k4
993 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
990994 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
991 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
995 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
992996 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
993 ; KNL-NEXT: movl $1, %eax
994 ; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
995 ; KNL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
996 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
997 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
998 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
999 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1000 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
997 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
998 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
999 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1000 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
10011001 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
1002 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1003 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
1004 ; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
1005 ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
1006 ; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
1007 ; KNL-NEXT: movq %rbp, %rsp
1008 ; KNL-NEXT: popq %rbp
1002 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
10091003 ; KNL-NEXT: retq
10101004 ;
10111005 ; SKX-LABEL: test16:
10361030 ;
10371031 ; AVX512DQ-LABEL: test16:
10381032 ; AVX512DQ: ## %bb.0:
1039 ; AVX512DQ-NEXT: pushq %rbp
1040 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
1041 ; AVX512DQ-NEXT: .cfi_offset %rbp, -16
1042 ; AVX512DQ-NEXT: movq %rsp, %rbp
1043 ; AVX512DQ-NEXT: .cfi_def_cfa_register %rbp
1044 ; AVX512DQ-NEXT: andq $-32, %rsp
1045 ; AVX512DQ-NEXT: subq $64, %rsp
1046 ; AVX512DQ-NEXT: movl %edi, (%rsp)
1033 ; AVX512DQ-NEXT: movq %rdi, %rax
1034 ; AVX512DQ-NEXT: movl %edi, %ecx
1035 ; AVX512DQ-NEXT: kmovw %edi, %k0
10471036 ; AVX512DQ-NEXT: shrq $32, %rdi
1048 ; AVX512DQ-NEXT: movl %edi, {{[0-9]+}}(%rsp)
1049 ; AVX512DQ-NEXT: kmovw (%rsp), %k0
1050 ; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
1051 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0
1037 ; AVX512DQ-NEXT: shrq $48, %rax
1038 ; AVX512DQ-NEXT: shrl $16, %ecx
1039 ; AVX512DQ-NEXT: kmovw %ecx, %k1
1040 ; AVX512DQ-NEXT: kmovw %eax, %k2
1041 ; AVX512DQ-NEXT: kmovw %edi, %k3
1042 ; AVX512DQ-NEXT: movb $1, %al
1043 ; AVX512DQ-NEXT: kmovw %eax, %k4
1044 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k5
1045 ; AVX512DQ-NEXT: kxorw %k4, %k5, %k4
1046 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
1047 ; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
1048 ; AVX512DQ-NEXT: kxorw %k0, %k4, %k0
1049 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0
10521050 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1053 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
1051 ; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1
10541052 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
1055 ; AVX512DQ-NEXT: movl $1, %eax
1056 ; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
1057 ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1058 ; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k0
1059 ; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
1060 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
1061 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
1053 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
1054 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1055 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
10621056 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2
10631057 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
1064 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1065 ; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0
1066 ; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
1067 ; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
1068 ; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
1069 ; AVX512DQ-NEXT: movq %rbp, %rsp
1070 ; AVX512DQ-NEXT: popq %rbp
1058 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
10711059 ; AVX512DQ-NEXT: retq
10721060 %a = bitcast i64 %x to <64 x i1>
10731061 %b = insertelement <64 x i1>%a, i1 true, i32 5
10791067 ;
10801068 ; KNL-LABEL: test17:
10811069 ; KNL: ## %bb.0:
1082 ; KNL-NEXT: pushq %rbp
1083 ; KNL-NEXT: .cfi_def_cfa_offset 16
1084 ; KNL-NEXT: .cfi_offset %rbp, -16
1085 ; KNL-NEXT: movq %rsp, %rbp
1086 ; KNL-NEXT: .cfi_def_cfa_register %rbp
1087 ; KNL-NEXT: andq $-32, %rsp
1088 ; KNL-NEXT: subq $64, %rsp
1089 ; KNL-NEXT: movl %edi, (%rsp)
1070 ; KNL-NEXT: movq %rdi, %rax
1071 ; KNL-NEXT: movl %edi, %ecx
1072 ; KNL-NEXT: kmovw %edi, %k0
10901073 ; KNL-NEXT: shrq $32, %rdi
1091 ; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp)
1092 ; KNL-NEXT: kmovw (%rsp), %k1
1093 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
1094 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
1095 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1096 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
1097 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1098 ; KNL-NEXT: xorl %eax, %eax
1074 ; KNL-NEXT: shrq $48, %rax
1075 ; KNL-NEXT: shrl $16, %ecx
1076 ; KNL-NEXT: kmovw %ecx, %k1
1077 ; KNL-NEXT: kmovw %eax, %k2
1078 ; KNL-NEXT: kmovw %edi, %k3
10991079 ; KNL-NEXT: cmpl %edx, %esi
11001080 ; KNL-NEXT: setg %al
1101 ; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
1102 ; KNL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1103 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
1104 ; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
1105 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
1081 ; KNL-NEXT: kmovw %eax, %k4
1082 ; KNL-NEXT: kshiftrw $5, %k0, %k5
1083 ; KNL-NEXT: kxorw %k4, %k5, %k4
1084 ; KNL-NEXT: kshiftlw $15, %k4, %k4
1085 ; KNL-NEXT: kshiftrw $10, %k4, %k4
1086 ; KNL-NEXT: kxorw %k0, %k4, %k4
1087 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
1088 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1089 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
11061090 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
1107 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
1091 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
1092 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
1093 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
1094 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
11081095 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
1109 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1110 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
1111 ; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
1112 ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
1113 ; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
1114 ; KNL-NEXT: movq %rbp, %rsp
1115 ; KNL-NEXT: popq %rbp
1096 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
11161097 ; KNL-NEXT: retq
11171098 ;
11181099 ; SKX-LABEL: test17:
11451126 ;
11461127 ; AVX512DQ-LABEL: test17:
11471128 ; AVX512DQ: ## %bb.0:
1148 ; AVX512DQ-NEXT: pushq %rbp
1149 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
1150 ; AVX512DQ-NEXT: .cfi_offset %rbp, -16
1151 ; AVX512DQ-NEXT: movq %rsp, %rbp
1152 ; AVX512DQ-NEXT: .cfi_def_cfa_register %rbp
1153 ; AVX512DQ-NEXT: andq $-32, %rsp
1154 ; AVX512DQ-NEXT: subq $64, %rsp
1155 ; AVX512DQ-NEXT: movl %edi, (%rsp)
1129 ; AVX512DQ-NEXT: movq %rdi, %rax
1130 ; AVX512DQ-NEXT: movl %edi, %ecx
1131 ; AVX512DQ-NEXT: kmovw %edi, %k0
11561132 ; AVX512DQ-NEXT: shrq $32, %rdi
1157 ; AVX512DQ-NEXT: movl %edi, {{[0-9]+}}(%rsp)
1158 ; AVX512DQ-NEXT: kmovw (%rsp), %k0
1159 ; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
1160 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0
1161 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1162 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
1163 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
1164 ; AVX512DQ-NEXT: xorl %eax, %eax
1133 ; AVX512DQ-NEXT: shrq $48, %rax
1134 ; AVX512DQ-NEXT: shrl $16, %ecx
1135 ; AVX512DQ-NEXT: kmovw %ecx, %k1
1136 ; AVX512DQ-NEXT: kmovw %eax, %k2
1137 ; AVX512DQ-NEXT: kmovw %edi, %k3
11651138 ; AVX512DQ-NEXT: cmpl %edx, %esi
11661139 ; AVX512DQ-NEXT: setg %al
1167 ; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
1168 ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1169 ; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k0
1170 ; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
1171 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
1140 ; AVX512DQ-NEXT: kmovw %eax, %k4
1141 ; AVX512DQ-NEXT: kshiftrw $5, %k0, %k5
1142 ; AVX512DQ-NEXT: kxorw %k4, %k5, %k4
1143 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4
1144 ; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4
1145 ; AVX512DQ-NEXT: kxorw %k0, %k4, %k0
1146 ; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0
1147 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
1148 ; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1
11721149 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
1150 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
1151 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
1152 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
11731153 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2
11741154 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
1175 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1176 ; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0
1177 ; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
1178 ; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
1179 ; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
1180 ; AVX512DQ-NEXT: movq %rbp, %rsp
1181 ; AVX512DQ-NEXT: popq %rbp
1155 ; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
11821156 ; AVX512DQ-NEXT: retq
11831157 %a = bitcast i64 %x to <64 x i1>
11841158 %b = icmp sgt i32 %y, %z
18141788 ;
18151789 ; KNL-LABEL: ktest_2:
18161790 ; KNL: ## %bb.0:
1817 ; KNL-NEXT: pushq %rbp
1818 ; KNL-NEXT: .cfi_def_cfa_offset 16
1819 ; KNL-NEXT: .cfi_offset %rbp, -16
1820 ; KNL-NEXT: movq %rsp, %rbp
1821 ; KNL-NEXT: .cfi_def_cfa_register %rbp
1822 ; KNL-NEXT: andq $-32, %rsp
1823 ; KNL-NEXT: subq $32, %rsp
18241791 ; KNL-NEXT: vmovups (%rdi), %zmm2
18251792 ; KNL-NEXT: vmovups 64(%rdi), %zmm3
1826 ; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k1
1827 ; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
1828 ; KNL-NEXT: vpmovdb %zmm2, %xmm2
1829 ; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k2
1830 ; KNL-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
1831 ; KNL-NEXT: vpmovdb %zmm3, %xmm3
1832 ; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k2} {z}
1833 ; KNL-NEXT: vmovups 4(%rdi), %zmm5 {%k1} {z}
1834 ; KNL-NEXT: vcmpltps %zmm5, %zmm0, %k1
1835 ; KNL-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z}
1836 ; KNL-NEXT: vpmovdb %zmm5, %xmm5
1837 ; KNL-NEXT: vpor %xmm5, %xmm2, %xmm2
1838 ; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k1
1839 ; KNL-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
1840 ; KNL-NEXT: vpmovdb %zmm4, %xmm4
1841 ; KNL-NEXT: vpor %xmm4, %xmm3, %xmm3
1842 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
1843 ; KNL-NEXT: vpslld $31, %zmm3, %zmm3
1844 ; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
1845 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1846 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
1847 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
1848 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
1849 ; KNL-NEXT: kmovw %k0, (%rsp)
1850 ; KNL-NEXT: cmpl $0, (%rsp)
1793 ; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1
1794 ; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2
1795 ; KNL-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z}
1796 ; KNL-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z}
1797 ; KNL-NEXT: vcmpltps %zmm3, %zmm1, %k0
1798 ; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k3
1799 ; KNL-NEXT: korw %k3, %k2, %k2
1800 ; KNL-NEXT: kmovw %k2, %eax
1801 ; KNL-NEXT: korw %k0, %k1, %k0
1802 ; KNL-NEXT: kmovw %k0, %ecx
1803 ; KNL-NEXT: shll $16, %ecx
1804 ; KNL-NEXT: orl %eax, %ecx
18511805 ; KNL-NEXT: je LBB42_2
18521806 ; KNL-NEXT: ## %bb.1: ## %L1
18531807 ; KNL-NEXT: vmovaps %zmm0, (%rdi)
18541808 ; KNL-NEXT: vmovaps %zmm1, 64(%rdi)
1855 ; KNL-NEXT: jmp LBB42_3
1809 ; KNL-NEXT: vzeroupper
1810 ; KNL-NEXT: retq
18561811 ; KNL-NEXT: LBB42_2: ## %L2
18571812 ; KNL-NEXT: vmovaps %zmm0, 4(%rdi)
18581813 ; KNL-NEXT: vmovaps %zmm1, 68(%rdi)
1859 ; KNL-NEXT: LBB42_3: ## %End
1860 ; KNL-NEXT: movq %rbp, %rsp
1861 ; KNL-NEXT: popq %rbp
18621814 ; KNL-NEXT: vzeroupper
18631815 ; KNL-NEXT: retq
18641816 ;
19161868 ;
19171869 ; AVX512DQ-LABEL: ktest_2:
19181870 ; AVX512DQ: ## %bb.0:
1919 ; AVX512DQ-NEXT: pushq %rbp
1920 ; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
1921 ; AVX512DQ-NEXT: .cfi_offset %rbp, -16
1922 ; AVX512DQ-NEXT: movq %rsp, %rbp
1923 ; AVX512DQ-NEXT: .cfi_def_cfa_register %rbp
1924 ; AVX512DQ-NEXT: andq $-32, %rsp
1925 ; AVX512DQ-NEXT: subq $32, %rsp
19261871 ; AVX512DQ-NEXT: vmovups (%rdi), %zmm2
19271872 ; AVX512DQ-NEXT: vmovups 64(%rdi), %zmm3
1928 ; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k1
1929 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2
1930 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
1931 ; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k2
1932 ; AVX512DQ-NEXT: vpmovm2d %k2, %zmm3
1933 ; AVX512DQ-NEXT: vpmovdb %zmm3, %xmm3
1934 ; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm4 {%k2} {z}
1935 ; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm5 {%k1} {z}
1936 ; AVX512DQ-NEXT: vcmpltps %zmm5, %zmm0, %k0
1937 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm5
1938 ; AVX512DQ-NEXT: vpmovdb %zmm5, %xmm5
1939 ; AVX512DQ-NEXT: vpor %xmm5, %xmm2, %xmm2
1940 ; AVX512DQ-NEXT: vcmpltps %zmm4, %zmm1, %k0
1941 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm4
1942 ; AVX512DQ-NEXT: vpmovdb %zmm4, %xmm4
1943 ; AVX512DQ-NEXT: vpor %xmm4, %xmm3, %xmm3
1944 ; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3
1945 ; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3
1946 ; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0
1947 ; AVX512DQ-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1948 ; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2
1949 ; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2
1950 ; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0
1951 ; AVX512DQ-NEXT: kmovw %k0, (%rsp)
1952 ; AVX512DQ-NEXT: cmpl $0, (%rsp)
1873 ; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k1
1874 ; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k2
1875 ; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z}
1876 ; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z}
1877 ; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm1, %k0
1878 ; AVX512DQ-NEXT: vcmpltps %zmm2, %zmm0, %k3
1879 ; AVX512DQ-NEXT: korw %k3, %k2, %k2
1880 ; AVX512DQ-NEXT: kmovw %k2, %eax
1881 ; AVX512DQ-NEXT: korw %k0, %k1, %k0
1882 ; AVX512DQ-NEXT: kmovw %k0, %ecx
1883 ; AVX512DQ-NEXT: shll $16, %ecx
1884 ; AVX512DQ-NEXT: orl %eax, %ecx
19531885 ; AVX512DQ-NEXT: je LBB42_2
19541886 ; AVX512DQ-NEXT: ## %bb.1: ## %L1
19551887 ; AVX512DQ-NEXT: vmovaps %zmm0, (%rdi)
19561888 ; AVX512DQ-NEXT: vmovaps %zmm1, 64(%rdi)
1957 ; AVX512DQ-NEXT: jmp LBB42_3
1889 ; AVX512DQ-NEXT: vzeroupper
1890 ; AVX512DQ-NEXT: retq
19581891 ; AVX512DQ-NEXT: LBB42_2: ## %L2
19591892 ; AVX512DQ-NEXT: vmovaps %zmm0, 4(%rdi)
19601893 ; AVX512DQ-NEXT: vmovaps %zmm1, 68(%rdi)
1961 ; AVX512DQ-NEXT: LBB42_3: ## %End
1962 ; AVX512DQ-NEXT: movq %rbp, %rsp
1963 ; AVX512DQ-NEXT: popq %rbp
19641894 ; AVX512DQ-NEXT: vzeroupper
19651895 ; AVX512DQ-NEXT: retq
19661896 %addr1 = getelementptr float, float * %base, i64 0
23332263 define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
23342264 ; KNL-LABEL: store_32i1:
23352265 ; KNL: ## %bb.0:
2336 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
2337 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
2266 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
23382267 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
23392268 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2340 ; KNL-NEXT: kmovw %k0, 2(%rdi)
2269 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
23412270 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
23422271 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2343 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2272 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
2273 ; KNL-NEXT: kmovw %k1, 2(%rdi)
23442274 ; KNL-NEXT: kmovw %k0, (%rdi)
23452275 ; KNL-NEXT: vzeroupper
23462276 ; KNL-NEXT: retq
23632293 ;
23642294 ; AVX512DQ-LABEL: store_32i1:
23652295 ; AVX512DQ: ## %bb.0:
2366 ; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1
2367 ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1
2296 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1
23682297 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
23692298 ; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0
2370 ; AVX512DQ-NEXT: kmovw %k0, 2(%rdi)
2299 ; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0
23712300 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
23722301 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2373 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
2302 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1
2303 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
23742304 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
23752305 ; AVX512DQ-NEXT: vzeroupper
23762306 ; AVX512DQ-NEXT: retq
23822312 ; KNL-LABEL: store_32i1_1:
23832313 ; KNL: ## %bb.0:
23842314 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
2385 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
2386 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
2387 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2388 ; KNL-NEXT: kmovw %k0, 2(%rdi)
23892315 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
23902316 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2317 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm0
2318 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2319 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
2320 ; KNL-NEXT: kmovw %k1, 2(%rdi)
23912321 ; KNL-NEXT: kmovw %k0, (%rdi)
23922322 ; KNL-NEXT: vzeroupper
23932323 ; KNL-NEXT: retq
24112341 ; AVX512DQ-LABEL: store_32i1_1:
24122342 ; AVX512DQ: ## %bb.0:
24132343 ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
2414 ; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1
2415 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
2416 ; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0
2417 ; AVX512DQ-NEXT: kmovw %k0, 2(%rdi)
24182344 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
24192345 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
2346 ; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm0
2347 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2348 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1
2349 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
24202350 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
24212351 ; AVX512DQ-NEXT: vzeroupper
24222352 ; AVX512DQ-NEXT: retq
24302360 ;
24312361 ; KNL-LABEL: store_64i1:
24322362 ; KNL: ## %bb.0:
2433 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
2434 ; KNL-NEXT: vpslld $31, %zmm3, %zmm3
2435 ; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
2436 ; KNL-NEXT: kmovw %k0, 6(%rdi)
2437 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
2438 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2
2439 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
2440 ; KNL-NEXT: kmovw %k0, 4(%rdi)
2441 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
2442 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
2443 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
2444 ; KNL-NEXT: kmovw %k0, 2(%rdi)
24452363 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
24462364 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
24472365 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
2366 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
2367 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2368 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
2369 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm0
2370 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2371 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k2
2372 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm0
2373 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
2374 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k3
2375 ; KNL-NEXT: kmovw %k3, 6(%rdi)
2376 ; KNL-NEXT: kmovw %k2, 4(%rdi)
2377 ; KNL-NEXT: kmovw %k1, 2(%rdi)
24482378 ; KNL-NEXT: kmovw %k0, (%rdi)
24492379 ; KNL-NEXT: vzeroupper
24502380 ; KNL-NEXT: retq
24672397 ;
24682398 ; AVX512DQ-LABEL: store_64i1:
24692399 ; AVX512DQ: ## %bb.0:
2470 ; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3
2471 ; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3
2472 ; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0
2473 ; AVX512DQ-NEXT: kmovw %k0, 6(%rdi)
2474 ; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2
2475 ; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2
2476 ; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0
2477 ; AVX512DQ-NEXT: kmovw %k0, 4(%rdi)
2478 ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1
2479 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
2480 ; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0
2481 ; AVX512DQ-NEXT: kmovw %k0, 2(%rdi)
24822400 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
24832401 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
24842402 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
2403 ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0
2404 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2405 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1
2406 ; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm0
2407 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2408 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k2
2409 ; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm0
2410 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
2411 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k3
2412 ; AVX512DQ-NEXT: kmovw %k3, 6(%rdi)
2413 ; AVX512DQ-NEXT: kmovw %k2, 4(%rdi)
2414 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
24852415 ; AVX512DQ-NEXT: kmovw %k0, (%rdi)
24862416 ; AVX512DQ-NEXT: vzeroupper
24872417 ; AVX512DQ-NEXT: retq
239239 define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32 x double> %src0) {
240240 ; AVX512F-LABEL: test_load_32f64:
241241 ; AVX512F: ## %bb.0:
242 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm5
243 ; AVX512F-NEXT: vpmovsxbd %xmm5, %zmm5
242 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm5
244243 ; AVX512F-NEXT: vpslld $31, %zmm5, %zmm5
245244 ; AVX512F-NEXT: vptestmd %zmm5, %zmm5, %k1
246 ; AVX512F-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm5 {%k1}
245 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
247246 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
248247 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
249248 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2
250 ; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k2}
251 ; AVX512F-NEXT: kshiftrw $8, %k1, %k1
252 ; AVX512F-NEXT: vblendmpd 192(%rdi), %zmm4, %zmm3 {%k1}
253 ; AVX512F-NEXT: kshiftrw $8, %k2, %k1
249 ; AVX512F-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm5 {%k2}
250 ; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1}
251 ; AVX512F-NEXT: kshiftrw $8, %k2, %k2
252 ; AVX512F-NEXT: vblendmpd 192(%rdi), %zmm4, %zmm3 {%k2}
253 ; AVX512F-NEXT: kshiftrw $8, %k1, %k1
254254 ; AVX512F-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k1}
255255 ; AVX512F-NEXT: vmovapd %zmm5, %zmm2
256256 ; AVX512F-NEXT: retq
205205 define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
206206 ; KNL-LABEL: test12_v32i32:
207207 ; KNL: ## %bb.0:
208 ; KNL-NEXT: pushq %rbp
209 ; KNL-NEXT: movq %rsp, %rbp
210 ; KNL-NEXT: andq $-32, %rsp
211 ; KNL-NEXT: subq $32, %rsp
212 ; KNL-NEXT: vpcmpeqd %zmm3, %zmm1, %k1
213 ; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
214 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
215 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
216 ; KNL-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
217 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
218 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
219 ; KNL-NEXT: kmovw %k0, (%rsp)
220 ; KNL-NEXT: movl (%rsp), %eax
221 ; KNL-NEXT: movq %rbp, %rsp
222 ; KNL-NEXT: popq %rbp
208 ; KNL-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
209 ; KNL-NEXT: kmovw %k0, %ecx
210 ; KNL-NEXT: vpcmpeqd %zmm3, %zmm1, %k0
211 ; KNL-NEXT: kmovw %k0, %eax
212 ; KNL-NEXT: shll $16, %eax
213 ; KNL-NEXT: orl %ecx, %eax
223214 ; KNL-NEXT: vzeroupper
224215 ; KNL-NEXT: retq
225216 ;
248239 define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
249240 ; KNL-LABEL: test12_v64i16:
250241 ; KNL: ## %bb.0:
251 ; KNL-NEXT: pushq %rbp
252 ; KNL-NEXT: movq %rsp, %rbp
253 ; KNL-NEXT: andq $-32, %rsp
254 ; KNL-NEXT: subq $64, %rsp
255 ; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1
256 ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
257 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
258 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
259242 ; KNL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
260243 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
261244 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
262 ; KNL-NEXT: kmovw %k0, (%rsp)
245 ; KNL-NEXT: kmovw %k0, %eax
246 ; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm0
247 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
248 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
249 ; KNL-NEXT: kmovw %k0, %ecx
250 ; KNL-NEXT: shll $16, %ecx
251 ; KNL-NEXT: orl %eax, %ecx
252 ; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm0
253 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
254 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
255 ; KNL-NEXT: kmovw %k0, %edx
263256 ; KNL-NEXT: vpcmpeqw %ymm7, %ymm3, %ymm0
264257 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
265258 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
266 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
267 ; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm0
268 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
269 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
270 ; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
271 ; KNL-NEXT: movl (%rsp), %ecx
272 ; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax
259 ; KNL-NEXT: kmovw %k0, %eax
260 ; KNL-NEXT: shll $16, %eax
261 ; KNL-NEXT: orl %edx, %eax
273262 ; KNL-NEXT: shlq $32, %rax
274263 ; KNL-NEXT: orq %rcx, %rax
275 ; KNL-NEXT: movq %rbp, %rsp
276 ; KNL-NEXT: popq %rbp
277264 ; KNL-NEXT: vzeroupper
278265 ; KNL-NEXT: retq
279266 ;
1010 ;
1111 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
1212 ; NoVLX: # %bb.0: # %entry
13 ; NoVLX-NEXT: pushq %rbp
14 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
15 ; NoVLX-NEXT: .cfi_offset %rbp, -16
16 ; NoVLX-NEXT: movq %rsp, %rbp
17 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
18 ; NoVLX-NEXT: andq $-32, %rsp
19 ; NoVLX-NEXT: subq $32, %rsp
2013 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
21 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
22 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2314 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
2415 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
25 ; NoVLX-NEXT: kmovw %k0, (%rsp)
26 ; NoVLX-NEXT: movl (%rsp), %eax
27 ; NoVLX-NEXT: movq %rbp, %rsp
28 ; NoVLX-NEXT: popq %rbp
16 ; NoVLX-NEXT: kmovw %k0, %eax
2917 ; NoVLX-NEXT: vzeroupper
3018 ; NoVLX-NEXT: retq
3119 entry:
4634 ;
4735 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
4836 ; NoVLX: # %bb.0: # %entry
49 ; NoVLX-NEXT: pushq %rbp
50 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
51 ; NoVLX-NEXT: .cfi_offset %rbp, -16
52 ; NoVLX-NEXT: movq %rsp, %rbp
53 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
54 ; NoVLX-NEXT: andq $-32, %rsp
55 ; NoVLX-NEXT: subq $32, %rsp
5637 ; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
57 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
58 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
5938 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
6039 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
61 ; NoVLX-NEXT: kmovw %k0, (%rsp)
62 ; NoVLX-NEXT: movl (%rsp), %eax
63 ; NoVLX-NEXT: movq %rbp, %rsp
64 ; NoVLX-NEXT: popq %rbp
40 ; NoVLX-NEXT: kmovw %k0, %eax
6541 ; NoVLX-NEXT: vzeroupper
6642 ; NoVLX-NEXT: retq
6743 entry:
8460 ;
8561 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
8662 ; NoVLX: # %bb.0: # %entry
87 ; NoVLX-NEXT: pushq %rbp
88 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
89 ; NoVLX-NEXT: .cfi_offset %rbp, -16
90 ; NoVLX-NEXT: movq %rsp, %rbp
91 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
92 ; NoVLX-NEXT: andq $-32, %rsp
93 ; NoVLX-NEXT: subq $32, %rsp
9463 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
9564 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
9665 ; NoVLX-NEXT: kmovw %edi, %k1
97 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
98 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
99 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
100 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
101 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
102 ; NoVLX-NEXT: kmovw %k0, (%rsp)
103 ; NoVLX-NEXT: movl (%rsp), %eax
104 ; NoVLX-NEXT: movq %rbp, %rsp
105 ; NoVLX-NEXT: popq %rbp
66 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
67 ; NoVLX-NEXT: kmovw %k0, %eax
10668 ; NoVLX-NEXT: vzeroupper
10769 ; NoVLX-NEXT: retq
10870 entry:
12688 ;
12789 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
12890 ; NoVLX: # %bb.0: # %entry
129 ; NoVLX-NEXT: pushq %rbp
130 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
131 ; NoVLX-NEXT: .cfi_offset %rbp, -16
132 ; NoVLX-NEXT: movq %rsp, %rbp
133 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
134 ; NoVLX-NEXT: andq $-32, %rsp
135 ; NoVLX-NEXT: subq $32, %rsp
13691 ; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
13792 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
13893 ; NoVLX-NEXT: kmovw %edi, %k1
139 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
140 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
141 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
142 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
143 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
144 ; NoVLX-NEXT: kmovw %k0, (%rsp)
145 ; NoVLX-NEXT: movl (%rsp), %eax
146 ; NoVLX-NEXT: movq %rbp, %rsp
147 ; NoVLX-NEXT: popq %rbp
94 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
95 ; NoVLX-NEXT: kmovw %k0, %eax
14896 ; NoVLX-NEXT: vzeroupper
14997 ; NoVLX-NEXT: retq
15098 entry:
169117 ;
170118 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
171119 ; NoVLX: # %bb.0: # %entry
172 ; NoVLX-NEXT: pushq %rbp
173 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
174 ; NoVLX-NEXT: .cfi_offset %rbp, -16
175 ; NoVLX-NEXT: movq %rsp, %rbp
176 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
177 ; NoVLX-NEXT: andq $-32, %rsp
178 ; NoVLX-NEXT: subq $64, %rsp
179120 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
180 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
181 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
182 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
183 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
184121 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
185122 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
186 ; NoVLX-NEXT: kmovw %k0, (%rsp)
187 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
188 ; NoVLX-NEXT: shlq $32, %rcx
189 ; NoVLX-NEXT: movl (%rsp), %eax
190 ; NoVLX-NEXT: orq %rcx, %rax
191 ; NoVLX-NEXT: movq %rbp, %rsp
192 ; NoVLX-NEXT: popq %rbp
123 ; NoVLX-NEXT: kmovw %k0, %eax
124 ; NoVLX-NEXT: movzwl %ax, %eax
193125 ; NoVLX-NEXT: vzeroupper
194126 ; NoVLX-NEXT: retq
195127 entry:
210142 ;
211143 ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
212144 ; NoVLX: # %bb.0: # %entry
213 ; NoVLX-NEXT: pushq %rbp
214 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
215 ; NoVLX-NEXT: .cfi_offset %rbp, -16
216 ; NoVLX-NEXT: movq %rsp, %rbp
217 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
218 ; NoVLX-NEXT: andq $-32, %rsp
219 ; NoVLX-NEXT: subq $64, %rsp
220145 ; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
221 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
222 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
223 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
224 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
225146 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
226147 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
227 ; NoVLX-NEXT: kmovw %k0, (%rsp)
228 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
229 ; NoVLX-NEXT: shlq $32, %rcx
230 ; NoVLX-NEXT: movl (%rsp), %eax
231 ; NoVLX-NEXT: orq %rcx, %rax
232 ; NoVLX-NEXT: movq %rbp, %rsp
233 ; NoVLX-NEXT: popq %rbp
148 ; NoVLX-NEXT: kmovw %k0, %eax
149 ; NoVLX-NEXT: movzwl %ax, %eax
234150 ; NoVLX-NEXT: vzeroupper
235151 ; NoVLX-NEXT: retq
236152 entry:
253169 ;
254170 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
255171 ; NoVLX: # %bb.0: # %entry
256 ; NoVLX-NEXT: pushq %rbp
257 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
258 ; NoVLX-NEXT: .cfi_offset %rbp, -16
259 ; NoVLX-NEXT: movq %rsp, %rbp
260 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
261 ; NoVLX-NEXT: andq $-32, %rsp
262 ; NoVLX-NEXT: subq $64, %rsp
263172 ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
264173 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
265174 ; NoVLX-NEXT: kmovw %edi, %k1
266 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
267 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
268 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
269 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
270 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
271 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
272 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
273 ; NoVLX-NEXT: kmovw %k0, (%rsp)
274 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
275 ; NoVLX-NEXT: shlq $32, %rcx
276 ; NoVLX-NEXT: movl (%rsp), %eax
277 ; NoVLX-NEXT: orq %rcx, %rax
278 ; NoVLX-NEXT: movq %rbp, %rsp
279 ; NoVLX-NEXT: popq %rbp
175 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
176 ; NoVLX-NEXT: kmovw %k0, %eax
177 ; NoVLX-NEXT: movzwl %ax, %eax
280178 ; NoVLX-NEXT: vzeroupper
281179 ; NoVLX-NEXT: retq
282180 entry:
300198 ;
301199 ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
302200 ; NoVLX: # %bb.0: # %entry
303 ; NoVLX-NEXT: pushq %rbp
304 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
305 ; NoVLX-NEXT: .cfi_offset %rbp, -16
306 ; NoVLX-NEXT: movq %rsp, %rbp
307 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
308 ; NoVLX-NEXT: andq $-32, %rsp
309 ; NoVLX-NEXT: subq $64, %rsp
310201 ; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
311202 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
312203 ; NoVLX-NEXT: kmovw %edi, %k1
313 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
314 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
315 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
316 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
317 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
318 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
319 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
320 ; NoVLX-NEXT: kmovw %k0, (%rsp)
321 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
322 ; NoVLX-NEXT: shlq $32, %rcx
323 ; NoVLX-NEXT: movl (%rsp), %eax
324 ; NoVLX-NEXT: orq %rcx, %rax
325 ; NoVLX-NEXT: movq %rbp, %rsp
326 ; NoVLX-NEXT: popq %rbp
204 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
205 ; NoVLX-NEXT: kmovw %k0, %eax
206 ; NoVLX-NEXT: movzwl %ax, %eax
327207 ; NoVLX-NEXT: vzeroupper
328208 ; NoVLX-NEXT: retq
329209 entry:
349229 ;
350230 ; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
351231 ; NoVLX: # %bb.0: # %entry
352 ; NoVLX-NEXT: pushq %rbp
353 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
354 ; NoVLX-NEXT: .cfi_offset %rbp, -16
355 ; NoVLX-NEXT: movq %rsp, %rbp
356 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
357 ; NoVLX-NEXT: andq $-32, %rsp
358 ; NoVLX-NEXT: subq $64, %rsp
359232 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
360 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
361 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
233 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
362234 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
363 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
235 ; NoVLX-NEXT: kmovw %k0, %ecx
236 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
364237 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
365238 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
366 ; NoVLX-NEXT: kmovw %k0, (%rsp)
367 ; NoVLX-NEXT: movl (%rsp), %ecx
368 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
369 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
370 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
371 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
372 ; NoVLX-NEXT: shlq $32, %rax
373 ; NoVLX-NEXT: orq %rcx, %rax
374 ; NoVLX-NEXT: movq %rbp, %rsp
375 ; NoVLX-NEXT: popq %rbp
239 ; NoVLX-NEXT: kmovw %k0, %eax
240 ; NoVLX-NEXT: shll $16, %eax
241 ; NoVLX-NEXT: orl %ecx, %eax
376242 ; NoVLX-NEXT: vzeroupper
377243 ; NoVLX-NEXT: retq
378244 entry:
394260 ;
395261 ; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
396262 ; NoVLX: # %bb.0: # %entry
397 ; NoVLX-NEXT: pushq %rbp
398 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
399 ; NoVLX-NEXT: .cfi_offset %rbp, -16
400 ; NoVLX-NEXT: movq %rsp, %rbp
401 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
402 ; NoVLX-NEXT: andq $-32, %rsp
403 ; NoVLX-NEXT: subq $64, %rsp
404263 ; NoVLX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0
405 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
406 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
264 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
407265 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
408 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
266 ; NoVLX-NEXT: kmovw %k0, %ecx
267 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
409268 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
410269 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
411 ; NoVLX-NEXT: kmovw %k0, (%rsp)
412 ; NoVLX-NEXT: movl (%rsp), %ecx
413 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
414 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
415 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
416 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
417 ; NoVLX-NEXT: shlq $32, %rax
418 ; NoVLX-NEXT: orq %rcx, %rax
419 ; NoVLX-NEXT: movq %rbp, %rsp
420 ; NoVLX-NEXT: popq %rbp
270 ; NoVLX-NEXT: kmovw %k0, %eax
271 ; NoVLX-NEXT: shll $16, %eax
272 ; NoVLX-NEXT: orl %ecx, %eax
421273 ; NoVLX-NEXT: vzeroupper
422274 ; NoVLX-NEXT: retq
423275 entry:
441293 ;
442294 ; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
443295 ; NoVLX: # %bb.0: # %entry
444 ; NoVLX-NEXT: pushq %rbp
445 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
446 ; NoVLX-NEXT: .cfi_offset %rbp, -16
447 ; NoVLX-NEXT: movq %rsp, %rbp
448 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
449 ; NoVLX-NEXT: andq $-32, %rsp
450 ; NoVLX-NEXT: subq $96, %rsp
451 ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
452 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
453 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
454 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
455 ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
456 ; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
457 ; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
296 ; NoVLX-NEXT: kmovw %edi, %k1
297 ; NoVLX-NEXT: shrl $16, %edi
458298 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
459299 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
460 ; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
461300 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
462 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
463 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
464 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
465 ; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
466301 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
467 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
468 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
469 ; NoVLX-NEXT: kmovw %k0, (%rsp)
470 ; NoVLX-NEXT: movl (%rsp), %ecx
471 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
472 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
473 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
474 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
475 ; NoVLX-NEXT: shlq $32, %rax
476 ; NoVLX-NEXT: orq %rcx, %rax
477 ; NoVLX-NEXT: movq %rbp, %rsp
478 ; NoVLX-NEXT: popq %rbp
302 ; NoVLX-NEXT: kmovw %edi, %k2
303 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
304 ; NoVLX-NEXT: kmovw %k0, %ecx
305 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
306 ; NoVLX-NEXT: kmovw %k0, %eax
307 ; NoVLX-NEXT: shll $16, %eax
308 ; NoVLX-NEXT: orl %ecx, %eax
479309 ; NoVLX-NEXT: vzeroupper
480310 ; NoVLX-NEXT: retq
481311 entry:
500330 ;
501331 ; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
502332 ; NoVLX: # %bb.0: # %entry
503 ; NoVLX-NEXT: pushq %rbp
504 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
505 ; NoVLX-NEXT: .cfi_offset %rbp, -16
506 ; NoVLX-NEXT: movq %rsp, %rbp
507 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
508 ; NoVLX-NEXT: andq $-32, %rsp
509 ; NoVLX-NEXT: subq $96, %rsp
510 ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
511 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
512 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
513 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
514 ; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
515 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
516 ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
333 ; NoVLX-NEXT: kmovw %edi, %k1
334 ; NoVLX-NEXT: shrl $16, %edi
517335 ; NoVLX-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
518 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
519 ; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
520 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
521 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
522 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
523 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
524 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
336 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
337 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
525338 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
526 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
527 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
528 ; NoVLX-NEXT: kmovw %k0, (%rsp)
529 ; NoVLX-NEXT: movl (%rsp), %ecx
530 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
531 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
532 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
533 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
534 ; NoVLX-NEXT: shlq $32, %rax
535 ; NoVLX-NEXT: orq %rcx, %rax
536 ; NoVLX-NEXT: movq %rbp, %rsp
537 ; NoVLX-NEXT: popq %rbp
339 ; NoVLX-NEXT: kmovw %edi, %k2
340 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
341 ; NoVLX-NEXT: kmovw %k0, %ecx
342 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
343 ; NoVLX-NEXT: kmovw %k0, %eax
344 ; NoVLX-NEXT: shll $16, %eax
345 ; NoVLX-NEXT: orl %ecx, %eax
538346 ; NoVLX-NEXT: vzeroupper
539347 ; NoVLX-NEXT: retq
540348 entry:
674482 ;
675483 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
676484 ; NoVLX: # %bb.0: # %entry
677 ; NoVLX-NEXT: pushq %rbp
678 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
679 ; NoVLX-NEXT: .cfi_offset %rbp, -16
680 ; NoVLX-NEXT: movq %rsp, %rbp
681 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
682 ; NoVLX-NEXT: andq $-32, %rsp
683 ; NoVLX-NEXT: subq $32, %rsp
684485 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
685486 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
686487 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
687 ; NoVLX-NEXT: kshiftrw $7, %k0, %k1
688 ; NoVLX-NEXT: kmovw %k1, %r8d
689 ; NoVLX-NEXT: kshiftrw $6, %k0, %k1
690 ; NoVLX-NEXT: kmovw %k1, %r9d
691 ; NoVLX-NEXT: kshiftrw $5, %k0, %k1
692 ; NoVLX-NEXT: kmovw %k1, %r10d
693 ; NoVLX-NEXT: kshiftrw $4, %k0, %k1
694 ; NoVLX-NEXT: kmovw %k1, %esi
695 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
696 ; NoVLX-NEXT: kmovw %k1, %edi
697 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
698 ; NoVLX-NEXT: kmovw %k1, %eax
699 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
700 ; NoVLX-NEXT: kmovw %k1, %ecx
701 ; NoVLX-NEXT: kmovw %k0, %edx
702 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
703 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
704 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
705 ; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
706 ; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
707 ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
708 ; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
709 ; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
710 ; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
711 ; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
712 ; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
713 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
714 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
715 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
716 ; NoVLX-NEXT: kmovw %k0, (%rsp)
717 ; NoVLX-NEXT: movl (%rsp), %eax
718 ; NoVLX-NEXT: movq %rbp, %rsp
719 ; NoVLX-NEXT: popq %rbp
488 ; NoVLX-NEXT: kmovw %k0, %eax
720489 ; NoVLX-NEXT: vzeroupper
721490 ; NoVLX-NEXT: retq
722491 entry:
737506 ;
738507 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
739508 ; NoVLX: # %bb.0: # %entry
740 ; NoVLX-NEXT: pushq %rbp
741 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
742 ; NoVLX-NEXT: .cfi_offset %rbp, -16
743 ; NoVLX-NEXT: movq %rsp, %rbp
744 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
745 ; NoVLX-NEXT: andq $-32, %rsp
746 ; NoVLX-NEXT: subq $32, %rsp
747509 ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
748510 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
749511 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
750 ; NoVLX-NEXT: kshiftrw $7, %k0, %k1
751 ; NoVLX-NEXT: kmovw %k1, %r8d
752 ; NoVLX-NEXT: kshiftrw $6, %k0, %k1
753 ; NoVLX-NEXT: kmovw %k1, %r9d
754 ; NoVLX-NEXT: kshiftrw $5, %k0, %k1
755 ; NoVLX-NEXT: kmovw %k1, %r10d
756 ; NoVLX-NEXT: kshiftrw $4, %k0, %k1
757 ; NoVLX-NEXT: kmovw %k1, %esi
758 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
759 ; NoVLX-NEXT: kmovw %k1, %edi
760 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
761 ; NoVLX-NEXT: kmovw %k1, %eax
762 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
763 ; NoVLX-NEXT: kmovw %k1, %ecx
764 ; NoVLX-NEXT: kmovw %k0, %edx
765 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
766 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
767 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
768 ; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
769 ; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
770 ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
771 ; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
772 ; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
773 ; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
774 ; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
775 ; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
776 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
777 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
778 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
779 ; NoVLX-NEXT: kmovw %k0, (%rsp)
780 ; NoVLX-NEXT: movl (%rsp), %eax
781 ; NoVLX-NEXT: movq %rbp, %rsp
782 ; NoVLX-NEXT: popq %rbp
512 ; NoVLX-NEXT: kmovw %k0, %eax
783513 ; NoVLX-NEXT: vzeroupper
784514 ; NoVLX-NEXT: retq
785515 entry:
802532 ;
803533 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
804534 ; NoVLX: # %bb.0: # %entry
805 ; NoVLX-NEXT: pushq %rbp
806 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
807 ; NoVLX-NEXT: .cfi_offset %rbp, -16
808 ; NoVLX-NEXT: movq %rsp, %rbp
809 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
810 ; NoVLX-NEXT: andq $-32, %rsp
811 ; NoVLX-NEXT: subq $32, %rsp
812535 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
813536 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
814537 ; NoVLX-NEXT: kmovw %edi, %k1
815538 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
816 ; NoVLX-NEXT: kshiftrw $7, %k0, %k1
817 ; NoVLX-NEXT: kmovw %k1, %r8d
818 ; NoVLX-NEXT: kshiftrw $6, %k0, %k1
819 ; NoVLX-NEXT: kmovw %k1, %r9d
820 ; NoVLX-NEXT: kshiftrw $5, %k0, %k1
821 ; NoVLX-NEXT: kmovw %k1, %r10d
822 ; NoVLX-NEXT: kshiftrw $4, %k0, %k1
823 ; NoVLX-NEXT: kmovw %k1, %esi
824 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
825 ; NoVLX-NEXT: kmovw %k1, %edi
826 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
827 ; NoVLX-NEXT: kmovw %k1, %eax
828 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
829 ; NoVLX-NEXT: kmovw %k1, %ecx
830 ; NoVLX-NEXT: kmovw %k0, %edx
831 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
832 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
833 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
834 ; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
835 ; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
836 ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
837 ; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
838 ; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
839 ; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
840 ; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
841 ; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
842 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
843 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
844 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
845 ; NoVLX-NEXT: kmovw %k0, (%rsp)
846 ; NoVLX-NEXT: movl (%rsp), %eax
847 ; NoVLX-NEXT: movq %rbp, %rsp
848 ; NoVLX-NEXT: popq %rbp
539 ; NoVLX-NEXT: kmovw %k0, %eax
849540 ; NoVLX-NEXT: vzeroupper
850541 ; NoVLX-NEXT: retq
851542 entry:
869560 ;
870561 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
871562 ; NoVLX: # %bb.0: # %entry
872 ; NoVLX-NEXT: pushq %rbp
873 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
874 ; NoVLX-NEXT: .cfi_offset %rbp, -16
875 ; NoVLX-NEXT: movq %rsp, %rbp
876 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
877 ; NoVLX-NEXT: andq $-32, %rsp
878 ; NoVLX-NEXT: subq $32, %rsp
879563 ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
880564 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
881565 ; NoVLX-NEXT: kmovw %edi, %k1
882566 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
883 ; NoVLX-NEXT: kshiftrw $7, %k0, %k1
884 ; NoVLX-NEXT: kmovw %k1, %r8d
885 ; NoVLX-NEXT: kshiftrw $6, %k0, %k1
886 ; NoVLX-NEXT: kmovw %k1, %r9d
887 ; NoVLX-NEXT: kshiftrw $5, %k0, %k1
888 ; NoVLX-NEXT: kmovw %k1, %r10d
889 ; NoVLX-NEXT: kshiftrw $4, %k0, %k1
890 ; NoVLX-NEXT: kmovw %k1, %esi
891 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
892 ; NoVLX-NEXT: kmovw %k1, %edi
893 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
894 ; NoVLX-NEXT: kmovw %k1, %eax
895 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
896 ; NoVLX-NEXT: kmovw %k1, %ecx
897 ; NoVLX-NEXT: kmovw %k0, %edx
898 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
899 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
900 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
901 ; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
902 ; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
903 ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
904 ; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
905 ; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
906 ; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
907 ; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
908 ; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
909 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
910 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
911 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
912 ; NoVLX-NEXT: kmovw %k0, (%rsp)
913 ; NoVLX-NEXT: movl (%rsp), %eax
914 ; NoVLX-NEXT: movq %rbp, %rsp
915 ; NoVLX-NEXT: popq %rbp
567 ; NoVLX-NEXT: kmovw %k0, %eax
916568 ; NoVLX-NEXT: vzeroupper
917569 ; NoVLX-NEXT: retq
918570 entry:
937589 ;
938590 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
939591 ; NoVLX: # %bb.0: # %entry
940 ; NoVLX-NEXT: pushq %rbp
941 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
942 ; NoVLX-NEXT: .cfi_offset %rbp, -16
943 ; NoVLX-NEXT: movq %rsp, %rbp
944 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
945 ; NoVLX-NEXT: andq $-32, %rsp
946 ; NoVLX-NEXT: subq $64, %rsp
947592 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
948593 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
949594 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
950 ; NoVLX-NEXT: kshiftrw $7, %k0, %k1
951 ; NoVLX-NEXT: kmovw %k1, %r8d
952 ; NoVLX-NEXT: kshiftrw $6, %k0, %k1
953 ; NoVLX-NEXT: kmovw %k1, %r9d
954 ; NoVLX-NEXT: kshiftrw $5, %k0, %k1
955 ; NoVLX-NEXT: kmovw %k1, %r10d
956 ; NoVLX-NEXT: kshiftrw $4, %k0, %k1
957 ; NoVLX-NEXT: kmovw %k1, %esi
958 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
959 ; NoVLX-NEXT: kmovw %k1, %edi
960 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
961 ; NoVLX-NEXT: kmovw %k1, %eax
962 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
963 ; NoVLX-NEXT: kmovw %k1, %ecx
964 ; NoVLX-NEXT: kmovw %k0, %edx
965 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
966 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
967 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
968 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
969 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
970 ; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
971 ; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
972 ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
973 ; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
974 ; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
975 ; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
976 ; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
977 ; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
978 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
979 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
980 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
981 ; NoVLX-NEXT: kmovw %k0, (%rsp)
982 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
983 ; NoVLX-NEXT: shlq $32, %rcx
984 ; NoVLX-NEXT: movl (%rsp), %eax
985 ; NoVLX-NEXT: orq %rcx, %rax
986 ; NoVLX-NEXT: movq %rbp, %rsp
987 ; NoVLX-NEXT: popq %rbp
595 ; NoVLX-NEXT: kmovw %k0, %eax
596 ; NoVLX-NEXT: movzwl %ax, %eax
988597 ; NoVLX-NEXT: vzeroupper
989598 ; NoVLX-NEXT: retq
990599 entry:
1005614 ;
1006615 ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
1007616 ; NoVLX: # %bb.0: # %entry
1008 ; NoVLX-NEXT: pushq %rbp
1009 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
1010 ; NoVLX-NEXT: .cfi_offset %rbp, -16
1011 ; NoVLX-NEXT: movq %rsp, %rbp
1012 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
1013 ; NoVLX-NEXT: andq $-32, %rsp
1014 ; NoVLX-NEXT: subq $64, %rsp
1015617 ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
1016618 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
1017619 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
1018 ; NoVLX-NEXT: kshiftrw $7, %k0, %k1
1019 ; NoVLX-NEXT: kmovw %k1, %r8d
1020 ; NoVLX-NEXT: kshiftrw $6, %k0, %k1
1021 ; NoVLX-NEXT: kmovw %k1, %r9d
1022 ; NoVLX-NEXT: kshiftrw $5, %k0, %k1
1023 ; NoVLX-NEXT: kmovw %k1, %r10d
1024 ; NoVLX-NEXT: kshiftrw $4, %k0, %k1
1025 ; NoVLX-NEXT: kmovw %k1, %esi
1026 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
1027 ; NoVLX-NEXT: kmovw %k1, %edi
1028 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
1029 ; NoVLX-NEXT: kmovw %k1, %eax
1030 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
1031 ; NoVLX-NEXT: kmovw %k1, %ecx
1032 ; NoVLX-NEXT: kmovw %k0, %edx
1033 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
1034 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1035 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1036 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1037 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1038 ; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
1039 ; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
1040 ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
1041 ; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
1042 ; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
1043 ; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
1044 ; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
1045 ; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
1046 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
1047 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1048 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1049 ; NoVLX-NEXT: kmovw %k0, (%rsp)
1050 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
1051 ; NoVLX-NEXT: shlq $32, %rcx
1052 ; NoVLX-NEXT: movl (%rsp), %eax
1053 ; NoVLX-NEXT: orq %rcx, %rax
1054 ; NoVLX-NEXT: movq %rbp, %rsp
1055 ; NoVLX-NEXT: popq %rbp
620 ; NoVLX-NEXT: kmovw %k0, %eax
621 ; NoVLX-NEXT: movzwl %ax, %eax
1056622 ; NoVLX-NEXT: vzeroupper
1057623 ; NoVLX-NEXT: retq
1058624 entry:
1075641 ;
1076642 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
1077643 ; NoVLX: # %bb.0: # %entry
1078 ; NoVLX-NEXT: pushq %rbp
1079 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
1080 ; NoVLX-NEXT: .cfi_offset %rbp, -16
1081 ; NoVLX-NEXT: movq %rsp, %rbp
1082 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
1083 ; NoVLX-NEXT: andq $-32, %rsp
1084 ; NoVLX-NEXT: subq $64, %rsp
1085644 ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
1086645 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
1087646 ; NoVLX-NEXT: kmovw %edi, %k1
1088647 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
1089 ; NoVLX-NEXT: kshiftrw $7, %k0, %k1
1090 ; NoVLX-NEXT: kmovw %k1, %r8d
1091 ; NoVLX-NEXT: kshiftrw $6, %k0, %k1
1092 ; NoVLX-NEXT: kmovw %k1, %r9d
1093 ; NoVLX-NEXT: kshiftrw $5, %k0, %k1
1094 ; NoVLX-NEXT: kmovw %k1, %r10d
1095 ; NoVLX-NEXT: kshiftrw $4, %k0, %k1
1096 ; NoVLX-NEXT: kmovw %k1, %esi
1097 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
1098 ; NoVLX-NEXT: kmovw %k1, %edi
1099 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
1100 ; NoVLX-NEXT: kmovw %k1, %eax
1101 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
1102 ; NoVLX-NEXT: kmovw %k1, %ecx
1103 ; NoVLX-NEXT: kmovw %k0, %edx
1104 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
1105 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1106 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1107 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1108 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1109 ; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
1110 ; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
1111 ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
1112 ; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
1113 ; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
1114 ; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
1115 ; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
1116 ; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
1117 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
1118 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1119 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1120 ; NoVLX-NEXT: kmovw %k0, (%rsp)
1121 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
1122 ; NoVLX-NEXT: shlq $32, %rcx
1123 ; NoVLX-NEXT: movl (%rsp), %eax
1124 ; NoVLX-NEXT: orq %rcx, %rax
1125 ; NoVLX-NEXT: movq %rbp, %rsp
1126 ; NoVLX-NEXT: popq %rbp
648 ; NoVLX-NEXT: kmovw %k0, %eax
649 ; NoVLX-NEXT: movzwl %ax, %eax
1127650 ; NoVLX-NEXT: vzeroupper
1128651 ; NoVLX-NEXT: retq
1129652 entry:
1147670 ;
1148671 ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
1149672 ; NoVLX: # %bb.0: # %entry
1150 ; NoVLX-NEXT: pushq %rbp
1151 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
1152 ; NoVLX-NEXT: .cfi_offset %rbp, -16
1153 ; NoVLX-NEXT: movq %rsp, %rbp
1154 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
1155 ; NoVLX-NEXT: andq $-32, %rsp
1156 ; NoVLX-NEXT: subq $64, %rsp
1157673 ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
1158674 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
1159675 ; NoVLX-NEXT: kmovw %edi, %k1
1160676 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
1161 ; NoVLX-NEXT: kshiftrw $7, %k0, %k1
1162 ; NoVLX-NEXT: kmovw %k1, %r8d
1163 ; NoVLX-NEXT: kshiftrw $6, %k0, %k1
1164 ; NoVLX-NEXT: kmovw %k1, %r9d
1165 ; NoVLX-NEXT: kshiftrw $5, %k0, %k1
1166 ; NoVLX-NEXT: kmovw %k1, %r10d
1167 ; NoVLX-NEXT: kshiftrw $4, %k0, %k1
1168 ; NoVLX-NEXT: kmovw %k1, %esi
1169 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
1170 ; NoVLX-NEXT: kmovw %k1, %edi
1171 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
1172 ; NoVLX-NEXT: kmovw %k1, %eax
1173 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
1174 ; NoVLX-NEXT: kmovw %k1, %ecx
1175 ; NoVLX-NEXT: kmovw %k0, %edx
1176 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
1177 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1178 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1179 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1180 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1181 ; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
1182 ; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
1183 ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
1184 ; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
1185 ; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
1186 ; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
1187 ; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
1188 ; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
1189 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
1190 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
1191 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1192 ; NoVLX-NEXT: kmovw %k0, (%rsp)
1193 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
1194 ; NoVLX-NEXT: shlq $32, %rcx
1195 ; NoVLX-NEXT: movl (%rsp), %eax
1196 ; NoVLX-NEXT: orq %rcx, %rax
1197 ; NoVLX-NEXT: movq %rbp, %rsp
1198 ; NoVLX-NEXT: popq %rbp
677 ; NoVLX-NEXT: kmovw %k0, %eax
678 ; NoVLX-NEXT: movzwl %ax, %eax
1199679 ; NoVLX-NEXT: vzeroupper
1200680 ; NoVLX-NEXT: retq
1201681 entry:
1221701 ;
1222702 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
1223703 ; NoVLX: # %bb.0: # %entry
1224 ; NoVLX-NEXT: pushq %rbp
1225 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
1226 ; NoVLX-NEXT: .cfi_offset %rbp, -16
1227 ; NoVLX-NEXT: movq %rsp, %rbp
1228 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
1229 ; NoVLX-NEXT: andq $-32, %rsp
1230 ; NoVLX-NEXT: subq $32, %rsp
1231704 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1232 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
1233 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1234705 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1235706 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1236 ; NoVLX-NEXT: kmovw %k0, (%rsp)
1237 ; NoVLX-NEXT: movl (%rsp), %eax
1238 ; NoVLX-NEXT: movq %rbp, %rsp
1239 ; NoVLX-NEXT: popq %rbp
707 ; NoVLX-NEXT: kmovw %k0, %eax
1240708 ; NoVLX-NEXT: vzeroupper
1241709 ; NoVLX-NEXT: retq
1242710 entry:
1258726 ;
1259727 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
1260728 ; NoVLX: # %bb.0: # %entry
1261 ; NoVLX-NEXT: pushq %rbp
1262 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
1263 ; NoVLX-NEXT: .cfi_offset %rbp, -16
1264 ; NoVLX-NEXT: movq %rsp, %rbp
1265 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
1266 ; NoVLX-NEXT: andq $-32, %rsp
1267 ; NoVLX-NEXT: subq $32, %rsp
1268729 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
1269 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
1270 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1271730 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1272731 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1273 ; NoVLX-NEXT: kmovw %k0, (%rsp)
1274 ; NoVLX-NEXT: movl (%rsp), %eax
1275 ; NoVLX-NEXT: movq %rbp, %rsp
1276 ; NoVLX-NEXT: popq %rbp
732 ; NoVLX-NEXT: kmovw %k0, %eax
1277733 ; NoVLX-NEXT: vzeroupper
1278734 ; NoVLX-NEXT: retq
1279735 entry:
1297753 ;
1298754 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
1299755 ; NoVLX: # %bb.0: # %entry
1300 ; NoVLX-NEXT: pushq %rbp
1301 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
1302 ; NoVLX-NEXT: .cfi_offset %rbp, -16
1303 ; NoVLX-NEXT: movq %rsp, %rbp
1304 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
1305 ; NoVLX-NEXT: andq $-32, %rsp
1306 ; NoVLX-NEXT: subq $32, %rsp
1307756 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1308757 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1309758 ; NoVLX-NEXT: kmovw %edi, %k1
1310 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
1311 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
1312 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1313 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1314 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1315 ; NoVLX-NEXT: kmovw %k0, (%rsp)
1316 ; NoVLX-NEXT: movl (%rsp), %eax
1317 ; NoVLX-NEXT: movq %rbp, %rsp
1318 ; NoVLX-NEXT: popq %rbp
759 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
760 ; NoVLX-NEXT: kmovw %k0, %eax
1319761 ; NoVLX-NEXT: vzeroupper
1320762 ; NoVLX-NEXT: retq
1321763 entry:
1340782 ;
1341783 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
1342784 ; NoVLX: # %bb.0: # %entry
1343 ; NoVLX-NEXT: pushq %rbp
1344 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
1345 ; NoVLX-NEXT: .cfi_offset %rbp, -16
1346 ; NoVLX-NEXT: movq %rsp, %rbp
1347 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
1348 ; NoVLX-NEXT: andq $-32, %rsp
1349 ; NoVLX-NEXT: subq $32, %rsp
1350785 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
1351786 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1352787 ; NoVLX-NEXT: kmovw %edi, %k1
1353 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
1354 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
1355 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1356 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1357 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1358 ; NoVLX-NEXT: kmovw %k0, (%rsp)
1359 ; NoVLX-NEXT: movl (%rsp), %eax
1360 ; NoVLX-NEXT: movq %rbp, %rsp
1361 ; NoVLX-NEXT: popq %rbp
788 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
789 ; NoVLX-NEXT: kmovw %k0, %eax
1362790 ; NoVLX-NEXT: vzeroupper
1363791 ; NoVLX-NEXT: retq
1364792 entry:
1384812 ;
1385813 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
1386814 ; NoVLX: # %bb.0: # %entry
1387 ; NoVLX-NEXT: pushq %rbp
1388 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
1389 ; NoVLX-NEXT: .cfi_offset %rbp, -16
1390 ; NoVLX-NEXT: movq %rsp, %rbp
1391 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
1392 ; NoVLX-NEXT: andq $-32, %rsp
1393 ; NoVLX-NEXT: subq $64, %rsp
1394815 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1395 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
1396 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1397 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1398 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1399816 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1400817 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1401 ; NoVLX-NEXT: kmovw %k0, (%rsp)
1402 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
1403 ; NoVLX-NEXT: shlq $32, %rcx
1404 ; NoVLX-NEXT: movl (%rsp), %eax
1405 ; NoVLX-NEXT: orq %rcx, %rax
1406 ; NoVLX-NEXT: movq %rbp, %rsp
1407 ; NoVLX-NEXT: popq %rbp
818 ; NoVLX-NEXT: kmovw %k0, %eax
819 ; NoVLX-NEXT: movzwl %ax, %eax
1408820 ; NoVLX-NEXT: vzeroupper
1409821 ; NoVLX-NEXT: retq
1410822 entry:
1426838 ;
1427839 ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
1428840 ; NoVLX: # %bb.0: # %entry
1429 ; NoVLX-NEXT: pushq %rbp
1430 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
1431 ; NoVLX-NEXT: .cfi_offset %rbp, -16
1432 ; NoVLX-NEXT: movq %rsp, %rbp
1433 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
1434 ; NoVLX-NEXT: andq $-32, %rsp
1435 ; NoVLX-NEXT: subq $64, %rsp
1436841 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
1437 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
1438 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1439 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1440 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1441842 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1442843 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1443 ; NoVLX-NEXT: kmovw %k0, (%rsp)
1444 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
1445 ; NoVLX-NEXT: shlq $32, %rcx
1446 ; NoVLX-NEXT: movl (%rsp), %eax
1447 ; NoVLX-NEXT: orq %rcx, %rax
1448 ; NoVLX-NEXT: movq %rbp, %rsp
1449 ; NoVLX-NEXT: popq %rbp
844 ; NoVLX-NEXT: kmovw %k0, %eax
845 ; NoVLX-NEXT: movzwl %ax, %eax
1450846 ; NoVLX-NEXT: vzeroupper
1451847 ; NoVLX-NEXT: retq
1452848 entry:
1470866 ;
1471867 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
1472868 ; NoVLX: # %bb.0: # %entry
1473 ; NoVLX-NEXT: pushq %rbp
1474 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
1475 ; NoVLX-NEXT: .cfi_offset %rbp, -16
1476 ; NoVLX-NEXT: movq %rsp, %rbp
1477 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
1478 ; NoVLX-NEXT: andq $-32, %rsp
1479 ; NoVLX-NEXT: subq $64, %rsp
1480869 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1481870 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1482871 ; NoVLX-NEXT: kmovw %edi, %k1
1483 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
1484 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
1485 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1486 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1487 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1488 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1489 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1490 ; NoVLX-NEXT: kmovw %k0, (%rsp)
1491 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
1492 ; NoVLX-NEXT: shlq $32, %rcx
1493 ; NoVLX-NEXT: movl (%rsp), %eax
1494 ; NoVLX-NEXT: orq %rcx, %rax
1495 ; NoVLX-NEXT: movq %rbp, %rsp
1496 ; NoVLX-NEXT: popq %rbp
872 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
873 ; NoVLX-NEXT: kmovw %k0, %eax
874 ; NoVLX-NEXT: movzwl %ax, %eax
1497875 ; NoVLX-NEXT: vzeroupper
1498876 ; NoVLX-NEXT: retq
1499877 entry:
1518896 ;
1519897 ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
1520898 ; NoVLX: # %bb.0: # %entry
1521 ; NoVLX-NEXT: pushq %rbp
1522 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
1523 ; NoVLX-NEXT: .cfi_offset %rbp, -16
1524 ; NoVLX-NEXT: movq %rsp, %rbp
1525 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
1526 ; NoVLX-NEXT: andq $-32, %rsp
1527 ; NoVLX-NEXT: subq $64, %rsp
1528899 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
1529900 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1530901 ; NoVLX-NEXT: kmovw %edi, %k1
1531 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
1532 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
1533 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1534 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1535 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1536 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1537 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1538 ; NoVLX-NEXT: kmovw %k0, (%rsp)
1539 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
1540 ; NoVLX-NEXT: shlq $32, %rcx
1541 ; NoVLX-NEXT: movl (%rsp), %eax
1542 ; NoVLX-NEXT: orq %rcx, %rax
1543 ; NoVLX-NEXT: movq %rbp, %rsp
1544 ; NoVLX-NEXT: popq %rbp
902 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
903 ; NoVLX-NEXT: kmovw %k0, %eax
904 ; NoVLX-NEXT: movzwl %ax, %eax
1545905 ; NoVLX-NEXT: vzeroupper
1546906 ; NoVLX-NEXT: retq
1547907 entry:
1567927 ;
1568928 ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
1569929 ; NoVLX: # %bb.0: # %entry
1570 ; NoVLX-NEXT: pushq %rbp
1571 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
1572 ; NoVLX-NEXT: .cfi_offset %rbp, -16
1573 ; NoVLX-NEXT: movq %rsp, %rbp
1574 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
1575 ; NoVLX-NEXT: andq $-32, %rsp
1576 ; NoVLX-NEXT: subq $64, %rsp
1577 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
1578 ; NoVLX-NEXT: vmovq %xmm3, %rax
1579 ; NoVLX-NEXT: movq %rax, %rcx
1580 ; NoVLX-NEXT: movq %rax, %rdx
1581 ; NoVLX-NEXT: vmovd %eax, %xmm2
1582 ; NoVLX-NEXT: shrl $16, %eax
1583 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
1584 ; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
1585 ; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
1586 ; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
1587 ; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
1588 ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
1589 ; NoVLX-NEXT: shrq $32, %rdx
1590 ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
1591 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
1592 ; NoVLX-NEXT: shrq $48, %rcx
1593 ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
1594 ; NoVLX-NEXT: movl %eax, %ecx
1595 ; NoVLX-NEXT: shrl $16, %ecx
1596 ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
1597 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
1598 ; NoVLX-NEXT: movq %rax, %rcx
1599 ; NoVLX-NEXT: shrq $32, %rcx
1600 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
1601 ; NoVLX-NEXT: vmovq %xmm0, %rcx
1602 ; NoVLX-NEXT: shrq $48, %rax
1603 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
1604 ; NoVLX-NEXT: movl %ecx, %eax
1605 ; NoVLX-NEXT: shrl $16, %eax
1606 ; NoVLX-NEXT: vmovd %ecx, %xmm5
1607 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
1608 ; NoVLX-NEXT: movq %rcx, %rax
1609 ; NoVLX-NEXT: shrq $32, %rax
1610 ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
1611 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
1612 ; NoVLX-NEXT: shrq $48, %rcx
1613 ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
1614 ; NoVLX-NEXT: movl %eax, %ecx
1615 ; NoVLX-NEXT: shrl $16, %ecx
1616 ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
1617 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
1618 ; NoVLX-NEXT: movq %rax, %rcx
1619 ; NoVLX-NEXT: shrq $32, %rcx
1620 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
1621 ; NoVLX-NEXT: vmovq %xmm2, %rcx
1622 ; NoVLX-NEXT: shrq $48, %rax
1623 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
1624 ; NoVLX-NEXT: movl %ecx, %eax
1625 ; NoVLX-NEXT: shrl $16, %eax
1626 ; NoVLX-NEXT: vmovd %ecx, %xmm5
1627 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
1628 ; NoVLX-NEXT: movq %rcx, %rax
1629 ; NoVLX-NEXT: shrq $32, %rax
1630 ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
1631 ; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
1632 ; NoVLX-NEXT: shrq $48, %rcx
1633 ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
1634 ; NoVLX-NEXT: movl %eax, %ecx
1635 ; NoVLX-NEXT: shrl $16, %ecx
1636 ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
1637 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
1638 ; NoVLX-NEXT: movq %rax, %rcx
1639 ; NoVLX-NEXT: shrq $32, %rcx
1640 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
1641 ; NoVLX-NEXT: vmovq %xmm7, %rcx
1642 ; NoVLX-NEXT: shrq $48, %rax
1643 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
1644 ; NoVLX-NEXT: movl %ecx, %eax
1645 ; NoVLX-NEXT: shrl $16, %eax
1646 ; NoVLX-NEXT: vmovd %ecx, %xmm2
1647 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
1648 ; NoVLX-NEXT: movq %rcx, %rax
1649 ; NoVLX-NEXT: shrq $32, %rax
1650 ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
1651 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
1652 ; NoVLX-NEXT: shrq $48, %rcx
1653 ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
1654 ; NoVLX-NEXT: movl %eax, %ecx
1655 ; NoVLX-NEXT: shrl $16, %ecx
1656 ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
1657 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
1658 ; NoVLX-NEXT: movq %rax, %rcx
1659 ; NoVLX-NEXT: shrq $32, %rcx
1660 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
1661 ; NoVLX-NEXT: vmovq %xmm6, %rcx
1662 ; NoVLX-NEXT: shrq $48, %rax
1663 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
1664 ; NoVLX-NEXT: movl %ecx, %eax
1665 ; NoVLX-NEXT: shrl $16, %eax
1666 ; NoVLX-NEXT: vmovd %ecx, %xmm2
1667 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
1668 ; NoVLX-NEXT: movq %rcx, %rax
1669 ; NoVLX-NEXT: shrq $32, %rax
1670 ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
1671 ; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
1672 ; NoVLX-NEXT: shrq $48, %rcx
1673 ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
1674 ; NoVLX-NEXT: movl %eax, %ecx
1675 ; NoVLX-NEXT: shrl $16, %ecx
1676 ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
1677 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
1678 ; NoVLX-NEXT: movq %rax, %rcx
1679 ; NoVLX-NEXT: shrq $32, %rcx
1680 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
1681 ; NoVLX-NEXT: vmovq %xmm1, %rcx
1682 ; NoVLX-NEXT: shrq $48, %rax
1683 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
1684 ; NoVLX-NEXT: movl %ecx, %eax
1685 ; NoVLX-NEXT: shrl $16, %eax
1686 ; NoVLX-NEXT: vmovd %ecx, %xmm2
1687 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
1688 ; NoVLX-NEXT: movq %rcx, %rax
1689 ; NoVLX-NEXT: shrq $32, %rax
1690 ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
1691 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
1692 ; NoVLX-NEXT: shrq $48, %rcx
1693 ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
1694 ; NoVLX-NEXT: movl %eax, %ecx
1695 ; NoVLX-NEXT: shrl $16, %ecx
1696 ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
1697 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
1698 ; NoVLX-NEXT: movq %rax, %rcx
1699 ; NoVLX-NEXT: shrq $32, %rcx
1700 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
1701 ; NoVLX-NEXT: vmovq %xmm4, %rcx
1702 ; NoVLX-NEXT: shrq $48, %rax
1703 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
1704 ; NoVLX-NEXT: movl %ecx, %eax
1705 ; NoVLX-NEXT: shrl $16, %eax
1706 ; NoVLX-NEXT: vmovd %ecx, %xmm2
1707 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
1708 ; NoVLX-NEXT: movq %rcx, %rax
1709 ; NoVLX-NEXT: shrq $32, %rax
1710 ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
1711 ; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
1712 ; NoVLX-NEXT: shrq $48, %rcx
1713 ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
1714 ; NoVLX-NEXT: movl %eax, %ecx
1715 ; NoVLX-NEXT: shrl $16, %ecx
1716 ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
1717 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
1718 ; NoVLX-NEXT: movq %rax, %rcx
1719 ; NoVLX-NEXT: shrq $32, %rcx
1720 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
1721 ; NoVLX-NEXT: vmovq %xmm8, %rcx
1722 ; NoVLX-NEXT: shrq $48, %rax
1723 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
1724 ; NoVLX-NEXT: movl %ecx, %eax
1725 ; NoVLX-NEXT: shrl $16, %eax
1726 ; NoVLX-NEXT: vmovd %ecx, %xmm4
1727 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
1728 ; NoVLX-NEXT: movq %rcx, %rax
1729 ; NoVLX-NEXT: shrq $32, %rax
1730 ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
1731 ; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
1732 ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
1733 ; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
1734 ; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
1735 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1736 ; NoVLX-NEXT: shrq $48, %rcx
1737 ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
1738 ; NoVLX-NEXT: movl %eax, %ecx
1739 ; NoVLX-NEXT: shrl $16, %ecx
1740 ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
1741 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
1742 ; NoVLX-NEXT: movq %rax, %rcx
1743 ; NoVLX-NEXT: shrq $48, %rax
1744 ; NoVLX-NEXT: shrq $32, %rcx
1745 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
1746 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
1747 ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
1748 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
1749 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
1750 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1751 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1752 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1753 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1754 ; NoVLX-NEXT: kmovw %k0, (%rsp)
1755 ; NoVLX-NEXT: movl (%rsp), %ecx
1756 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
1757 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1758 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1759 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
1760 ; NoVLX-NEXT: shlq $32, %rax
1761 ; NoVLX-NEXT: orq %rcx, %rax
1762 ; NoVLX-NEXT: movq %rbp, %rsp
1763 ; NoVLX-NEXT: popq %rbp
1764 ; NoVLX-NEXT: vzeroupper
1765 ; NoVLX-NEXT: retq
1766 entry:
1767 %0 = bitcast <8 x i64> %__a to <32 x i16>
1768 %1 = bitcast <8 x i64> %__b to <32 x i16>
1769 %2 = icmp eq <32 x i16> %0, %1
1770 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32>
1771 %4 = bitcast <64 x i1> %3 to i64
1772 ret i64 %4
1773 }
1774
1775 define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
1776 ; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
1777 ; VLX: # %bb.0: # %entry
1778 ; VLX-NEXT: vpcmpeqw (%rdi), %zmm0, %k0
1779 ; VLX-NEXT: kmovq %k0, %rax
1780 ; VLX-NEXT: vzeroupper
1781 ; VLX-NEXT: retq
1782 ;
1783 ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
1784 ; NoVLX: # %bb.0: # %entry
1785 ; NoVLX-NEXT: pushq %rbp
1786 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
1787 ; NoVLX-NEXT: .cfi_offset %rbp, -16
1788 ; NoVLX-NEXT: movq %rsp, %rbp
1789 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
1790 ; NoVLX-NEXT: andq $-32, %rsp
1791 ; NoVLX-NEXT: subq $64, %rsp
1792 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
1793 ; NoVLX-NEXT: vmovq %xmm2, %rax
1794 ; NoVLX-NEXT: movq %rax, %rcx
1795 ; NoVLX-NEXT: movq %rax, %rdx
1796 ; NoVLX-NEXT: vmovd %eax, %xmm1
1797 ; NoVLX-NEXT: shrl $16, %eax
1798 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
1799 ; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
1800 ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
1801 ; NoVLX-NEXT: shrq $32, %rdx
1802 ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
1803 ; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
1804 ; NoVLX-NEXT: shrq $48, %rcx
1805 ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
1806 ; NoVLX-NEXT: movl %eax, %ecx
1807 ; NoVLX-NEXT: shrl $16, %ecx
1808 ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
1809 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
1810 ; NoVLX-NEXT: movq %rax, %rcx
1811 ; NoVLX-NEXT: shrq $32, %rcx
1812 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
1813 ; NoVLX-NEXT: vmovq %xmm0, %rcx
1814 ; NoVLX-NEXT: shrq $48, %rax
1815 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
1816 ; NoVLX-NEXT: movl %ecx, %eax
1817 ; NoVLX-NEXT: shrl $16, %eax
1818 ; NoVLX-NEXT: vmovd %ecx, %xmm3
1819 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
1820 ; NoVLX-NEXT: movq %rcx, %rax
1821 ; NoVLX-NEXT: shrq $32, %rax
1822 ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
1823 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
1824 ; NoVLX-NEXT: shrq $48, %rcx
1825 ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
1826 ; NoVLX-NEXT: movl %eax, %ecx
1827 ; NoVLX-NEXT: shrl $16, %ecx
1828 ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
1829 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
1830 ; NoVLX-NEXT: movq %rax, %rcx
1831 ; NoVLX-NEXT: shrq $32, %rcx
1832 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
1833 ; NoVLX-NEXT: vmovq %xmm4, %rcx
1834 ; NoVLX-NEXT: shrq $48, %rax
1835 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
1836 ; NoVLX-NEXT: movl %ecx, %eax
1837 ; NoVLX-NEXT: shrl $16, %eax
1838 ; NoVLX-NEXT: vmovd %ecx, %xmm3
1839 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
1840 ; NoVLX-NEXT: movq %rcx, %rax
1841 ; NoVLX-NEXT: shrq $32, %rax
1842 ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
1843 ; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
1844 ; NoVLX-NEXT: shrq $48, %rcx
1845 ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
1846 ; NoVLX-NEXT: movl %eax, %ecx
1847 ; NoVLX-NEXT: shrl $16, %ecx
1848 ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
1849 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
1850 ; NoVLX-NEXT: movq %rax, %rcx
1851 ; NoVLX-NEXT: shrq $32, %rcx
1852 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
1853 ; NoVLX-NEXT: vmovq %xmm1, %rcx
1854 ; NoVLX-NEXT: shrq $48, %rax
1855 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
1856 ; NoVLX-NEXT: movl %ecx, %eax
1857 ; NoVLX-NEXT: shrl $16, %eax
1858 ; NoVLX-NEXT: vmovd %ecx, %xmm4
1859 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
1860 ; NoVLX-NEXT: movq %rcx, %rax
1861 ; NoVLX-NEXT: shrq $32, %rax
1862 ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
1863 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
1864 ; NoVLX-NEXT: shrq $48, %rcx
1865 ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
1866 ; NoVLX-NEXT: movl %eax, %ecx
1867 ; NoVLX-NEXT: shrl $16, %ecx
1868 ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
1869 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
1870 ; NoVLX-NEXT: movq %rax, %rcx
1871 ; NoVLX-NEXT: shrq $32, %rcx
1872 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
1873 ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1874 ; NoVLX-NEXT: shrq $48, %rax
1875 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
1876 ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
1877 ; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm1
1878 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
1879 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
1880 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1881 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
1882 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1883 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1884 ; NoVLX-NEXT: kmovw %k0, (%rsp)
1885 ; NoVLX-NEXT: movl (%rsp), %ecx
1886 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
1887 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1888 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
1889 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
1890 ; NoVLX-NEXT: shlq $32, %rax
1891 ; NoVLX-NEXT: orq %rcx, %rax
1892 ; NoVLX-NEXT: movq %rbp, %rsp
1893 ; NoVLX-NEXT: popq %rbp
1894 ; NoVLX-NEXT: vzeroupper
1895 ; NoVLX-NEXT: retq
1896 entry:
1897 %0 = bitcast <8 x i64> %__a to <32 x i16>
1898 %load = load <8 x i64>, <8 x i64>* %__b
1899 %1 = bitcast <8 x i64> %load to <32 x i16>
1900 %2 = icmp eq <32 x i16> %0, %1
1901 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32>
1902 %4 = bitcast <64 x i1> %3 to i64
1903 ret i64 %4
1904 }
1905
1906 define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
1907 ; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
1908 ; VLX: # %bb.0: # %entry
1909 ; VLX-NEXT: kmovd %edi, %k1
1910 ; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
1911 ; VLX-NEXT: kmovq %k0, %rax
1912 ; VLX-NEXT: vzeroupper
1913 ; VLX-NEXT: retq
1914 ;
1915 ; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
1916 ; NoVLX: # %bb.0: # %entry
1917 ; NoVLX-NEXT: pushq %rbp
1918 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
1919 ; NoVLX-NEXT: .cfi_offset %rbp, -16
1920 ; NoVLX-NEXT: movq %rsp, %rbp
1921 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
1922 ; NoVLX-NEXT: andq $-32, %rsp
1923 ; NoVLX-NEXT: subq $96, %rsp
1924 ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
1925930 ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
1926931 ; NoVLX-NEXT: vmovq %xmm2, %rax
1927932 ; NoVLX-NEXT: movq %rax, %rcx
20791084 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
20801085 ; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
20811086 ; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
2082 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
2083 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
20841087 ; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
2085 ; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
2086 ; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
20871088 ; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
2088 ; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
2089 ; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
20901089 ; NoVLX-NEXT: shrq $48, %rcx
20911090 ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
20921091 ; NoVLX-NEXT: movl %eax, %ecx
21011100 ; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
21021101 ; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
21031102 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
2104 ; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
2105 ; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
2106 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
2107 ; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
2108 ; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
2109 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
2110 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
2111 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
2112 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2113 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
2114 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
21151103 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
2116 ; NoVLX-NEXT: kmovw %k0, (%rsp)
2117 ; NoVLX-NEXT: movl (%rsp), %ecx
2118 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
2119 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2120 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2121 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
2122 ; NoVLX-NEXT: shlq $32, %rax
2123 ; NoVLX-NEXT: orq %rcx, %rax
2124 ; NoVLX-NEXT: movq %rbp, %rsp
2125 ; NoVLX-NEXT: popq %rbp
1104 ; NoVLX-NEXT: kmovw %k0, %ecx
1105 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0
1106 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1107 ; NoVLX-NEXT: kmovw %k0, %eax
1108 ; NoVLX-NEXT: shll $16, %eax
1109 ; NoVLX-NEXT: orl %ecx, %eax
21261110 ; NoVLX-NEXT: vzeroupper
21271111 ; NoVLX-NEXT: retq
21281112 entry:
21291113 %0 = bitcast <8 x i64> %__a to <32 x i16>
21301114 %1 = bitcast <8 x i64> %__b to <32 x i16>
21311115 %2 = icmp eq <32 x i16> %0, %1
2132 %3 = bitcast i32 %__u to <32 x i1>
2133 %4 = and <32 x i1> %2, %3
2134 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32>
2135 %6 = bitcast <64 x i1> %5 to i64
2136 ret i64 %6
2137 }
2138
2139 define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
2140 ; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
2141 ; VLX: # %bb.0: # %entry
2142 ; VLX-NEXT: kmovd %edi, %k1
2143 ; VLX-NEXT: vpcmpeqw (%rsi), %zmm0, %k0 {%k1}
1116 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32>
1117 %4 = bitcast <64 x i1> %3 to i64
1118 ret i64 %4
1119 }
1120
1121 define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
1122 ; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
1123 ; VLX: # %bb.0: # %entry
1124 ; VLX-NEXT: vpcmpeqw (%rdi), %zmm0, %k0
21441125 ; VLX-NEXT: kmovq %k0, %rax
21451126 ; VLX-NEXT: vzeroupper
21461127 ; VLX-NEXT: retq
21471128 ;
2148 ; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
2149 ; NoVLX: # %bb.0: # %entry
2150 ; NoVLX-NEXT: pushq %rbp
2151 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
2152 ; NoVLX-NEXT: .cfi_offset %rbp, -16
2153 ; NoVLX-NEXT: movq %rsp, %rbp
2154 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
2155 ; NoVLX-NEXT: andq $-32, %rsp
2156 ; NoVLX-NEXT: subq $96, %rsp
2157 ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
1129 ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
1130 ; NoVLX: # %bb.0: # %entry
21581131 ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
21591132 ; NoVLX-NEXT: vmovq %xmm1, %rax
21601133 ; NoVLX-NEXT: movq %rax, %rcx
22361209 ; NoVLX-NEXT: movq %rax, %rcx
22371210 ; NoVLX-NEXT: shrq $32, %rcx
22381211 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
2239 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
2240 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
2241 ; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
2242 ; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
22431212 ; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
2244 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
2245 ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
22461213 ; NoVLX-NEXT: shrq $48, %rax
22471214 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
22481215 ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
2249 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
1216 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
22501217 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
2251 ; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
2252 ; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
2253 ; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm1, %ymm1
2254 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
2255 ; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
2256 ; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1
2257 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
2258 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
2259 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
2260 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2261 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
2262 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
22631218 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
2264 ; NoVLX-NEXT: kmovw %k0, (%rsp)
2265 ; NoVLX-NEXT: movl (%rsp), %ecx
2266 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
2267 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2268 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2269 ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
2270 ; NoVLX-NEXT: shlq $32, %rax
2271 ; NoVLX-NEXT: orq %rcx, %rax
2272 ; NoVLX-NEXT: movq %rbp, %rsp
2273 ; NoVLX-NEXT: popq %rbp
1219 ; NoVLX-NEXT: kmovw %k0, %ecx
1220 ; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm0
1221 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
1222 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
1223 ; NoVLX-NEXT: kmovw %k0, %eax
1224 ; NoVLX-NEXT: shll $16, %eax
1225 ; NoVLX-NEXT: orl %ecx, %eax
22741226 ; NoVLX-NEXT: vzeroupper
22751227 ; NoVLX-NEXT: retq
22761228 entry:
22781230 %load = load <8 x i64>, <8 x i64>* %__b
22791231 %1 = bitcast <8 x i64> %load to <32 x i16>
22801232 %2 = icmp eq <32 x i16> %0, %1
2281 %3 = bitcast i32 %__u to <32 x i1>
2282 %4 = and <32 x i1> %2, %3
2283 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32>
2284 %6 = bitcast <64 x i1> %5 to i64
2285 ret i64 %6
2286 }
2287
2288
2289 define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2290 ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
2291 ; VLX: # %bb.0: # %entry
2292 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
2293 ; VLX-NEXT: kmovd %k0, %eax
2294 ; VLX-NEXT: # kill: def %al killed %al killed %eax
2295 ; VLX-NEXT: retq
2296 ;
2297 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
2298 ; NoVLX: # %bb.0: # %entry
2299 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2300 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2301 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2302 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2303 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
2304 ; NoVLX-NEXT: kmovw %k0, %eax
2305 ; NoVLX-NEXT: # kill: def %al killed %al killed %eax
2306 ; NoVLX-NEXT: vzeroupper
2307 ; NoVLX-NEXT: retq
2308 entry:
2309 %0 = bitcast <2 x i64> %__a to <4 x i32>
2310 %1 = bitcast <2 x i64> %__b to <4 x i32>
2311 %2 = icmp eq <4 x i32> %0, %1
2312 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32>
2313 %4 = bitcast <8 x i1> %3 to i8
2314 ret i8 %4
2315 }
2316
2317 define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2318 ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
2319 ; VLX: # %bb.0: # %entry
2320 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
2321 ; VLX-NEXT: kmovd %k0, %eax
2322 ; VLX-NEXT: # kill: def %al killed %al killed %eax
2323 ; VLX-NEXT: retq
2324 ;
2325 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
2326 ; NoVLX: # %bb.0: # %entry
2327 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2328 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
2329 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2330 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2331 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
2332 ; NoVLX-NEXT: kmovw %k0, %eax
2333 ; NoVLX-NEXT: # kill: def %al killed %al killed %eax
2334 ; NoVLX-NEXT: vzeroupper
2335 ; NoVLX-NEXT: retq
2336 entry:
2337 %0 = bitcast <2 x i64> %__a to <4 x i32>
2338 %load = load <2 x i64>, <2 x i64>* %__b
2339 %1 = bitcast <2 x i64> %load to <4 x i32>
2340 %2 = icmp eq <4 x i32> %0, %1
2341 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32>
2342 %4 = bitcast <8 x i1> %3 to i8
2343 ret i8 %4
2344 }
2345
2346 define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2347 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
2348 ; VLX: # %bb.0: # %entry
2349 ; VLX-NEXT: kmovd %edi, %k1
2350 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
2351 ; VLX-NEXT: kmovd %k0, %eax
2352 ; VLX-NEXT: # kill: def %al killed %al killed %eax
2353 ; VLX-NEXT: retq
2354 ;
2355 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
2356 ; NoVLX: # %bb.0: # %entry
2357 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2358 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2359 ; NoVLX-NEXT: kmovw %edi, %k1
2360 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2361 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2362 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
2363 ; NoVLX-NEXT: kmovw %k0, %eax
2364 ; NoVLX-NEXT: # kill: def %al killed %al killed %eax
2365 ; NoVLX-NEXT: vzeroupper
2366 ; NoVLX-NEXT: retq
2367 entry:
2368 %0 = bitcast <2 x i64> %__a to <4 x i32>
2369 %1 = bitcast <2 x i64> %__b to <4 x i32>
2370 %2 = icmp eq <4 x i32> %0, %1
2371 %3 = bitcast i8 %__u to <8 x i1>
2372 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32>
2373 %4 = and <4 x i1> %2, %extract.i
2374 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32>
2375 %6 = bitcast <8 x i1> %5 to i8
2376 ret i8 %6
2377 }
2378
2379 define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2380 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
2381 ; VLX: # %bb.0: # %entry
2382 ; VLX-NEXT: kmovd %edi, %k1
2383 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
2384 ; VLX-NEXT: kmovd %k0, %eax
2385 ; VLX-NEXT: # kill: def %al killed %al killed %eax
2386 ; VLX-NEXT: retq
2387 ;
2388 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
2389 ; NoVLX: # %bb.0: # %entry
2390 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2391 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
2392 ; NoVLX-NEXT: kmovw %edi, %k1
2393 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2394 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2395 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
2396 ; NoVLX-NEXT: kmovw %k0, %eax
2397 ; NoVLX-NEXT: # kill: def %al killed %al killed %eax
2398 ; NoVLX-NEXT: vzeroupper
2399 ; NoVLX-NEXT: retq
2400 entry:
2401 %0 = bitcast <2 x i64> %__a to <4 x i32>
2402 %load = load <2 x i64>, <2 x i64>* %__b
2403 %1 = bitcast <2 x i64> %load to <4 x i32>
2404 %2 = icmp eq <4 x i32> %0, %1
2405 %3 = bitcast i8 %__u to <8 x i1>
2406 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32>
2407 %4 = and <4 x i1> %2, %extract.i
2408 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32>
2409 %6 = bitcast <8 x i1> %5 to i8
2410 ret i8 %6
2411 }
2412
2413
2414 define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
2415 ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
2416 ; VLX: # %bb.0: # %entry
2417 ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
2418 ; VLX-NEXT: kmovd %k0, %eax
2419 ; VLX-NEXT: # kill: def %al killed %al killed %eax
2420 ; VLX-NEXT: retq
2421 ;
2422 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
2423 ; NoVLX: # %bb.0: # %entry
2424 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2425 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
2426 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2427 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2428 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
2429 ; NoVLX-NEXT: kmovw %k0, %eax
2430 ; NoVLX-NEXT: # kill: def %al killed %al killed %eax
2431 ; NoVLX-NEXT: vzeroupper
2432 ; NoVLX-NEXT: retq
2433 entry:
2434 %0 = bitcast <2 x i64> %__a to <4 x i32>
2435 %load = load i32, i32* %__b
2436 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
2437 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32>
2438 %2 = icmp eq <4 x i32> %0, %1
2439 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32>
2440 %4 = bitcast <8 x i1> %3 to i8
2441 ret i8 %4
2442 }
2443
2444 define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
2445 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
2446 ; VLX: # %bb.0: # %entry
2447 ; VLX-NEXT: kmovd %edi, %k1
2448 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
2449 ; VLX-NEXT: kmovd %k0, %eax
2450 ; VLX-NEXT: # kill: def %al killed %al killed %eax
2451 ; VLX-NEXT: retq
2452 ;
2453 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
2454 ; NoVLX: # %bb.0: # %entry
2455 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2456 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
2457 ; NoVLX-NEXT: kmovw %edi, %k1
2458 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2459 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2460 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
2461 ; NoVLX-NEXT: kmovw %k0, %eax
2462 ; NoVLX-NEXT: # kill: def %al killed %al killed %eax
2463 ; NoVLX-NEXT: vzeroupper
2464 ; NoVLX-NEXT: retq
2465 entry:
2466 %0 = bitcast <2 x i64> %__a to <4 x i32>
2467 %load = load i32, i32* %__b
2468 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
2469 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32>
2470 %2 = icmp eq <4 x i32> %0, %1
2471 %3 = bitcast i8 %__u to <8 x i1>
2472 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32>
2473 %4 = and <4 x i1> %extract.i, %2
2474 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32>
2475 %6 = bitcast <8 x i1> %5 to i8
2476 ret i8 %6
2477 }
2478
2479
2480 define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2481 ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
2482 ; VLX: # %bb.0: # %entry
2483 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
2484 ; VLX-NEXT: kmovd %k0, %eax
2485 ; VLX-NEXT: # kill: def %ax killed %ax killed %eax
2486 ; VLX-NEXT: retq
2487 ;
2488 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
2489 ; NoVLX: # %bb.0: # %entry
2490 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2491 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2492 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2493 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2494 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
2495 ; NoVLX-NEXT: kmovw %k0, %eax
2496 ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
2497 ; NoVLX-NEXT: vzeroupper
2498 ; NoVLX-NEXT: retq
2499 entry:
2500 %0 = bitcast <2 x i64> %__a to <4 x i32>
2501 %1 = bitcast <2 x i64> %__b to <4 x i32>
2502 %2 = icmp eq <4 x i32> %0, %1
2503 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32>
2504 %4 = bitcast <16 x i1> %3 to i16
2505 ret i16 %4
2506 }
2507
2508 define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2509 ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
2510 ; VLX: # %bb.0: # %entry
2511 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
2512 ; VLX-NEXT: kmovd %k0, %eax
2513 ; VLX-NEXT: # kill: def %ax killed %ax killed %eax
2514 ; VLX-NEXT: retq
2515 ;
2516 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
2517 ; NoVLX: # %bb.0: # %entry
2518 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2519 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
2520 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2521 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2522 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
2523 ; NoVLX-NEXT: kmovw %k0, %eax
2524 ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
2525 ; NoVLX-NEXT: vzeroupper
2526 ; NoVLX-NEXT: retq
2527 entry:
2528 %0 = bitcast <2 x i64> %__a to <4 x i32>
2529 %load = load <2 x i64>, <2 x i64>* %__b
2530 %1 = bitcast <2 x i64> %load to <4 x i32>
2531 %2 = icmp eq <4 x i32> %0, %1
2532 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32>
2533 %4 = bitcast <16 x i1> %3 to i16
2534 ret i16 %4
2535 }
2536
2537 define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2538 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
2539 ; VLX: # %bb.0: # %entry
2540 ; VLX-NEXT: kmovd %edi, %k1
2541 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
2542 ; VLX-NEXT: kmovd %k0, %eax
2543 ; VLX-NEXT: # kill: def %ax killed %ax killed %eax
2544 ; VLX-NEXT: retq
2545 ;
2546 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
2547 ; NoVLX: # %bb.0: # %entry
2548 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2549 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2550 ; NoVLX-NEXT: kmovw %edi, %k1
2551 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2552 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2553 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
2554 ; NoVLX-NEXT: kmovw %k0, %eax
2555 ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
2556 ; NoVLX-NEXT: vzeroupper
2557 ; NoVLX-NEXT: retq
2558 entry:
2559 %0 = bitcast <2 x i64> %__a to <4 x i32>
2560 %1 = bitcast <2 x i64> %__b to <4 x i32>
2561 %2 = icmp eq <4 x i32> %0, %1
2562 %3 = bitcast i8 %__u to <8 x i1>
2563 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32>
2564 %4 = and <4 x i1> %2, %extract.i
2565 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32>
2566 %6 = bitcast <16 x i1> %5 to i16
2567 ret i16 %6
2568 }
2569
; Masked <4 x i32> equality compare with the second operand loaded from memory;
; the <4 x i1> result is widened to <16 x i1> and returned as an i16.
;   %__u - i8 mask, bitcast to <8 x i1>; a <4 x i1> slice of it is ANDed with
;          the compare result.
;   %__a - first operand, bitcast from <2 x i64> to <4 x i32>.
;   %__b - pointer to the second operand, loaded as <2 x i64>.
; Under the VLX prefix a single masked xmm compare with a memory operand is
; expected. Under the NoVLX prefix the operand is loaded into xmm1, the compare
; runs at zmm width, and a kshiftlw/kshiftrw $12 pair follows before the mask
; is moved to %eax.
; NOTE(review): every line carries a leading number that looks like diff-viewer
; line numbering rather than IR, and the shufflevector instructions have no mask
; constant after the mask type. Presumably both are extraction artifacts;
; confirm against the upstream test before running it.
2570 define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2571 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
2572 ; VLX: # %bb.0: # %entry
2573 ; VLX-NEXT: kmovd %edi, %k1
2574 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
2575 ; VLX-NEXT: kmovd %k0, %eax
2576 ; VLX-NEXT: # kill: def %ax killed %ax killed %eax
2577 ; VLX-NEXT: retq
2578 ;
2579 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
2580 ; NoVLX: # %bb.0: # %entry
2581 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2582 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
2583 ; NoVLX-NEXT: kmovw %edi, %k1
2584 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2585 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2586 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
2587 ; NoVLX-NEXT: kmovw %k0, %eax
2588 ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
2589 ; NoVLX-NEXT: vzeroupper
2590 ; NoVLX-NEXT: retq
2591 entry:
2592 %0 = bitcast <2 x i64> %__a to <4 x i32>
2593 %load = load <2 x i64>, <2 x i64>* %__b
2594 %1 = bitcast <2 x i64> %load to <4 x i32>
2595 %2 = icmp eq <4 x i32> %0, %1
2596 %3 = bitcast i8 %__u to <8 x i1>
2597 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32>
2598 %4 = and <4 x i1> %2, %extract.i
2599 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32>
2600 %6 = bitcast <16 x i1> %5 to i16
2601 ret i16 %6
2602 }
2603
2604
; Unmasked <4 x i32> equality compare against a vector built from one i32
; loaded from memory; the <4 x i1> result is widened to <16 x i1> and returned
; as an i16.
;   %__a - first operand, bitcast from <2 x i64> to <4 x i32>.
;   %__b - pointer to the scalar i32 that is inserted into element 0 of an
;          undef <4 x i32> and then shuffled.
; Under the VLX prefix the compare is expected to use a {1to4} embedded
; broadcast memory operand. Under the NoVLX prefix a separate vpbroadcastd into
; xmm1 is expected, then a zmm-width compare and a kshiftlw/kshiftrw $12 pair.
; NOTE(review): every line carries a leading number that looks like diff-viewer
; line numbering rather than IR, and the shufflevector instructions have no mask
; constant after the mask type. Presumably both are extraction artifacts (the
; broadcast in the expected assembly suggests the first shuffle is a splat);
; confirm against the upstream test before running it.
2605 define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
2606 ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
2607 ; VLX: # %bb.0: # %entry
2608 ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
2609 ; VLX-NEXT: kmovd %k0, %eax
2610 ; VLX-NEXT: # kill: def %ax killed %ax killed %eax
2611 ; VLX-NEXT: retq
2612 ;
2613 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
2614 ; NoVLX: # %bb.0: # %entry
2615 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2616 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
2617 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2618 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2619 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
2620 ; NoVLX-NEXT: kmovw %k0, %eax
2621 ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
2622 ; NoVLX-NEXT: vzeroupper
2623 ; NoVLX-NEXT: retq
2624 entry:
2625 %0 = bitcast <2 x i64> %__a to <4 x i32>
2626 %load = load i32, i32* %__b
2627 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
2628 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32>
2629 %2 = icmp eq <4 x i32> %0, %1
2630 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32>
2631 %4 = bitcast <16 x i1> %3 to i16
2632 ret i16 %4
2633 }
2634
; Masked <4 x i32> equality compare against a vector built from one i32 loaded
; from memory; the <4 x i1> result is widened to <16 x i1> and returned as an
; i16.
;   %__u - i8 mask, bitcast to <8 x i1>; a <4 x i1> slice of it is ANDed with
;          the compare result (here the mask slice is the first 'and' operand).
;   %__a - first operand, bitcast from <2 x i64> to <4 x i32>.
;   %__b - pointer to the scalar i32 that is inserted into element 0 of an
;          undef <4 x i32> and then shuffled.
; Under the VLX prefix one masked compare with a {1to4} embedded broadcast
; operand is expected. Under the NoVLX prefix a vpbroadcastd into xmm1 is
; expected, then a masked zmm-width compare and a kshiftlw/kshiftrw $12 pair.
; NOTE(review): every line carries a leading number that looks like diff-viewer
; line numbering rather than IR, and the shufflevector instructions have no mask
; constant after the mask type. Presumably both are extraction artifacts;
; confirm against the upstream test before running it.
2635 define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
2636 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
2637 ; VLX: # %bb.0: # %entry
2638 ; VLX-NEXT: kmovd %edi, %k1
2639 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
2640 ; VLX-NEXT: kmovd %k0, %eax
2641 ; VLX-NEXT: # kill: def %ax killed %ax killed %eax
2642 ; VLX-NEXT: retq
2643 ;
2644 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
2645 ; NoVLX: # %bb.0: # %entry
2646 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2647 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
2648 ; NoVLX-NEXT: kmovw %edi, %k1
2649 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2650 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0
2651 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0
2652 ; NoVLX-NEXT: kmovw %k0, %eax
2653 ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
2654 ; NoVLX-NEXT: vzeroupper
2655 ; NoVLX-NEXT: retq
2656 entry:
2657 %0 = bitcast <2 x i64> %__a to <4 x i32>
2658 %load = load i32, i32* %__b
2659 %vec = insertelement <4 x i32> undef, i32 %load, i32 0
2660 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32>
2661 %2 = icmp eq <4 x i32> %0, %1
2662 %3 = bitcast i8 %__u to <8 x i1>
2663 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32>
2664 %4 = and <4 x i1> %extract.i, %2
2665 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32>
2666 %6 = bitcast <16 x i1> %5 to i16
2667 ret i16 %6
2668 }
2669
2670
; Unmasked <4 x i32> equality compare of two register operands; the <4 x i1>
; result is widened to <32 x i1> and returned as an i32.
;   %__a, %__b - operands, each bitcast from <2 x i64> to <4 x i32>.
; Under the VLX prefix only an xmm compare plus kmovd is expected.
; Under the NoVLX prefix the expected sequence sets up a 32-byte-aligned stack
; frame, compares at zmm width, copies the four mask bits to GPRs one at a time
; (kshiftrw + kmovw), stores a zeroed mask register to a stack slot, rebuilds
; the bits with vpinsrb/vpmovsxbd/vpslld/vptestmd, stores that mask at (%rsp),
; and reads the 32-bit result back with movl.
; NOTE(review): presumably the two 16-bit mask stores are the high and low
; halves of the i32 that movl reloads - confirm.
; NOTE(review): every line carries a leading number that looks like diff-viewer
; line numbering rather than IR, and the shufflevector instruction has no mask
; constant after the mask type. Presumably both are extraction artifacts;
; confirm against the upstream test before running it.
2671 define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2672 ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
2673 ; VLX: # %bb.0: # %entry
2674 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
2675 ; VLX-NEXT: kmovd %k0, %eax
2676 ; VLX-NEXT: retq
2677 ;
2678 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
2679 ; NoVLX: # %bb.0: # %entry
2680 ; NoVLX-NEXT: pushq %rbp
2681 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
2682 ; NoVLX-NEXT: .cfi_offset %rbp, -16
2683 ; NoVLX-NEXT: movq %rsp, %rbp
2684 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
2685 ; NoVLX-NEXT: andq $-32, %rsp
2686 ; NoVLX-NEXT: subq $32, %rsp
2687 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2688 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2689 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2690 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2691 ; NoVLX-NEXT: kmovw %k1, %eax
2692 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2693 ; NoVLX-NEXT: kmovw %k1, %ecx
2694 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
2695 ; NoVLX-NEXT: kmovw %k1, %edx
2696 ; NoVLX-NEXT: kmovw %k0, %esi
2697 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
2698 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2699 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
2700 ; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
2701 ; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
2702 ; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
2703 ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
2704 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
2705 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
2706 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
2707 ; NoVLX-NEXT: kmovw %k0, (%rsp)
2708 ; NoVLX-NEXT: movl (%rsp), %eax
2709 ; NoVLX-NEXT: movq %rbp, %rsp
2710 ; NoVLX-NEXT: popq %rbp
2711 ; NoVLX-NEXT: vzeroupper
2712 ; NoVLX-NEXT: retq
2713 entry:
2714 %0 = bitcast <2 x i64> %__a to <4 x i32>
2715 %1 = bitcast <2 x i64> %__b to <4 x i32>
2716 %2 = icmp eq <4 x i32> %0, %1
2717 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32>
2718 %4 = bitcast <32 x i1> %3 to i32
2719 ret i32 %4
2720 }
2721
; Unmasked <4 x i32> equality compare with the second operand loaded from
; memory; the <4 x i1> result is widened to <32 x i1> and returned as an i32.
;   %__a - first operand, bitcast from <2 x i64> to <4 x i32>.
;   %__b - pointer to the second operand, loaded as <2 x i64>.
; Under the VLX prefix only an xmm compare with a memory operand plus kmovd is
; expected. Under the NoVLX prefix the expected sequence sets up a
; 32-byte-aligned stack frame, loads the operand into xmm1, compares at zmm
; width, copies the four mask bits to GPRs one at a time (kshiftrw + kmovw),
; stores a zeroed mask register to a stack slot, rebuilds the bits with
; vpinsrb/vpmovsxbd/vpslld/vptestmd, stores that mask at (%rsp), and reads the
; 32-bit result back with movl.
; NOTE(review): presumably the two 16-bit mask stores are the high and low
; halves of the i32 that movl reloads - confirm.
; NOTE(review): every line carries a leading number that looks like diff-viewer
; line numbering rather than IR, and the shufflevector instruction has no mask
; constant after the mask type. Presumably both are extraction artifacts;
; confirm against the upstream test before running it.
2722 define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2723 ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
2724 ; VLX: # %bb.0: # %entry
2725 ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
2726 ; VLX-NEXT: kmovd %k0, %eax
2727 ; VLX-NEXT: retq
2728 ;
2729 ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
2730 ; NoVLX: # %bb.0: # %entry
2731 ; NoVLX-NEXT: pushq %rbp
2732 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
2733 ; NoVLX-NEXT: .cfi_offset %rbp, -16
2734 ; NoVLX-NEXT: movq %rsp, %rbp
2735 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
2736 ; NoVLX-NEXT: andq $-32, %rsp
2737 ; NoVLX-NEXT: subq $32, %rsp
2738 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2739 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
2740 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
2741 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2742 ; NoVLX-NEXT: kmovw %k1, %eax
2743 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2744 ; NoVLX-NEXT: kmovw %k1, %ecx
2745 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
2746 ; NoVLX-NEXT: kmovw %k1, %edx
2747 ; NoVLX-NEXT: kmovw %k0, %esi
2748 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
2749 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2750 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
2751 ; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
2752 ; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
2753 ; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
2754 ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
2755 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
2756 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
2757 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
2758 ; NoVLX-NEXT: kmovw %k0, (%rsp)
2759 ; NoVLX-NEXT: movl (%rsp), %eax
2760 ; NoVLX-NEXT: movq %rbp, %rsp
2761 ; NoVLX-NEXT: popq %rbp
2762 ; NoVLX-NEXT: vzeroupper
2763 ; NoVLX-NEXT: retq
2764 entry:
2765 %0 = bitcast <2 x i64> %__a to <4 x i32>
2766 %load = load <2 x i64>, <2 x i64>* %__b
2767 %1 = bitcast <2 x i64> %load to <4 x i32>
2768 %2 = icmp eq <4 x i32> %0, %1
2769 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32>
2770 %4 = bitcast <32 x i1> %3 to i32
2771 ret i32 %4
2772 }
2773
; Masked <4 x i32> equality compare of two register operands; the <4 x i1>
; result is widened to <32 x i1> and returned as an i32.
;   %__u       - i8 mask, bitcast to <8 x i1>; a <4 x i1> slice of it is ANDed
;                with the compare result.
;   %__a, %__b - operands, each bitcast from <2 x i64> to <4 x i32>.
; Under the VLX prefix a single masked xmm compare plus kmovd is expected.
; Under the NoVLX prefix the expected sequence sets up a 32-byte-aligned stack
; frame, runs a masked zmm-width compare, copies the four mask bits to GPRs one
; at a time (kshiftrw + kmovw), stores a zeroed mask register to a stack slot,
; rebuilds the bits with vpinsrb/vpmovsxbd/vpslld/vptestmd, stores that mask at
; (%rsp), and reads the 32-bit result back with movl.
; NOTE(review): presumably the two 16-bit mask stores are the high and low
; halves of the i32 that movl reloads - confirm.
; NOTE(review): every line carries a leading number that looks like diff-viewer
; line numbering rather than IR, and the shufflevector instructions have no mask
; constant after the mask type. Presumably both are extraction artifacts;
; confirm against the upstream test before running it.
2774 define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
2775 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
2776 ; VLX: # %bb.0: # %entry
2777 ; VLX-NEXT: kmovd %edi, %k1
2778 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
2779 ; VLX-NEXT: kmovd %k0, %eax
2780 ; VLX-NEXT: retq
2781 ;
2782 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
2783 ; NoVLX: # %bb.0: # %entry
2784 ; NoVLX-NEXT: pushq %rbp
2785 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
2786 ; NoVLX-NEXT: .cfi_offset %rbp, -16
2787 ; NoVLX-NEXT: movq %rsp, %rbp
2788 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
2789 ; NoVLX-NEXT: andq $-32, %rsp
2790 ; NoVLX-NEXT: subq $32, %rsp
2791 ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
2792 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2793 ; NoVLX-NEXT: kmovw %edi, %k1
2794 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2795 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2796 ; NoVLX-NEXT: kmovw %k1, %eax
2797 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2798 ; NoVLX-NEXT: kmovw %k1, %ecx
2799 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
2800 ; NoVLX-NEXT: kmovw %k1, %edx
2801 ; NoVLX-NEXT: kmovw %k0, %esi
2802 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
2803 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2804 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
2805 ; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
2806 ; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
2807 ; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
2808 ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
2809 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
2810 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
2811 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
2812 ; NoVLX-NEXT: kmovw %k0, (%rsp)
2813 ; NoVLX-NEXT: movl (%rsp), %eax
2814 ; NoVLX-NEXT: movq %rbp, %rsp
2815 ; NoVLX-NEXT: popq %rbp
2816 ; NoVLX-NEXT: vzeroupper
2817 ; NoVLX-NEXT: retq
2818 entry:
2819 %0 = bitcast <2 x i64> %__a to <4 x i32>
2820 %1 = bitcast <2 x i64> %__b to <4 x i32>
2821 %2 = icmp eq <4 x i32> %0, %1
2822 %3 = bitcast i8 %__u to <8 x i1>
2823 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32>
2824 %4 = and <4 x i1> %2, %extract.i
2825 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32>
2826 %6 = bitcast <32 x i1> %5 to i32
2827 ret i32 %6
2828 }
2829
; Masked <4 x i32> equality compare with the second operand loaded from memory;
; the <4 x i1> result is widened to <32 x i1> and returned as an i32.
;   %__u - i8 mask, bitcast to <8 x i1>; a <4 x i1> slice of it is ANDed with
;          the compare result.
;   %__a - first operand, bitcast from <2 x i64> to <4 x i32>.
;   %__b - pointer to the second operand, loaded as <2 x i64>.
; Under the VLX prefix a single masked xmm compare with a memory operand plus
; kmovd is expected. Under the NoVLX prefix the expected sequence sets up a
; 32-byte-aligned stack frame, loads the operand into xmm1, runs a masked
; zmm-width compare, copies the four mask bits to GPRs one at a time
; (kshiftrw + kmovw), stores a zeroed mask register to a stack slot, rebuilds
; the bits with vpinsrb/vpmovsxbd/vpslld/vptestmd, stores that mask at (%rsp),
; and reads the 32-bit result back with movl.
; NOTE(review): presumably the two 16-bit mask stores are the high and low
; halves of the i32 that movl reloads - confirm.
; NOTE(review): every line carries a leading number that looks like diff-viewer
; line numbering rather than IR, and the shufflevector instructions have no mask
; constant after the mask type. Presumably both are extraction artifacts;
; confirm against the upstream test before running it.
2830 define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
2831 ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
2832 ; VLX: # %bb.0: # %entry
2833 ; VLX-NEXT: kmovd %edi, %k1
2834 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
2835 ; VLX-NEXT: kmovd %k0, %eax
2836 ; VLX-NEXT: retq
2837 ;
2838 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
2839 ; NoVLX: # %bb.0: # %entry
2840 ; NoVLX-NEXT: pushq %rbp
2841 ; NoVLX-NEXT: .cfi_def_cfa_offset 16
2842 ; NoVLX-NEXT: .cfi_offset %rbp, -16
2843 ; NoVLX-NEXT: movq %rsp, %rbp
2844 ; NoVLX-NEXT: .cfi_def_cfa_register %rbp
2845 ; NoVLX-NEXT: andq $-32, %rsp
2846 ; NoVLX-NEXT: subq $32, %rsp
2847 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
2848 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
2849 ; NoVLX-NEXT: kmovw %edi, %k1
2850 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
2851 ; NoVLX-NEXT: kshiftrw $3, %k0, %k1
2852 ; NoVLX-NEXT: kmovw %k1, %eax
2853 ; NoVLX-NEXT: kshiftrw $2, %k0, %k1
2854 ; NoVLX-NEXT: kmovw %k1, %ecx
2855 ; NoVLX-NEXT: kshiftrw $1, %k0, %k1
2856 ; NoVLX-NEXT: kmovw %k1, %edx
2857 ; NoVLX-NEXT: kmovw %k0, %esi
2858 ; NoVLX-NEXT: kxorw %k0, %k0, %k0
2859 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
2860 ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
2861 ; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
2862 ; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
2863 ; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
2864 ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
2865 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
2866 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
2867 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
2868 ; NoVLX-NEXT: kmovw %k0, (%rsp)
2869 ; NoVLX-NEXT: movl (%rsp), %eax
2870 ; NoVLX-NEXT: movq %rbp, %rsp
2871 ; NoVLX-NEXT: popq %rbp
2872 ; NoVLX-NEXT: vzeroupper
2873 ; NoVLX-NEXT: retq
2874 entry:
2875 %0 = bitcast <2 x i64> %__a to <4 x i32>
2876 %load = load <2 x i64>, <2 x i64>* %__b
2877 %1 = bitcast <2 x i64> %load to <4 x i32>
2878 %2 = icmp eq <4 x i32> %0, %1
2879 %3 = bitcast i8 %__u to <8 x i1>
2880 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32>
2881 %4 = and <4 x i1> %2, %extract.i
2882 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32>
2883 %6 = bitcast <32 x i1> %5 to i32
2884 ret i32 %6
2885 }
2886
2887
2888