llvm.org GIT mirror llvm / b237097
[X86][AVX512BW] Vectorize v64i8 vector shifts

Differential Revision: https://reviews.llvm.org/D28447

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291665 91177308-0d34-0410-b5e6-96231b3b80d8

Simon Pilgrim, 3 years ago
8 changed file(s) with 212 addition(s) and 3118 deletion(s). Raw diff Collapse all Expand all
2175121751 }
2175221752
2175321753 if (VT == MVT::v16i8 ||
21754 (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP())) {
21754 (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) ||
21755 (VT == MVT::v64i8 && Subtarget.hasBWI())) {
2175521756 MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
2175621757 unsigned ShiftOpcode = Op->getOpcode();
2175721758
2175821759 auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
21759 // On SSE41 targets we make use of the fact that VSELECT lowers
21760 // to PBLENDVB which selects bytes based just on the sign bit.
21761 if (Subtarget.hasSSE41()) {
21760 if (VT.is512BitVector()) {
21761 // On AVX512BW targets we make use of the fact that VSELECT lowers
21762 // to a masked blend which selects bytes based just on the sign bit
21763 // extracted to a mask.
21764 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
21765 V0 = DAG.getBitcast(VT, V0);
21766 V1 = DAG.getBitcast(VT, V1);
21767 Sel = DAG.getBitcast(VT, Sel);
21768 Sel = DAG.getNode(X86ISD::CVT2MASK, dl, MaskVT, Sel);
21769 return DAG.getBitcast(SelVT,
21770 DAG.getNode(ISD::VSELECT, dl, VT, Sel, V0, V1));
21771 } else if (Subtarget.hasSSE41()) {
21772 // On SSE41 targets we make use of the fact that VSELECT lowers
21773 // to PBLENDVB which selects bytes based just on the sign bit.
2176221774 V0 = DAG.getBitcast(VT, V0);
2176321775 V1 = DAG.getBitcast(VT, V1);
2176421776 Sel = DAG.getBitcast(VT, Sel);
321321 { ISD::SHL, MVT::v32i16, 1 }, // vpsllvw
322322 { ISD::SRL, MVT::v32i16, 1 }, // vpsrlvw
323323 { ISD::SRA, MVT::v32i16, 1 }, // vpsravw
324
325 { ISD::SHL, MVT::v64i8, 11 }, // vpblendvb sequence.
326 { ISD::SRL, MVT::v64i8, 11 }, // vpblendvb sequence.
327 { ISD::SRA, MVT::v64i8, 24 }, // vpblendvb sequence.
324328
325329 { ISD::MUL, MVT::v64i8, 11 }, // extend/pmullw/trunc sequence.
326330 { ISD::MUL, MVT::v32i8, 4 }, // extend/pmullw/trunc sequence.
164164 ; AVX: Found an estimated cost of 96 for instruction: %shift
165165 ; AVX2: Found an estimated cost of 48 for instruction: %shift
166166 ; AVX512F: Found an estimated cost of 48 for instruction: %shift
167 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
167 ; AVX512BW: Found an estimated cost of 24 for instruction: %shift
168168 ; AVX512VL: Found an estimated cost of 48 for instruction: %shift
169 ; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift
169 ; AVX512BWVL: Found an estimated cost of 24 for instruction: %shift
170170 ; XOP: Found an estimated cost of 8 for instruction: %shift
171171 %shift = ashr <64 x i8> %a, %b
172172 ret <64 x i8> %shift
332332 ; AVX: Found an estimated cost of 96 for instruction: %shift
333333 ; AVX2: Found an estimated cost of 48 for instruction: %shift
334334 ; AVX512F: Found an estimated cost of 48 for instruction: %shift
335 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
335 ; AVX512BW: Found an estimated cost of 24 for instruction: %shift
336336 ; AVX512VL: Found an estimated cost of 48 for instruction: %shift
337 ; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift
337 ; AVX512BWVL: Found an estimated cost of 24 for instruction: %shift
338338 ; XOP: Found an estimated cost of 8 for instruction: %shift
339339 %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
340340 %shift = ashr <64 x i8> %a, %splat
490490 ; AVX: Found an estimated cost of 96 for instruction: %shift
491491 ; AVX2: Found an estimated cost of 48 for instruction: %shift
492492 ; AVX512F: Found an estimated cost of 48 for instruction: %shift
493 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
493 ; AVX512BW: Found an estimated cost of 24 for instruction: %shift
494494 ; AVX512VL: Found an estimated cost of 48 for instruction: %shift
495 ; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift
495 ; AVX512BWVL: Found an estimated cost of 24 for instruction: %shift
496496 ; XOP: Found an estimated cost of 8 for instruction: %shift
497497 %shift = ashr <64 x i8> %a,
498498 ret <64 x i8> %shift
164164 ; AVX: Found an estimated cost of 48 for instruction: %shift
165165 ; AVX2: Found an estimated cost of 22 for instruction: %shift
166166 ; AVX512F: Found an estimated cost of 22 for instruction: %shift
167 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
167 ; AVX512BW: Found an estimated cost of 11 for instruction: %shift
168168 ; AVX512VL: Found an estimated cost of 22 for instruction: %shift
169 ; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift
169 ; AVX512BWVL: Found an estimated cost of 11 for instruction: %shift
170170 ; XOP: Found an estimated cost of 8 for instruction: %shift
171171 %shift = lshr <64 x i8> %a, %b
172172 ret <64 x i8> %shift
335335 ; AVX: Found an estimated cost of 48 for instruction: %shift
336336 ; AVX2: Found an estimated cost of 22 for instruction: %shift
337337 ; AVX512F: Found an estimated cost of 22 for instruction: %shift
338 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
338 ; AVX512BW: Found an estimated cost of 11 for instruction: %shift
339339 ; AVX512VL: Found an estimated cost of 22 for instruction: %shift
340 ; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift
340 ; AVX512BWVL: Found an estimated cost of 11 for instruction: %shift
341341 ; XOP: Found an estimated cost of 8 for instruction: %shift
342342 %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
343343 %shift = lshr <64 x i8> %a, %splat
496496 ; AVX: Found an estimated cost of 48 for instruction: %shift
497497 ; AVX2: Found an estimated cost of 22 for instruction: %shift
498498 ; AVX512F: Found an estimated cost of 22 for instruction: %shift
499 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
499 ; AVX512BW: Found an estimated cost of 11 for instruction: %shift
500500 ; AVX512VL: Found an estimated cost of 22 for instruction: %shift
501 ; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift
501 ; AVX512BWVL: Found an estimated cost of 11 for instruction: %shift
502502 ; XOP: Found an estimated cost of 8 for instruction: %shift
503503 %shift = lshr <64 x i8> %a,
504504 ret <64 x i8> %shift
163163 ; AVX: Found an estimated cost of 44 for instruction: %shift
164164 ; AVX2: Found an estimated cost of 22 for instruction: %shift
165165 ; AVX512F: Found an estimated cost of 22 for instruction: %shift
166 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
166 ; AVX512BW: Found an estimated cost of 11 for instruction: %shift
167167 ; AVX512VL: Found an estimated cost of 22 for instruction: %shift
168 ; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift
168 ; AVX512BWVL: Found an estimated cost of 11 for instruction: %shift
169169 ; XOP: Found an estimated cost of 4 for instruction: %shift
170170 %shift = shl <64 x i8> %a, %b
171171 ret <64 x i8> %shift
334334 ; AVX: Found an estimated cost of 44 for instruction: %shift
335335 ; AVX2: Found an estimated cost of 22 for instruction: %shift
336336 ; AVX512F: Found an estimated cost of 22 for instruction: %shift
337 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
337 ; AVX512BW: Found an estimated cost of 11 for instruction: %shift
338338 ; AVX512VL: Found an estimated cost of 22 for instruction: %shift
339 ; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift
339 ; AVX512BWVL: Found an estimated cost of 11 for instruction: %shift
340340 ; XOP: Found an estimated cost of 4 for instruction: %shift
341341 %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
342342 %shift = shl <64 x i8> %a, %splat
497497 ; AVX: Found an estimated cost of 44 for instruction: %shift
498498 ; AVX2: Found an estimated cost of 22 for instruction: %shift
499499 ; AVX512F: Found an estimated cost of 22 for instruction: %shift
500 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
500 ; AVX512BW: Found an estimated cost of 11 for instruction: %shift
501501 ; AVX512VL: Found an estimated cost of 22 for instruction: %shift
502 ; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift
502 ; AVX512BWVL: Found an estimated cost of 11 for instruction: %shift
503503 ; XOP: Found an estimated cost of 4 for instruction: %shift
504504 %shift = shl <64 x i8> %a,
505505 ret <64 x i8> %shift
9999 ;
100100 ; AVX512BW-LABEL: var_shift_v64i8:
101101 ; AVX512BW: # BB#0:
102 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2
103 ; AVX512BW-NEXT: vpextrb $1, %xmm2, %ecx
104 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm3
105 ; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax
106 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
107 ; AVX512BW-NEXT: sarb %cl, %al
108 ; AVX512BW-NEXT: movzbl %al, %eax
109 ; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx
110 ; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx
111 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
112 ; AVX512BW-NEXT: sarb %cl, %dl
113 ; AVX512BW-NEXT: movzbl %dl, %ecx
114 ; AVX512BW-NEXT: vmovd %ecx, %xmm4
115 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
116 ; AVX512BW-NEXT: vpextrb $2, %xmm2, %ecx
117 ; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax
118 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
119 ; AVX512BW-NEXT: sarb %cl, %al
120 ; AVX512BW-NEXT: vpextrb $3, %xmm2, %ecx
121 ; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx
122 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
123 ; AVX512BW-NEXT: sarb %cl, %dl
124 ; AVX512BW-NEXT: movzbl %al, %eax
125 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
126 ; AVX512BW-NEXT: movzbl %dl, %eax
127 ; AVX512BW-NEXT: vpextrb $4, %xmm2, %ecx
128 ; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx
129 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
130 ; AVX512BW-NEXT: sarb %cl, %dl
131 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
132 ; AVX512BW-NEXT: movzbl %dl, %eax
133 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
134 ; AVX512BW-NEXT: vpextrb $5, %xmm2, %ecx
135 ; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax
136 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
137 ; AVX512BW-NEXT: sarb %cl, %al
138 ; AVX512BW-NEXT: movzbl %al, %eax
139 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
140 ; AVX512BW-NEXT: vpextrb $6, %xmm2, %ecx
141 ; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax
142 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
143 ; AVX512BW-NEXT: sarb %cl, %al
144 ; AVX512BW-NEXT: vpextrb $7, %xmm2, %ecx
145 ; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx
146 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
147 ; AVX512BW-NEXT: sarb %cl, %dl
148 ; AVX512BW-NEXT: movzbl %al, %eax
149 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
150 ; AVX512BW-NEXT: movzbl %dl, %eax
151 ; AVX512BW-NEXT: vpextrb $8, %xmm2, %ecx
152 ; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx
153 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
154 ; AVX512BW-NEXT: sarb %cl, %dl
155 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
156 ; AVX512BW-NEXT: movzbl %dl, %eax
157 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
158 ; AVX512BW-NEXT: vpextrb $9, %xmm2, %ecx
159 ; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax
160 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
161 ; AVX512BW-NEXT: sarb %cl, %al
162 ; AVX512BW-NEXT: movzbl %al, %eax
163 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
164 ; AVX512BW-NEXT: vpextrb $10, %xmm2, %ecx
165 ; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax
166 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
167 ; AVX512BW-NEXT: sarb %cl, %al
168 ; AVX512BW-NEXT: vpextrb $11, %xmm2, %ecx
169 ; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx
170 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
171 ; AVX512BW-NEXT: sarb %cl, %dl
172 ; AVX512BW-NEXT: movzbl %al, %eax
173 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
174 ; AVX512BW-NEXT: movzbl %dl, %eax
175 ; AVX512BW-NEXT: vpextrb $12, %xmm2, %ecx
176 ; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx
177 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
178 ; AVX512BW-NEXT: sarb %cl, %dl
179 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
180 ; AVX512BW-NEXT: movzbl %dl, %eax
181 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
182 ; AVX512BW-NEXT: vpextrb $13, %xmm2, %ecx
183 ; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax
184 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
185 ; AVX512BW-NEXT: sarb %cl, %al
186 ; AVX512BW-NEXT: movzbl %al, %eax
187 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
188 ; AVX512BW-NEXT: vpextrb $14, %xmm2, %ecx
189 ; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax
190 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
191 ; AVX512BW-NEXT: sarb %cl, %al
192 ; AVX512BW-NEXT: vpextrb $15, %xmm2, %ecx
193 ; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx
194 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
195 ; AVX512BW-NEXT: sarb %cl, %dl
196 ; AVX512BW-NEXT: movzbl %al, %eax
197 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2
198 ; AVX512BW-NEXT: movzbl %dl, %eax
199 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
200 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3
201 ; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx
202 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm4
203 ; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax
204 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
205 ; AVX512BW-NEXT: sarb %cl, %al
206 ; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx
207 ; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx
208 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
209 ; AVX512BW-NEXT: sarb %cl, %dl
210 ; AVX512BW-NEXT: movzbl %al, %eax
211 ; AVX512BW-NEXT: movzbl %dl, %ecx
212 ; AVX512BW-NEXT: vmovd %ecx, %xmm5
213 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
214 ; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx
215 ; AVX512BW-NEXT: vpextrb $2, %xmm4, %eax
216 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
217 ; AVX512BW-NEXT: sarb %cl, %al
218 ; AVX512BW-NEXT: movzbl %al, %eax
219 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
220 ; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx
221 ; AVX512BW-NEXT: vpextrb $3, %xmm4, %eax
222 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
223 ; AVX512BW-NEXT: sarb %cl, %al
224 ; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx
225 ; AVX512BW-NEXT: vpextrb $4, %xmm4, %edx
226 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
227 ; AVX512BW-NEXT: sarb %cl, %dl
228 ; AVX512BW-NEXT: movzbl %al, %eax
229 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
230 ; AVX512BW-NEXT: movzbl %dl, %eax
231 ; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx
232 ; AVX512BW-NEXT: vpextrb $5, %xmm4, %edx
233 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
234 ; AVX512BW-NEXT: sarb %cl, %dl
235 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
236 ; AVX512BW-NEXT: movzbl %dl, %eax
237 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
238 ; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx
239 ; AVX512BW-NEXT: vpextrb $6, %xmm4, %eax
240 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
241 ; AVX512BW-NEXT: sarb %cl, %al
242 ; AVX512BW-NEXT: movzbl %al, %eax
243 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
244 ; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx
245 ; AVX512BW-NEXT: vpextrb $7, %xmm4, %eax
246 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
247 ; AVX512BW-NEXT: sarb %cl, %al
248 ; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx
249 ; AVX512BW-NEXT: vpextrb $8, %xmm4, %edx
250 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
251 ; AVX512BW-NEXT: sarb %cl, %dl
252 ; AVX512BW-NEXT: movzbl %al, %eax
253 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
254 ; AVX512BW-NEXT: movzbl %dl, %eax
255 ; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx
256 ; AVX512BW-NEXT: vpextrb $9, %xmm4, %edx
257 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
258 ; AVX512BW-NEXT: sarb %cl, %dl
259 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
260 ; AVX512BW-NEXT: movzbl %dl, %eax
261 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
262 ; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx
263 ; AVX512BW-NEXT: vpextrb $10, %xmm4, %eax
264 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
265 ; AVX512BW-NEXT: sarb %cl, %al
266 ; AVX512BW-NEXT: movzbl %al, %eax
267 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
268 ; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx
269 ; AVX512BW-NEXT: vpextrb $11, %xmm4, %eax
270 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
271 ; AVX512BW-NEXT: sarb %cl, %al
272 ; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx
273 ; AVX512BW-NEXT: vpextrb $12, %xmm4, %edx
274 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
275 ; AVX512BW-NEXT: sarb %cl, %dl
276 ; AVX512BW-NEXT: movzbl %al, %eax
277 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
278 ; AVX512BW-NEXT: movzbl %dl, %eax
279 ; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx
280 ; AVX512BW-NEXT: vpextrb $13, %xmm4, %edx
281 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
282 ; AVX512BW-NEXT: sarb %cl, %dl
283 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
284 ; AVX512BW-NEXT: movzbl %dl, %eax
285 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
286 ; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx
287 ; AVX512BW-NEXT: vpextrb $14, %xmm4, %eax
288 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
289 ; AVX512BW-NEXT: sarb %cl, %al
290 ; AVX512BW-NEXT: movzbl %al, %eax
291 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
292 ; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx
293 ; AVX512BW-NEXT: vpextrb $15, %xmm4, %eax
294 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
295 ; AVX512BW-NEXT: sarb %cl, %al
296 ; AVX512BW-NEXT: movzbl %al, %eax
297 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5
298 ; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm3
299 ; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx
300 ; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm4
301 ; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax
302 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
303 ; AVX512BW-NEXT: sarb %cl, %al
304 ; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2
305 ; AVX512BW-NEXT: movzbl %al, %eax
306 ; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx
307 ; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx
308 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
309 ; AVX512BW-NEXT: sarb %cl, %dl
310 ; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx
311 ; AVX512BW-NEXT: vpextrb $2, %xmm4, %esi
312 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
313 ; AVX512BW-NEXT: sarb %cl, %sil
314 ; AVX512BW-NEXT: movzbl %dl, %ecx
315 ; AVX512BW-NEXT: vmovd %ecx, %xmm5
316 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
317 ; AVX512BW-NEXT: movzbl %sil, %eax
318 ; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx
319 ; AVX512BW-NEXT: vpextrb $3, %xmm4, %edx
320 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
321 ; AVX512BW-NEXT: sarb %cl, %dl
322 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
323 ; AVX512BW-NEXT: movzbl %dl, %eax
324 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
325 ; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx
326 ; AVX512BW-NEXT: vpextrb $4, %xmm4, %eax
327 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
328 ; AVX512BW-NEXT: sarb %cl, %al
329 ; AVX512BW-NEXT: movzbl %al, %eax
330 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
331 ; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx
332 ; AVX512BW-NEXT: vpextrb $5, %xmm4, %eax
333 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
334 ; AVX512BW-NEXT: sarb %cl, %al
335 ; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx
336 ; AVX512BW-NEXT: vpextrb $6, %xmm4, %edx
337 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
338 ; AVX512BW-NEXT: sarb %cl, %dl
339 ; AVX512BW-NEXT: movzbl %al, %eax
340 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
341 ; AVX512BW-NEXT: movzbl %dl, %eax
342 ; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx
343 ; AVX512BW-NEXT: vpextrb $7, %xmm4, %edx
344 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
345 ; AVX512BW-NEXT: sarb %cl, %dl
346 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
347 ; AVX512BW-NEXT: movzbl %dl, %eax
348 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
349 ; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx
350 ; AVX512BW-NEXT: vpextrb $8, %xmm4, %eax
351 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
352 ; AVX512BW-NEXT: sarb %cl, %al
353 ; AVX512BW-NEXT: movzbl %al, %eax
354 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
355 ; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx
356 ; AVX512BW-NEXT: vpextrb $9, %xmm4, %eax
357 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
358 ; AVX512BW-NEXT: sarb %cl, %al
359 ; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx
360 ; AVX512BW-NEXT: vpextrb $10, %xmm4, %edx
361 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
362 ; AVX512BW-NEXT: sarb %cl, %dl
363 ; AVX512BW-NEXT: movzbl %al, %eax
364 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
365 ; AVX512BW-NEXT: movzbl %dl, %eax
366 ; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx
367 ; AVX512BW-NEXT: vpextrb $11, %xmm4, %edx
368 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
369 ; AVX512BW-NEXT: sarb %cl, %dl
370 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
371 ; AVX512BW-NEXT: movzbl %dl, %eax
372 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
373 ; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx
374 ; AVX512BW-NEXT: vpextrb $12, %xmm4, %eax
375 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
376 ; AVX512BW-NEXT: sarb %cl, %al
377 ; AVX512BW-NEXT: movzbl %al, %eax
378 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
379 ; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx
380 ; AVX512BW-NEXT: vpextrb $13, %xmm4, %eax
381 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
382 ; AVX512BW-NEXT: sarb %cl, %al
383 ; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx
384 ; AVX512BW-NEXT: vpextrb $14, %xmm4, %edx
385 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
386 ; AVX512BW-NEXT: sarb %cl, %dl
387 ; AVX512BW-NEXT: movzbl %al, %eax
388 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
389 ; AVX512BW-NEXT: movzbl %dl, %eax
390 ; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx
391 ; AVX512BW-NEXT: vpextrb $15, %xmm4, %edx
392 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
393 ; AVX512BW-NEXT: sarb %cl, %dl
394 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3
395 ; AVX512BW-NEXT: movzbl %dl, %eax
396 ; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx
397 ; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx
398 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
399 ; AVX512BW-NEXT: sarb %cl, %dl
400 ; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx
401 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi
402 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
403 ; AVX512BW-NEXT: sarb %cl, %sil
404 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
405 ; AVX512BW-NEXT: movzbl %dl, %eax
406 ; AVX512BW-NEXT: movzbl %sil, %ecx
407 ; AVX512BW-NEXT: vmovd %ecx, %xmm4
408 ; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx
409 ; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx
410 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
411 ; AVX512BW-NEXT: sarb %cl, %dl
412 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
413 ; AVX512BW-NEXT: movzbl %dl, %eax
414 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
415 ; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx
416 ; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax
417 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
418 ; AVX512BW-NEXT: sarb %cl, %al
419 ; AVX512BW-NEXT: movzbl %al, %eax
420 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
421 ; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx
422 ; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax
423 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
424 ; AVX512BW-NEXT: sarb %cl, %al
425 ; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx
426 ; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx
427 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
428 ; AVX512BW-NEXT: sarb %cl, %dl
429 ; AVX512BW-NEXT: movzbl %al, %eax
430 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
431 ; AVX512BW-NEXT: movzbl %dl, %eax
432 ; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx
433 ; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx
434 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
435 ; AVX512BW-NEXT: sarb %cl, %dl
436 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
437 ; AVX512BW-NEXT: movzbl %dl, %eax
438 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
439 ; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx
440 ; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax
441 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
442 ; AVX512BW-NEXT: sarb %cl, %al
443 ; AVX512BW-NEXT: movzbl %al, %eax
444 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
445 ; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx
446 ; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
447 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
448 ; AVX512BW-NEXT: sarb %cl, %al
449 ; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx
450 ; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx
451 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
452 ; AVX512BW-NEXT: sarb %cl, %dl
453 ; AVX512BW-NEXT: movzbl %al, %eax
454 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
455 ; AVX512BW-NEXT: movzbl %dl, %eax
456 ; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx
457 ; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx
458 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
459 ; AVX512BW-NEXT: sarb %cl, %dl
460 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
461 ; AVX512BW-NEXT: movzbl %dl, %eax
462 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
463 ; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx
464 ; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax
465 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
466 ; AVX512BW-NEXT: sarb %cl, %al
467 ; AVX512BW-NEXT: movzbl %al, %eax
468 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
469 ; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx
470 ; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax
471 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
472 ; AVX512BW-NEXT: sarb %cl, %al
473 ; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx
474 ; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx
475 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
476 ; AVX512BW-NEXT: sarb %cl, %dl
477 ; AVX512BW-NEXT: movzbl %al, %eax
478 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
479 ; AVX512BW-NEXT: movzbl %dl, %eax
480 ; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx
481 ; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx
482 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
483 ; AVX512BW-NEXT: sarb %cl, %dl
484 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
485 ; AVX512BW-NEXT: movzbl %dl, %eax
486 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
487 ; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx
488 ; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax
489 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
490 ; AVX512BW-NEXT: sarb %cl, %al
491 ; AVX512BW-NEXT: movzbl %al, %eax
492 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0
493 ; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
494 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
102 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
103 ; AVX512BW-NEXT: vpsraw $4, %zmm2, %zmm3
104 ; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
105 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
106 ; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
107 ; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
108 ; AVX512BW-NEXT: vpsraw $2, %zmm2, %zmm3
109 ; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
110 ; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
111 ; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
112 ; AVX512BW-NEXT: vpsraw $1, %zmm2, %zmm3
113 ; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
114 ; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
115 ; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
116 ; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
117 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
118 ; AVX512BW-NEXT: vpsraw $4, %zmm0, %zmm3
119 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
120 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
121 ; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
122 ; AVX512BW-NEXT: vpsraw $2, %zmm0, %zmm3
123 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1
124 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
125 ; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
126 ; AVX512BW-NEXT: vpsraw $1, %zmm0, %zmm3
127 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1
128 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
129 ; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
130 ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
131 ; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0
495132 ; AVX512BW-NEXT: retq
496133 %shift = ashr <64 x i8> %a, %b
497134 ret <64 x i8> %shift
590227 ; AVX512BW-LABEL: splatvar_shift_v64i8:
591228 ; AVX512BW: # BB#0:
592229 ; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
593 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm2
594 ; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
595 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm3
596 ; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx
597 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
598 ; AVX512BW-NEXT: sarb %cl, %al
599 ; AVX512BW-NEXT: movzbl %al, %eax
600 ; AVX512BW-NEXT: vpextrb $0, %xmm2, %edx
601 ; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx
602 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
603 ; AVX512BW-NEXT: sarb %cl, %dl
604 ; AVX512BW-NEXT: movzbl %dl, %ecx
605 ; AVX512BW-NEXT: vmovd %ecx, %xmm4
606 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
607 ; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
608 ; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx
609 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
610 ; AVX512BW-NEXT: sarb %cl, %al
611 ; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx
612 ; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx
613 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
614 ; AVX512BW-NEXT: sarb %cl, %dl
615 ; AVX512BW-NEXT: movzbl %al, %eax
616 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
617 ; AVX512BW-NEXT: movzbl %dl, %eax
618 ; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx
619 ; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx
620 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
621 ; AVX512BW-NEXT: sarb %cl, %dl
622 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
623 ; AVX512BW-NEXT: movzbl %dl, %eax
624 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
625 ; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
626 ; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx
627 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
628 ; AVX512BW-NEXT: sarb %cl, %al
629 ; AVX512BW-NEXT: movzbl %al, %eax
630 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
631 ; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
632 ; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx
633 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
634 ; AVX512BW-NEXT: sarb %cl, %al
635 ; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx
636 ; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx
637 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
638 ; AVX512BW-NEXT: sarb %cl, %dl
639 ; AVX512BW-NEXT: movzbl %al, %eax
640 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
641 ; AVX512BW-NEXT: movzbl %dl, %eax
642 ; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx
643 ; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx
644 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
645 ; AVX512BW-NEXT: sarb %cl, %dl
646 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
647 ; AVX512BW-NEXT: movzbl %dl, %eax
648 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
649 ; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
650 ; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx
651 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
652 ; AVX512BW-NEXT: sarb %cl, %al
653 ; AVX512BW-NEXT: movzbl %al, %eax
654 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
655 ; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
656 ; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx
657 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
658 ; AVX512BW-NEXT: sarb %cl, %al
659 ; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx
660 ; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx
661 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
662 ; AVX512BW-NEXT: sarb %cl, %dl
663 ; AVX512BW-NEXT: movzbl %al, %eax
664 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
665 ; AVX512BW-NEXT: movzbl %dl, %eax
666 ; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx
667 ; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx
668 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
669 ; AVX512BW-NEXT: sarb %cl, %dl
670 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
671 ; AVX512BW-NEXT: movzbl %dl, %eax
672 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
673 ; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
674 ; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx
675 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
676 ; AVX512BW-NEXT: sarb %cl, %al
677 ; AVX512BW-NEXT: movzbl %al, %eax
678 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
679 ; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
680 ; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx
681 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
682 ; AVX512BW-NEXT: sarb %cl, %al
683 ; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx
684 ; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx
685 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
686 ; AVX512BW-NEXT: sarb %cl, %dl
687 ; AVX512BW-NEXT: movzbl %al, %eax
688 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2
689 ; AVX512BW-NEXT: movzbl %dl, %eax
690 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
691 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm3
692 ; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax
693 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm4
694 ; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx
695 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
696 ; AVX512BW-NEXT: sarb %cl, %al
697 ; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx
698 ; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx
699 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
700 ; AVX512BW-NEXT: sarb %cl, %dl
701 ; AVX512BW-NEXT: movzbl %al, %eax
702 ; AVX512BW-NEXT: movzbl %dl, %ecx
703 ; AVX512BW-NEXT: vmovd %ecx, %xmm5
704 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
705 ; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax
706 ; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx
707 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
708 ; AVX512BW-NEXT: sarb %cl, %al
709 ; AVX512BW-NEXT: movzbl %al, %eax
710 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
711 ; AVX512BW-NEXT: vpextrb $3, %xmm3, %eax
712 ; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx
713 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
714 ; AVX512BW-NEXT: sarb %cl, %al
715 ; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx
716 ; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx
717 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
718 ; AVX512BW-NEXT: sarb %cl, %dl
719 ; AVX512BW-NEXT: movzbl %al, %eax
720 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
721 ; AVX512BW-NEXT: movzbl %dl, %eax
722 ; AVX512BW-NEXT: vpextrb $5, %xmm3, %edx
723 ; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx
724 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
725 ; AVX512BW-NEXT: sarb %cl, %dl
726 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
727 ; AVX512BW-NEXT: movzbl %dl, %eax
728 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
729 ; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax
730 ; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx
731 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
732 ; AVX512BW-NEXT: sarb %cl, %al
733 ; AVX512BW-NEXT: movzbl %al, %eax
734 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
735 ; AVX512BW-NEXT: vpextrb $7, %xmm3, %eax
736 ; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx
737 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
738 ; AVX512BW-NEXT: sarb %cl, %al
739 ; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx
740 ; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx
741 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
742 ; AVX512BW-NEXT: sarb %cl, %dl
743 ; AVX512BW-NEXT: movzbl %al, %eax
744 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
745 ; AVX512BW-NEXT: movzbl %dl, %eax
746 ; AVX512BW-NEXT: vpextrb $9, %xmm3, %edx
747 ; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx
748 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
749 ; AVX512BW-NEXT: sarb %cl, %dl
750 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
751 ; AVX512BW-NEXT: movzbl %dl, %eax
752 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
753 ; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax
754 ; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx
755 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
756 ; AVX512BW-NEXT: sarb %cl, %al
757 ; AVX512BW-NEXT: movzbl %al, %eax
758 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
759 ; AVX512BW-NEXT: vpextrb $11, %xmm3, %eax
760 ; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx
761 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
762 ; AVX512BW-NEXT: sarb %cl, %al
763 ; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx
764 ; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx
765 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
766 ; AVX512BW-NEXT: sarb %cl, %dl
767 ; AVX512BW-NEXT: movzbl %al, %eax
768 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
769 ; AVX512BW-NEXT: movzbl %dl, %eax
770 ; AVX512BW-NEXT: vpextrb $13, %xmm3, %edx
771 ; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx
772 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
773 ; AVX512BW-NEXT: sarb %cl, %dl
774 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
775 ; AVX512BW-NEXT: movzbl %dl, %eax
776 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
777 ; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax
778 ; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx
779 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
780 ; AVX512BW-NEXT: sarb %cl, %al
781 ; AVX512BW-NEXT: movzbl %al, %eax
782 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
783 ; AVX512BW-NEXT: vpextrb $15, %xmm3, %eax
784 ; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx
785 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
786 ; AVX512BW-NEXT: sarb %cl, %al
787 ; AVX512BW-NEXT: movzbl %al, %eax
788 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5
789 ; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm3
790 ; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax
791 ; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm4
792 ; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx
793 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
794 ; AVX512BW-NEXT: sarb %cl, %al
795 ; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2
796 ; AVX512BW-NEXT: movzbl %al, %eax
797 ; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx
798 ; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx
799 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
800 ; AVX512BW-NEXT: sarb %cl, %dl
801 ; AVX512BW-NEXT: vpextrb $2, %xmm3, %esi
802 ; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx
803 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
804 ; AVX512BW-NEXT: sarb %cl, %sil
805 ; AVX512BW-NEXT: movzbl %dl, %ecx
806 ; AVX512BW-NEXT: vmovd %ecx, %xmm5
807 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
808 ; AVX512BW-NEXT: movzbl %sil, %eax
809 ; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx
810 ; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx
811 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
812 ; AVX512BW-NEXT: sarb %cl, %dl
813 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
814 ; AVX512BW-NEXT: movzbl %dl, %eax
815 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
816 ; AVX512BW-NEXT: vpextrb $4, %xmm3, %eax
817 ; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx
818 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
819 ; AVX512BW-NEXT: sarb %cl, %al
820 ; AVX512BW-NEXT: movzbl %al, %eax
821 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
822 ; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax
823 ; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx
824 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
825 ; AVX512BW-NEXT: sarb %cl, %al
826 ; AVX512BW-NEXT: vpextrb $6, %xmm3, %edx
827 ; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx
828 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
829 ; AVX512BW-NEXT: sarb %cl, %dl
830 ; AVX512BW-NEXT: movzbl %al, %eax
831 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
832 ; AVX512BW-NEXT: movzbl %dl, %eax
833 ; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx
834 ; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx
835 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
836 ; AVX512BW-NEXT: sarb %cl, %dl
837 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
838 ; AVX512BW-NEXT: movzbl %dl, %eax
839 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
840 ; AVX512BW-NEXT: vpextrb $8, %xmm3, %eax
841 ; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx
842 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
843 ; AVX512BW-NEXT: sarb %cl, %al
844 ; AVX512BW-NEXT: movzbl %al, %eax
845 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
846 ; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax
847 ; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx
848 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
849 ; AVX512BW-NEXT: sarb %cl, %al
850 ; AVX512BW-NEXT: vpextrb $10, %xmm3, %edx
851 ; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx
852 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
853 ; AVX512BW-NEXT: sarb %cl, %dl
854 ; AVX512BW-NEXT: movzbl %al, %eax
855 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
856 ; AVX512BW-NEXT: movzbl %dl, %eax
857 ; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx
858 ; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx
859 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
860 ; AVX512BW-NEXT: sarb %cl, %dl
861 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
862 ; AVX512BW-NEXT: movzbl %dl, %eax
863 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
864 ; AVX512BW-NEXT: vpextrb $12, %xmm3, %eax
865 ; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx
866 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
867 ; AVX512BW-NEXT: sarb %cl, %al
868 ; AVX512BW-NEXT: movzbl %al, %eax
869 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
870 ; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax
871 ; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx
872 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
873 ; AVX512BW-NEXT: sarb %cl, %al
874 ; AVX512BW-NEXT: vpextrb $14, %xmm3, %edx
875 ; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx
876 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
877 ; AVX512BW-NEXT: sarb %cl, %dl
878 ; AVX512BW-NEXT: movzbl %al, %eax
879 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
880 ; AVX512BW-NEXT: movzbl %dl, %eax
881 ; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx
882 ; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx
883 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
884 ; AVX512BW-NEXT: sarb %cl, %dl
885 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3
886 ; AVX512BW-NEXT: movzbl %dl, %eax
887 ; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx
888 ; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx
889 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
890 ; AVX512BW-NEXT: sarb %cl, %dl
891 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi
892 ; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx
893 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
894 ; AVX512BW-NEXT: sarb %cl, %sil
895 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
896 ; AVX512BW-NEXT: movzbl %dl, %eax
897 ; AVX512BW-NEXT: movzbl %sil, %ecx
898 ; AVX512BW-NEXT: vmovd %ecx, %xmm4
899 ; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx
900 ; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx
901 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
902 ; AVX512BW-NEXT: sarb %cl, %dl
903 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
904 ; AVX512BW-NEXT: movzbl %dl, %eax
905 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
906 ; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax
907 ; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx
908 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
909 ; AVX512BW-NEXT: sarb %cl, %al
910 ; AVX512BW-NEXT: movzbl %al, %eax
911 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
912 ; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax
913 ; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx
914 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
915 ; AVX512BW-NEXT: sarb %cl, %al
916 ; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx
917 ; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx
918 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
919 ; AVX512BW-NEXT: sarb %cl, %dl
920 ; AVX512BW-NEXT: movzbl %al, %eax
921 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
922 ; AVX512BW-NEXT: movzbl %dl, %eax
923 ; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx
924 ; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx
925 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
926 ; AVX512BW-NEXT: sarb %cl, %dl
927 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
928 ; AVX512BW-NEXT: movzbl %dl, %eax
929 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
930 ; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax
931 ; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx
932 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
933 ; AVX512BW-NEXT: sarb %cl, %al
934 ; AVX512BW-NEXT: movzbl %al, %eax
935 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
936 ; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
937 ; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx
938 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
939 ; AVX512BW-NEXT: sarb %cl, %al
940 ; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx
941 ; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx
942 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
943 ; AVX512BW-NEXT: sarb %cl, %dl
944 ; AVX512BW-NEXT: movzbl %al, %eax
945 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
946 ; AVX512BW-NEXT: movzbl %dl, %eax
947 ; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx
948 ; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx
949 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
950 ; AVX512BW-NEXT: sarb %cl, %dl
951 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
952 ; AVX512BW-NEXT: movzbl %dl, %eax
953 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
954 ; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax
955 ; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx
956 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
957 ; AVX512BW-NEXT: sarb %cl, %al
958 ; AVX512BW-NEXT: movzbl %al, %eax
959 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
960 ; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax
961 ; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx
962 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
963 ; AVX512BW-NEXT: sarb %cl, %al
964 ; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx
965 ; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx
966 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
967 ; AVX512BW-NEXT: sarb %cl, %dl
968 ; AVX512BW-NEXT: movzbl %al, %eax
969 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
970 ; AVX512BW-NEXT: movzbl %dl, %eax
971 ; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx
972 ; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx
973 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
974 ; AVX512BW-NEXT: sarb %cl, %dl
975 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
976 ; AVX512BW-NEXT: movzbl %dl, %eax
977 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
978 ; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax
979 ; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx
980 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
981 ; AVX512BW-NEXT: sarb %cl, %al
982 ; AVX512BW-NEXT: movzbl %al, %eax
983 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0
984 ; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
985 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
230 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
231 ; AVX512BW-NEXT: vpsraw $4, %zmm2, %zmm3
232 ; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
233 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
234 ; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
235 ; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
236 ; AVX512BW-NEXT: vpsraw $2, %zmm2, %zmm3
237 ; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
238 ; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
239 ; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
240 ; AVX512BW-NEXT: vpsraw $1, %zmm2, %zmm3
241 ; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
242 ; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
243 ; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1}
244 ; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
245 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
246 ; AVX512BW-NEXT: vpsraw $4, %zmm0, %zmm3
247 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
248 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
249 ; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
250 ; AVX512BW-NEXT: vpsraw $2, %zmm0, %zmm3
251 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1
252 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
253 ; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
254 ; AVX512BW-NEXT: vpsraw $1, %zmm0, %zmm3
255 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm1, %zmm1
256 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
257 ; AVX512BW-NEXT: vmovdqu8 %zmm3, %zmm0 {%k1}
258 ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
259 ; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0
986260 ; AVX512BW-NEXT: retq
987261 %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
988262 %shift = ashr <64 x i8> %a, %splat
1080354 ;
1081355 ; AVX512BW-LABEL: constant_shift_v64i8:
1082356 ; AVX512BW: # BB#0:
1083 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1
1084 ; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax
1085 ; AVX512BW-NEXT: vmovd %eax, %xmm2
1086 ; AVX512BW-NEXT: vpextrb $1, %xmm1, %eax
1087 ; AVX512BW-NEXT: sarb %al
1088 ; AVX512BW-NEXT: movzbl %al, %eax
1089 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
1090 ; AVX512BW-NEXT: vpextrb $2, %xmm1, %eax
1091 ; AVX512BW-NEXT: sarb $2, %al
1092 ; AVX512BW-NEXT: movzbl %al, %eax
1093 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
1094 ; AVX512BW-NEXT: vpextrb $3, %xmm1, %eax
1095 ; AVX512BW-NEXT: sarb $3, %al
1096 ; AVX512BW-NEXT: movzbl %al, %eax
1097 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
1098 ; AVX512BW-NEXT: vpextrb $4, %xmm1, %eax
1099 ; AVX512BW-NEXT: sarb $4, %al
1100 ; AVX512BW-NEXT: movzbl %al, %eax
1101 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
1102 ; AVX512BW-NEXT: vpextrb $5, %xmm1, %eax
1103 ; AVX512BW-NEXT: sarb $5, %al
1104 ; AVX512BW-NEXT: movzbl %al, %eax
1105 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
1106 ; AVX512BW-NEXT: vpextrb $6, %xmm1, %eax
1107 ; AVX512BW-NEXT: sarb $6, %al
1108 ; AVX512BW-NEXT: movzbl %al, %eax
1109 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
1110 ; AVX512BW-NEXT: vpextrb $7, %xmm1, %eax
1111 ; AVX512BW-NEXT: sarb $7, %al
1112 ; AVX512BW-NEXT: movzbl %al, %eax
1113 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
1114 ; AVX512BW-NEXT: vpextrb $8, %xmm1, %eax
1115 ; AVX512BW-NEXT: sarb $7, %al
1116 ; AVX512BW-NEXT: movzbl %al, %eax
1117 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
1118 ; AVX512BW-NEXT: vpextrb $9, %xmm1, %eax
1119 ; AVX512BW-NEXT: sarb $6, %al
1120 ; AVX512BW-NEXT: movzbl %al, %eax
1121 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
1122 ; AVX512BW-NEXT: vpextrb $10, %xmm1, %eax
1123 ; AVX512BW-NEXT: sarb $5, %al
1124 ; AVX512BW-NEXT: movzbl %al, %eax
1125 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
1126 ; AVX512BW-NEXT: vpextrb $11, %xmm1, %eax
1127 ; AVX512BW-NEXT: sarb $4, %al
1128 ; AVX512BW-NEXT: movzbl %al, %eax
1129 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
1130 ; AVX512BW-NEXT: vpextrb $12, %xmm1, %eax
1131 ; AVX512BW-NEXT: sarb $3, %al
1132 ; AVX512BW-NEXT: movzbl %al, %eax
1133 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
1134 ; AVX512BW-NEXT: vpextrb $13, %xmm1, %eax
1135 ; AVX512BW-NEXT: sarb $2, %al
1136 ; AVX512BW-NEXT: movzbl %al, %eax
1137 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
1138 ; AVX512BW-NEXT: vpextrb $14, %xmm1, %eax
1139 ; AVX512BW-NEXT: sarb %al
1140 ; AVX512BW-NEXT: movzbl %al, %eax
1141 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
1142 ; AVX512BW-NEXT: vpextrb $15, %xmm1, %eax
1143 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
1144 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2
1145 ; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
1146 ; AVX512BW-NEXT: vmovd %eax, %xmm3
1147 ; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
1148 ; AVX512BW-NEXT: sarb %al
1149 ; AVX512BW-NEXT: movzbl %al, %eax
1150 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
1151 ; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
1152 ; AVX512BW-NEXT: sarb $2, %al
1153 ; AVX512BW-NEXT: movzbl %al, %eax
1154 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
1155 ; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax
1156 ; AVX512BW-NEXT: sarb $3, %al
1157 ; AVX512BW-NEXT: movzbl %al, %eax
1158 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
1159 ; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax
1160 ; AVX512BW-NEXT: sarb $4, %al
1161 ; AVX512BW-NEXT: movzbl %al, %eax
1162 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
1163 ; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
1164 ; AVX512BW-NEXT: sarb $5, %al
1165 ; AVX512BW-NEXT: movzbl %al, %eax
1166 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
1167 ; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
1168 ; AVX512BW-NEXT: sarb $6, %al
1169 ; AVX512BW-NEXT: movzbl %al, %eax
1170 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
1171 ; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax
1172 ; AVX512BW-NEXT: sarb $7, %al
1173 ; AVX512BW-NEXT: movzbl %al, %eax
1174 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
1175 ; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax
1176 ; AVX512BW-NEXT: sarb $7, %al
1177 ; AVX512BW-NEXT: movzbl %al, %eax
1178 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
1179 ; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
1180 ; AVX512BW-NEXT: sarb $6, %al
1181 ; AVX512BW-NEXT: movzbl %al, %eax
1182 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
1183 ; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
1184 ; AVX512BW-NEXT: sarb $5, %al
1185 ; AVX512BW-NEXT: movzbl %al, %eax
1186 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
1187 ; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax
1188 ; AVX512BW-NEXT: sarb $4, %al
1189 ; AVX512BW-NEXT: movzbl %al, %eax
1190 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
1191 ; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax
1192 ; AVX512BW-NEXT: sarb $3, %al
1193 ; AVX512BW-NEXT: movzbl %al, %eax
1194 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
1195 ; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
1196 ; AVX512BW-NEXT: sarb $2, %al
1197 ; AVX512BW-NEXT: movzbl %al, %eax
1198 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
1199 ; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
1200 ; AVX512BW-NEXT: sarb %al
1201 ; AVX512BW-NEXT: movzbl %al, %eax
1202 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
1203 ; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax
1204 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
1205 ; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
1206 ; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm2
1207 ; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
1208 ; AVX512BW-NEXT: vmovd %eax, %xmm3
1209 ; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
1210 ; AVX512BW-NEXT: sarb %al
1211 ; AVX512BW-NEXT: movzbl %al, %eax
1212 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
1213 ; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
1214 ; AVX512BW-NEXT: sarb $2, %al
1215 ; AVX512BW-NEXT: movzbl %al, %eax
1216 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
1217 ; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax
1218 ; AVX512BW-NEXT: sarb $3, %al
1219 ; AVX512BW-NEXT: movzbl %al, %eax
1220 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
1221 ; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax
1222 ; AVX512BW-NEXT: sarb $4, %al
1223 ; AVX512BW-NEXT: movzbl %al, %eax
1224 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
1225 ; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
1226 ; AVX512BW-NEXT: sarb $5, %al
1227 ; AVX512BW-NEXT: movzbl %al, %eax
1228 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
1229 ; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
1230 ; AVX512BW-NEXT: sarb $6, %al
1231 ; AVX512BW-NEXT: movzbl %al, %eax
1232 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
1233 ; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax
1234 ; AVX512BW-NEXT: sarb $7, %al
1235 ; AVX512BW-NEXT: movzbl %al, %eax
1236 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
1237 ; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax
1238 ; AVX512BW-NEXT: sarb $7, %al
1239 ; AVX512BW-NEXT: movzbl %al, %eax
1240 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
1241 ; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
1242 ; AVX512BW-NEXT: sarb $6, %al
1243 ; AVX512BW-NEXT: movzbl %al, %eax
1244 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
1245 ; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
1246 ; AVX512BW-NEXT: sarb $5, %al
1247 ; AVX512BW-NEXT: movzbl %al, %eax
1248 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
1249 ; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax
1250 ; AVX512BW-NEXT: sarb $4, %al
1251 ; AVX512BW-NEXT: movzbl %al, %eax
1252 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
1253 ; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax
1254 ; AVX512BW-NEXT: sarb $3, %al
1255 ; AVX512BW-NEXT: movzbl %al, %eax
1256 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
1257 ; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
1258 ; AVX512BW-NEXT: sarb $2, %al
1259 ; AVX512BW-NEXT: movzbl %al, %eax
1260 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
1261 ; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
1262 ; AVX512BW-NEXT: sarb %al
1263 ; AVX512BW-NEXT: movzbl %al, %eax
1264 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
1265 ; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax
1266 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
1267 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
1268 ; AVX512BW-NEXT: vmovd %eax, %xmm3
1269 ; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax
1270 ; AVX512BW-NEXT: sarb %al
1271 ; AVX512BW-NEXT: movzbl %al, %eax
1272 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
1273 ; AVX512BW-NEXT: vpextrb $2, %xmm0, %eax
1274 ; AVX512BW-NEXT: sarb $2, %al
1275 ; AVX512BW-NEXT: movzbl %al, %eax
1276 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
1277 ; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax
1278 ; AVX512BW-NEXT: sarb $3, %al
1279 ; AVX512BW-NEXT: movzbl %al, %eax
1280 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
1281 ; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax
1282 ; AVX512BW-NEXT: sarb $4, %al
1283 ; AVX512BW-NEXT: movzbl %al, %eax
1284 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
1285 ; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax
1286 ; AVX512BW-NEXT: sarb $5, %al
1287 ; AVX512BW-NEXT: movzbl %al, %eax
1288 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
1289 ; AVX512BW-NEXT: vpextrb $6, %xmm0, %eax
1290 ; AVX512BW-NEXT: sarb $6, %al
1291 ; AVX512BW-NEXT: movzbl %al, %eax
1292 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
1293 ; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax
1294 ; AVX512BW-NEXT: sarb $7, %al
1295 ; AVX512BW-NEXT: movzbl %al, %eax
1296 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
1297 ; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
1298 ; AVX512BW-NEXT: sarb $7, %al
1299 ; AVX512BW-NEXT: movzbl %al, %eax
1300 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
1301 ; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax
1302 ; AVX512BW-NEXT: sarb $6, %al
1303 ; AVX512BW-NEXT: movzbl %al, %eax
1304 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
1305 ; AVX512BW-NEXT: vpextrb $10, %xmm0, %eax
1306 ; AVX512BW-NEXT: sarb $5, %al
1307 ; AVX512BW-NEXT: movzbl %al, %eax
1308 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
1309 ; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax
1310 ; AVX512BW-NEXT: sarb $4, %al
1311 ; AVX512BW-NEXT: movzbl %al, %eax
1312 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
1313 ; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax
1314 ; AVX512BW-NEXT: sarb $3, %al
1315 ; AVX512BW-NEXT: movzbl %al, %eax
1316 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
1317 ; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax
1318 ; AVX512BW-NEXT: sarb $2, %al
1319 ; AVX512BW-NEXT: movzbl %al, %eax
1320 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
1321 ; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax
1322 ; AVX512BW-NEXT: sarb %al
1323 ; AVX512BW-NEXT: movzbl %al, %eax
1324 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
1325 ; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax
1326 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0
1327 ; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1328 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
357 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
358 ; AVX512BW-NEXT: vpsraw $4, %zmm1, %zmm2
359 ; AVX512BW-NEXT: vpsllw $5, {{.*}}(%rip), %zmm3
360 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm0[8],zmm3[8],zmm0[9],zmm3[9],zmm0[10],zmm3[10],zmm0[11],zmm3[11],zmm0[12],zmm3[12],zmm0[13],zmm3[13],zmm0[14],zmm3[14],zmm0[15],zmm3[15],zmm0[24],zmm3[24],zmm0[25],zmm3[25],zmm0[26],zmm3[26],zmm0[27],zmm3[27],zmm0[28],zmm3[28],zmm0[29],zmm3[29],zmm0[30],zmm3[30],zmm0[31],zmm3[31],zmm0[40],zmm3[40],zmm0[41],zmm3[41],zmm0[42],zmm3[42],zmm0[43],zmm3[43],zmm0[44],zmm3[44],zmm0[45],zmm3[45],zmm0[46],zmm3[46],zmm0[47],zmm3[47],zmm0[56],zmm3[56],zmm0[57],zmm3[57],zmm0[58],zmm3[58],zmm0[59],zmm3[59],zmm0[60],zmm3[60],zmm0[61],zmm3[61],zmm0[62],zmm3[62],zmm0[63],zmm3[63]
361 ; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
362 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1}
363 ; AVX512BW-NEXT: vpsraw $2, %zmm1, %zmm2
364 ; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
365 ; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
366 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1}
367 ; AVX512BW-NEXT: vpsraw $1, %zmm1, %zmm2
368 ; AVX512BW-NEXT: vpaddw %zmm4, %zmm4, %zmm4
369 ; AVX512BW-NEXT: vpmovb2m %zmm4, %k1
370 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1}
371 ; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
372 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
373 ; AVX512BW-NEXT: vpsraw $4, %zmm0, %zmm2
374 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm0[0],zmm3[0],zmm0[1],zmm3[1],zmm0[2],zmm3[2],zmm0[3],zmm3[3],zmm0[4],zmm3[4],zmm0[5],zmm3[5],zmm0[6],zmm3[6],zmm0[7],zmm3[7],zmm0[16],zmm3[16],zmm0[17],zmm3[17],zmm0[18],zmm3[18],zmm0[19],zmm3[19],zmm0[20],zmm3[20],zmm0[21],zmm3[21],zmm0[22],zmm3[22],zmm0[23],zmm3[23],zmm0[32],zmm3[32],zmm0[33],zmm3[33],zmm0[34],zmm3[34],zmm0[35],zmm3[35],zmm0[36],zmm3[36],zmm0[37],zmm3[37],zmm0[38],zmm3[38],zmm0[39],zmm3[39],zmm0[48],zmm3[48],zmm0[49],zmm3[49],zmm0[50],zmm3[50],zmm0[51],zmm3[51],zmm0[52],zmm3[52],zmm0[53],zmm3[53],zmm0[54],zmm3[54],zmm0[55],zmm3[55]
375 ; AVX512BW-NEXT: vpmovb2m %zmm3, %k1
376 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
377 ; AVX512BW-NEXT: vpsraw $2, %zmm0, %zmm2
378 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm3, %zmm3
379 ; AVX512BW-NEXT: vpmovb2m %zmm3, %k1
380 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
381 ; AVX512BW-NEXT: vpsraw $1, %zmm0, %zmm2
382 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm3, %zmm3
383 ; AVX512BW-NEXT: vpmovb2m %zmm3, %k1
384 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
385 ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
386 ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
1329387 ; AVX512BW-NEXT: retq
1330388 %shift = ashr <64 x i8> %a,
1331389 ret <64 x i8> %shift
7878 ;
7979 ; AVX512BW-LABEL: var_shift_v64i8:
8080 ; AVX512BW: # BB#0:
81 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2
82 ; AVX512BW-NEXT: vpextrb $1, %xmm2, %ecx
83 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm3
84 ; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax
85 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
86 ; AVX512BW-NEXT: shrb %cl, %al
87 ; AVX512BW-NEXT: movzbl %al, %eax
88 ; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx
89 ; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx
90 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
91 ; AVX512BW-NEXT: shrb %cl, %dl
92 ; AVX512BW-NEXT: movzbl %dl, %ecx
93 ; AVX512BW-NEXT: vmovd %ecx, %xmm4
94 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
95 ; AVX512BW-NEXT: vpextrb $2, %xmm2, %ecx
96 ; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax
97 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
98 ; AVX512BW-NEXT: shrb %cl, %al
99 ; AVX512BW-NEXT: vpextrb $3, %xmm2, %ecx
100 ; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx
101 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
102 ; AVX512BW-NEXT: shrb %cl, %dl
103 ; AVX512BW-NEXT: movzbl %al, %eax
104 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
105 ; AVX512BW-NEXT: movzbl %dl, %eax
106 ; AVX512BW-NEXT: vpextrb $4, %xmm2, %ecx
107 ; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx
108 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
109 ; AVX512BW-NEXT: shrb %cl, %dl
110 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
111 ; AVX512BW-NEXT: movzbl %dl, %eax
112 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
113 ; AVX512BW-NEXT: vpextrb $5, %xmm2, %ecx
114 ; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax
115 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
116 ; AVX512BW-NEXT: shrb %cl, %al
117 ; AVX512BW-NEXT: movzbl %al, %eax
118 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
119 ; AVX512BW-NEXT: vpextrb $6, %xmm2, %ecx
120 ; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax
121 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
122 ; AVX512BW-NEXT: shrb %cl, %al
123 ; AVX512BW-NEXT: vpextrb $7, %xmm2, %ecx
124 ; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx
125 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
126 ; AVX512BW-NEXT: shrb %cl, %dl
127 ; AVX512BW-NEXT: movzbl %al, %eax
128 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
129 ; AVX512BW-NEXT: movzbl %dl, %eax
130 ; AVX512BW-NEXT: vpextrb $8, %xmm2, %ecx
131 ; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx
132 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
133 ; AVX512BW-NEXT: shrb %cl, %dl
134 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
135 ; AVX512BW-NEXT: movzbl %dl, %eax
136 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
137 ; AVX512BW-NEXT: vpextrb $9, %xmm2, %ecx
138 ; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax
139 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
140 ; AVX512BW-NEXT: shrb %cl, %al
141 ; AVX512BW-NEXT: movzbl %al, %eax
142 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
143 ; AVX512BW-NEXT: vpextrb $10, %xmm2, %ecx
144 ; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax
145 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
146 ; AVX512BW-NEXT: shrb %cl, %al
147 ; AVX512BW-NEXT: vpextrb $11, %xmm2, %ecx
148 ; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx
149 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
150 ; AVX512BW-NEXT: shrb %cl, %dl
151 ; AVX512BW-NEXT: movzbl %al, %eax
152 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
153 ; AVX512BW-NEXT: movzbl %dl, %eax
154 ; AVX512BW-NEXT: vpextrb $12, %xmm2, %ecx
155 ; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx
156 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
157 ; AVX512BW-NEXT: shrb %cl, %dl
158 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
159 ; AVX512BW-NEXT: movzbl %dl, %eax
160 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
161 ; AVX512BW-NEXT: vpextrb $13, %xmm2, %ecx
162 ; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax
163 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
164 ; AVX512BW-NEXT: shrb %cl, %al
165 ; AVX512BW-NEXT: movzbl %al, %eax
166 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
167 ; AVX512BW-NEXT: vpextrb $14, %xmm2, %ecx
168 ; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax
169 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
170 ; AVX512BW-NEXT: shrb %cl, %al
171 ; AVX512BW-NEXT: vpextrb $15, %xmm2, %ecx
172 ; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx
173 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
174 ; AVX512BW-NEXT: shrb %cl, %dl
175 ; AVX512BW-NEXT: movzbl %al, %eax
176 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2
177 ; AVX512BW-NEXT: movzbl %dl, %eax
178 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
179 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3
180 ; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx
181 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm4
182 ; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax
183 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
184 ; AVX512BW-NEXT: shrb %cl, %al
185 ; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx
186 ; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx
187 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
188 ; AVX512BW-NEXT: shrb %cl, %dl
189 ; AVX512BW-NEXT: movzbl %al, %eax
190 ; AVX512BW-NEXT: movzbl %dl, %ecx
191 ; AVX512BW-NEXT: vmovd %ecx, %xmm5
192 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
193 ; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx
194 ; AVX512BW-NEXT: vpextrb $2, %xmm4, %eax
195 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
196 ; AVX512BW-NEXT: shrb %cl, %al
197 ; AVX512BW-NEXT: movzbl %al, %eax
198 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
199 ; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx
200 ; AVX512BW-NEXT: vpextrb $3, %xmm4, %eax
201 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
202 ; AVX512BW-NEXT: shrb %cl, %al
203 ; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx
204 ; AVX512BW-NEXT: vpextrb $4, %xmm4, %edx
205 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
206 ; AVX512BW-NEXT: shrb %cl, %dl
207 ; AVX512BW-NEXT: movzbl %al, %eax
208 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
209 ; AVX512BW-NEXT: movzbl %dl, %eax
210 ; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx
211 ; AVX512BW-NEXT: vpextrb $5, %xmm4, %edx
212 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
213 ; AVX512BW-NEXT: shrb %cl, %dl
214 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
215 ; AVX512BW-NEXT: movzbl %dl, %eax
216 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
217 ; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx
218 ; AVX512BW-NEXT: vpextrb $6, %xmm4, %eax
219 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
220 ; AVX512BW-NEXT: shrb %cl, %al
221 ; AVX512BW-NEXT: movzbl %al, %eax
222 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
223 ; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx
224 ; AVX512BW-NEXT: vpextrb $7, %xmm4, %eax
225 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
226 ; AVX512BW-NEXT: shrb %cl, %al
227 ; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx
228 ; AVX512BW-NEXT: vpextrb $8, %xmm4, %edx
229 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
230 ; AVX512BW-NEXT: shrb %cl, %dl
231 ; AVX512BW-NEXT: movzbl %al, %eax
232 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
233 ; AVX512BW-NEXT: movzbl %dl, %eax
234 ; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx
235 ; AVX512BW-NEXT: vpextrb $9, %xmm4, %edx
236 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
237 ; AVX512BW-NEXT: shrb %cl, %dl
238 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
239 ; AVX512BW-NEXT: movzbl %dl, %eax
240 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
241 ; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx
242 ; AVX512BW-NEXT: vpextrb $10, %xmm4, %eax
243 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
244 ; AVX512BW-NEXT: shrb %cl, %al
245 ; AVX512BW-NEXT: movzbl %al, %eax
246 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
247 ; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx
248 ; AVX512BW-NEXT: vpextrb $11, %xmm4, %eax
249 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
250 ; AVX512BW-NEXT: shrb %cl, %al
251 ; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx
252 ; AVX512BW-NEXT: vpextrb $12, %xmm4, %edx
253 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
254 ; AVX512BW-NEXT: shrb %cl, %dl
255 ; AVX512BW-NEXT: movzbl %al, %eax
256 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
257 ; AVX512BW-NEXT: movzbl %dl, %eax
258 ; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx
259 ; AVX512BW-NEXT: vpextrb $13, %xmm4, %edx
260 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
261 ; AVX512BW-NEXT: shrb %cl, %dl
262 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
263 ; AVX512BW-NEXT: movzbl %dl, %eax
264 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
265 ; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx
266 ; AVX512BW-NEXT: vpextrb $14, %xmm4, %eax
267 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
268 ; AVX512BW-NEXT: shrb %cl, %al
269 ; AVX512BW-NEXT: movzbl %al, %eax
270 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
271 ; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx
272 ; AVX512BW-NEXT: vpextrb $15, %xmm4, %eax
273 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
274 ; AVX512BW-NEXT: shrb %cl, %al
275 ; AVX512BW-NEXT: movzbl %al, %eax
276 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5
277 ; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm3
278 ; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx
279 ; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm4
280 ; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax
281 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
282 ; AVX512BW-NEXT: shrb %cl, %al
283 ; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2
284 ; AVX512BW-NEXT: movzbl %al, %eax
285 ; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx
286 ; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx
287 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
288 ; AVX512BW-NEXT: shrb %cl, %dl
289 ; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx
290 ; AVX512BW-NEXT: vpextrb $2, %xmm4, %esi
291 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
292 ; AVX512BW-NEXT: shrb %cl, %sil
293 ; AVX512BW-NEXT: movzbl %dl, %ecx
294 ; AVX512BW-NEXT: vmovd %ecx, %xmm5
295 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
296 ; AVX512BW-NEXT: movzbl %sil, %eax
297 ; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx
298 ; AVX512BW-NEXT: vpextrb $3, %xmm4, %edx
299 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
300 ; AVX512BW-NEXT: shrb %cl, %dl
301 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
302 ; AVX512BW-NEXT: movzbl %dl, %eax
303 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
304 ; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx
305 ; AVX512BW-NEXT: vpextrb $4, %xmm4, %eax
306 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
307 ; AVX512BW-NEXT: shrb %cl, %al
308 ; AVX512BW-NEXT: movzbl %al, %eax
309 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
310 ; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx
311 ; AVX512BW-NEXT: vpextrb $5, %xmm4, %eax
312 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
313 ; AVX512BW-NEXT: shrb %cl, %al
314 ; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx
315 ; AVX512BW-NEXT: vpextrb $6, %xmm4, %edx
316 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
317 ; AVX512BW-NEXT: shrb %cl, %dl
318 ; AVX512BW-NEXT: movzbl %al, %eax
319 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
320 ; AVX512BW-NEXT: movzbl %dl, %eax
321 ; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx
322 ; AVX512BW-NEXT: vpextrb $7, %xmm4, %edx
323 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
324 ; AVX512BW-NEXT: shrb %cl, %dl
325 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
326 ; AVX512BW-NEXT: movzbl %dl, %eax
327 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
328 ; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx
329 ; AVX512BW-NEXT: vpextrb $8, %xmm4, %eax
330 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
331 ; AVX512BW-NEXT: shrb %cl, %al
332 ; AVX512BW-NEXT: movzbl %al, %eax
333 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
334 ; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx
335 ; AVX512BW-NEXT: vpextrb $9, %xmm4, %eax
336 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
337 ; AVX512BW-NEXT: shrb %cl, %al
338 ; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx
339 ; AVX512BW-NEXT: vpextrb $10, %xmm4, %edx
340 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
341 ; AVX512BW-NEXT: shrb %cl, %dl
342 ; AVX512BW-NEXT: movzbl %al, %eax
343 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
344 ; AVX512BW-NEXT: movzbl %dl, %eax
345 ; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx
346 ; AVX512BW-NEXT: vpextrb $11, %xmm4, %edx
347 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
348 ; AVX512BW-NEXT: shrb %cl, %dl
349 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
350 ; AVX512BW-NEXT: movzbl %dl, %eax
351 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
352 ; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx
353 ; AVX512BW-NEXT: vpextrb $12, %xmm4, %eax
354 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
355 ; AVX512BW-NEXT: shrb %cl, %al
356 ; AVX512BW-NEXT: movzbl %al, %eax
357 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
358 ; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx
359 ; AVX512BW-NEXT: vpextrb $13, %xmm4, %eax
360 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
361 ; AVX512BW-NEXT: shrb %cl, %al
362 ; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx
363 ; AVX512BW-NEXT: vpextrb $14, %xmm4, %edx
364 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
365 ; AVX512BW-NEXT: shrb %cl, %dl
366 ; AVX512BW-NEXT: movzbl %al, %eax
367 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
368 ; AVX512BW-NEXT: movzbl %dl, %eax
369 ; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx
370 ; AVX512BW-NEXT: vpextrb $15, %xmm4, %edx
371 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
372 ; AVX512BW-NEXT: shrb %cl, %dl
373 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3
374 ; AVX512BW-NEXT: movzbl %dl, %eax
375 ; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx
376 ; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx
377 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
378 ; AVX512BW-NEXT: shrb %cl, %dl
379 ; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx
380 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi
381 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
382 ; AVX512BW-NEXT: shrb %cl, %sil
383 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
384 ; AVX512BW-NEXT: movzbl %dl, %eax
385 ; AVX512BW-NEXT: movzbl %sil, %ecx
386 ; AVX512BW-NEXT: vmovd %ecx, %xmm4
387 ; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx
388 ; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx
389 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
390 ; AVX512BW-NEXT: shrb %cl, %dl
391 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
392 ; AVX512BW-NEXT: movzbl %dl, %eax
393 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
394 ; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx
395 ; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax
396 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
397 ; AVX512BW-NEXT: shrb %cl, %al
398 ; AVX512BW-NEXT: movzbl %al, %eax
399 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
400 ; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx
401 ; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax
402 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
403 ; AVX512BW-NEXT: shrb %cl, %al
404 ; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx
405 ; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx
406 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
407 ; AVX512BW-NEXT: shrb %cl, %dl
408 ; AVX512BW-NEXT: movzbl %al, %eax
409 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
410 ; AVX512BW-NEXT: movzbl %dl, %eax
411 ; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx
412 ; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx
413 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
414 ; AVX512BW-NEXT: shrb %cl, %dl
415 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
416 ; AVX512BW-NEXT: movzbl %dl, %eax
417 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
418 ; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx
419 ; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax
420 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
421 ; AVX512BW-NEXT: shrb %cl, %al
422 ; AVX512BW-NEXT: movzbl %al, %eax
423 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
424 ; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx
425 ; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
426 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
427 ; AVX512BW-NEXT: shrb %cl, %al
428 ; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx
429 ; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx
430 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
431 ; AVX512BW-NEXT: shrb %cl, %dl
432 ; AVX512BW-NEXT: movzbl %al, %eax
433 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
434 ; AVX512BW-NEXT: movzbl %dl, %eax
435 ; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx
436 ; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx
437 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
438 ; AVX512BW-NEXT: shrb %cl, %dl
439 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
440 ; AVX512BW-NEXT: movzbl %dl, %eax
441 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
442 ; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx
443 ; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax
444 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
445 ; AVX512BW-NEXT: shrb %cl, %al
446 ; AVX512BW-NEXT: movzbl %al, %eax
447 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
448 ; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx
449 ; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax
450 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
451 ; AVX512BW-NEXT: shrb %cl, %al
452 ; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx
453 ; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx
454 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
455 ; AVX512BW-NEXT: shrb %cl, %dl
456 ; AVX512BW-NEXT: movzbl %al, %eax
457 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
458 ; AVX512BW-NEXT: movzbl %dl, %eax
459 ; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx
460 ; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx
461 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
462 ; AVX512BW-NEXT: shrb %cl, %dl
463 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
464 ; AVX512BW-NEXT: movzbl %dl, %eax
465 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
466 ; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx
467 ; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax
468 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
469 ; AVX512BW-NEXT: shrb %cl, %al
470 ; AVX512BW-NEXT: movzbl %al, %eax
471 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0
472 ; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
473 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
81 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm2
82 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
83 ; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
84 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
85 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
86 ; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm2
87 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
88 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
89 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
90 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
91 ; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm2
92 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
93 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
94 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
95 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
47496 ; AVX512BW-NEXT: retq
47597 %shift = lshr <64 x i8> %a, %b
47698 ret <64 x i8> %shift
552174 ; AVX512BW-LABEL: splatvar_shift_v64i8:
553175 ; AVX512BW: # BB#0:
554176 ; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
555 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm2
556 ; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
557 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm3
558 ; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx
559 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
560 ; AVX512BW-NEXT: shrb %cl, %al
561 ; AVX512BW-NEXT: movzbl %al, %eax
562 ; AVX512BW-NEXT: vpextrb $0, %xmm2, %edx
563 ; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx
564 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
565 ; AVX512BW-NEXT: shrb %cl, %dl
566 ; AVX512BW-NEXT: movzbl %dl, %ecx
567 ; AVX512BW-NEXT: vmovd %ecx, %xmm4
568 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
569 ; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
570 ; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx
571 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
572 ; AVX512BW-NEXT: shrb %cl, %al
573 ; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx
574 ; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx
575 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
576 ; AVX512BW-NEXT: shrb %cl, %dl
577 ; AVX512BW-NEXT: movzbl %al, %eax
578 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
579 ; AVX512BW-NEXT: movzbl %dl, %eax
580 ; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx
581 ; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx
582 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
583 ; AVX512BW-NEXT: shrb %cl, %dl
584 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
585 ; AVX512BW-NEXT: movzbl %dl, %eax
586 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
587 ; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
588 ; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx
589 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
590 ; AVX512BW-NEXT: shrb %cl, %al
591 ; AVX512BW-NEXT: movzbl %al, %eax
592 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
593 ; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
594 ; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx
595 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
596 ; AVX512BW-NEXT: shrb %cl, %al
597 ; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx
598 ; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx
599 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
600 ; AVX512BW-NEXT: shrb %cl, %dl
601 ; AVX512BW-NEXT: movzbl %al, %eax
602 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
603 ; AVX512BW-NEXT: movzbl %dl, %eax
604 ; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx
605 ; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx
606 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
607 ; AVX512BW-NEXT: shrb %cl, %dl
608 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
609 ; AVX512BW-NEXT: movzbl %dl, %eax
610 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
611 ; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
612 ; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx
613 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
614 ; AVX512BW-NEXT: shrb %cl, %al
615 ; AVX512BW-NEXT: movzbl %al, %eax
616 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
617 ; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
618 ; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx
619 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
620 ; AVX512BW-NEXT: shrb %cl, %al
621 ; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx
622 ; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx
623 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
624 ; AVX512BW-NEXT: shrb %cl, %dl
625 ; AVX512BW-NEXT: movzbl %al, %eax
626 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
627 ; AVX512BW-NEXT: movzbl %dl, %eax
628 ; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx
629 ; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx
630 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
631 ; AVX512BW-NEXT: shrb %cl, %dl
632 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
633 ; AVX512BW-NEXT: movzbl %dl, %eax
634 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
635 ; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
636 ; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx
637 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
638 ; AVX512BW-NEXT: shrb %cl, %al
639 ; AVX512BW-NEXT: movzbl %al, %eax
640 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
641 ; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
642 ; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx
643 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
644 ; AVX512BW-NEXT: shrb %cl, %al
645 ; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx
646 ; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx
647 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
648 ; AVX512BW-NEXT: shrb %cl, %dl
649 ; AVX512BW-NEXT: movzbl %al, %eax
650 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2
651 ; AVX512BW-NEXT: movzbl %dl, %eax
652 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
653 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm3
654 ; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax
655 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm4
656 ; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx
657 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
658 ; AVX512BW-NEXT: shrb %cl, %al
659 ; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx
660 ; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx
661 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
662 ; AVX512BW-NEXT: shrb %cl, %dl
663 ; AVX512BW-NEXT: movzbl %al, %eax
664 ; AVX512BW-NEXT: movzbl %dl, %ecx
665 ; AVX512BW-NEXT: vmovd %ecx, %xmm5
666 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
667 ; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax
668 ; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx
669 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
670 ; AVX512BW-NEXT: shrb %cl, %al
671 ; AVX512BW-NEXT: movzbl %al, %eax
672 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
673 ; AVX512BW-NEXT: vpextrb $3, %xmm3, %eax
674 ; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx
675 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
676 ; AVX512BW-NEXT: shrb %cl, %al
677 ; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx
678 ; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx
679 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
680 ; AVX512BW-NEXT: shrb %cl, %dl
681 ; AVX512BW-NEXT: movzbl %al, %eax
682 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
683 ; AVX512BW-NEXT: movzbl %dl, %eax
684 ; AVX512BW-NEXT: vpextrb $5, %xmm3, %edx
685 ; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx
686 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
687 ; AVX512BW-NEXT: shrb %cl, %dl
688 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
689 ; AVX512BW-NEXT: movzbl %dl, %eax
690 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
691 ; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax
692 ; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx
693 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
694 ; AVX512BW-NEXT: shrb %cl, %al
695 ; AVX512BW-NEXT: movzbl %al, %eax
696 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
697 ; AVX512BW-NEXT: vpextrb $7, %xmm3, %eax
698 ; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx
699 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
700 ; AVX512BW-NEXT: shrb %cl, %al
701 ; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx
702 ; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx
703 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
704 ; AVX512BW-NEXT: shrb %cl, %dl
705 ; AVX512BW-NEXT: movzbl %al, %eax
706 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
707 ; AVX512BW-NEXT: movzbl %dl, %eax
708 ; AVX512BW-NEXT: vpextrb $9, %xmm3, %edx
709 ; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx
710 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
711 ; AVX512BW-NEXT: shrb %cl, %dl
712 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
713 ; AVX512BW-NEXT: movzbl %dl, %eax
714 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
715 ; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax
716 ; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx
717 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
718 ; AVX512BW-NEXT: shrb %cl, %al
719 ; AVX512BW-NEXT: movzbl %al, %eax
720 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
721 ; AVX512BW-NEXT: vpextrb $11, %xmm3, %eax
722 ; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx
723 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
724 ; AVX512BW-NEXT: shrb %cl, %al
725 ; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx
726 ; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx
727 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
728 ; AVX512BW-NEXT: shrb %cl, %dl
729 ; AVX512BW-NEXT: movzbl %al, %eax
730 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
731 ; AVX512BW-NEXT: movzbl %dl, %eax
732 ; AVX512BW-NEXT: vpextrb $13, %xmm3, %edx
733 ; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx
734 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
735 ; AVX512BW-NEXT: shrb %cl, %dl
736 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
737 ; AVX512BW-NEXT: movzbl %dl, %eax
738 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
739 ; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax
740 ; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx
741 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
742 ; AVX512BW-NEXT: shrb %cl, %al
743 ; AVX512BW-NEXT: movzbl %al, %eax
744 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
745 ; AVX512BW-NEXT: vpextrb $15, %xmm3, %eax
746 ; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx
747 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
748 ; AVX512BW-NEXT: shrb %cl, %al
749 ; AVX512BW-NEXT: movzbl %al, %eax
750 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5
751 ; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm3
752 ; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax
753 ; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm4
754 ; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx
755 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
756 ; AVX512BW-NEXT: shrb %cl, %al
757 ; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2
758 ; AVX512BW-NEXT: movzbl %al, %eax
759 ; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx
760 ; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx
761 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
762 ; AVX512BW-NEXT: shrb %cl, %dl
763 ; AVX512BW-NEXT: vpextrb $2, %xmm3, %esi
764 ; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx
765 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
766 ; AVX512BW-NEXT: shrb %cl, %sil
767 ; AVX512BW-NEXT: movzbl %dl, %ecx
768 ; AVX512BW-NEXT: vmovd %ecx, %xmm5
769 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
770 ; AVX512BW-NEXT: movzbl %sil, %eax
771 ; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx
772 ; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx
773 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
774 ; AVX512BW-NEXT: shrb %cl, %dl
775 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
776 ; AVX512BW-NEXT: movzbl %dl, %eax
777 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
778 ; AVX512BW-NEXT: vpextrb $4, %xmm3, %eax
779 ; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx
780 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
781 ; AVX512BW-NEXT: shrb %cl, %al
782 ; AVX512BW-NEXT: movzbl %al, %eax
783 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
784 ; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax
785 ; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx
786 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
787 ; AVX512BW-NEXT: shrb %cl, %al
788 ; AVX512BW-NEXT: vpextrb $6, %xmm3, %edx
789 ; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx
790 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
791 ; AVX512BW-NEXT: shrb %cl, %dl
792 ; AVX512BW-NEXT: movzbl %al, %eax
793 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
794 ; AVX512BW-NEXT: movzbl %dl, %eax
795 ; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx
796 ; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx
797 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
798 ; AVX512BW-NEXT: shrb %cl, %dl
799 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
800 ; AVX512BW-NEXT: movzbl %dl, %eax
801 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
802 ; AVX512BW-NEXT: vpextrb $8, %xmm3, %eax
803 ; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx
804 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
805 ; AVX512BW-NEXT: shrb %cl, %al
806 ; AVX512BW-NEXT: movzbl %al, %eax
807 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
808 ; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax
809 ; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx
810 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
811 ; AVX512BW-NEXT: shrb %cl, %al
812 ; AVX512BW-NEXT: vpextrb $10, %xmm3, %edx
813 ; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx
814 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
815 ; AVX512BW-NEXT: shrb %cl, %dl
816 ; AVX512BW-NEXT: movzbl %al, %eax
817 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
818 ; AVX512BW-NEXT: movzbl %dl, %eax
819 ; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx
820 ; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx
821 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
822 ; AVX512BW-NEXT: shrb %cl, %dl
823 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
824 ; AVX512BW-NEXT: movzbl %dl, %eax
825 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
826 ; AVX512BW-NEXT: vpextrb $12, %xmm3, %eax
827 ; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx
828 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
829 ; AVX512BW-NEXT: shrb %cl, %al
830 ; AVX512BW-NEXT: movzbl %al, %eax
831 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
832 ; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax
833 ; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx
834 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
835 ; AVX512BW-NEXT: shrb %cl, %al
836 ; AVX512BW-NEXT: vpextrb $14, %xmm3, %edx
837 ; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx
838 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
839 ; AVX512BW-NEXT: shrb %cl, %dl
840 ; AVX512BW-NEXT: movzbl %al, %eax
841 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
842 ; AVX512BW-NEXT: movzbl %dl, %eax
843 ; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx
844 ; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx
845 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
846 ; AVX512BW-NEXT: shrb %cl, %dl
847 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3
848 ; AVX512BW-NEXT: movzbl %dl, %eax
849 ; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx
850 ; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx
851 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
852 ; AVX512BW-NEXT: shrb %cl, %dl
853 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi
854 ; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx
855 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
856 ; AVX512BW-NEXT: shrb %cl, %sil
857 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
858 ; AVX512BW-NEXT: movzbl %dl, %eax
859 ; AVX512BW-NEXT: movzbl %sil, %ecx
860 ; AVX512BW-NEXT: vmovd %ecx, %xmm4
861 ; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx
862 ; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx
863 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
864 ; AVX512BW-NEXT: shrb %cl, %dl
865 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
866 ; AVX512BW-NEXT: movzbl %dl, %eax
867 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
868 ; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax
869 ; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx
870 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
871 ; AVX512BW-NEXT: shrb %cl, %al
872 ; AVX512BW-NEXT: movzbl %al, %eax
873 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
874 ; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax
875 ; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx
876 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
877 ; AVX512BW-NEXT: shrb %cl, %al
878 ; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx
879 ; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx
880 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
881 ; AVX512BW-NEXT: shrb %cl, %dl
882 ; AVX512BW-NEXT: movzbl %al, %eax
883 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
884 ; AVX512BW-NEXT: movzbl %dl, %eax
885 ; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx
886 ; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx
887 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
888 ; AVX512BW-NEXT: shrb %cl, %dl
889 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
890 ; AVX512BW-NEXT: movzbl %dl, %eax
891 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
892 ; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax
893 ; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx
894 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
895 ; AVX512BW-NEXT: shrb %cl, %al
896 ; AVX512BW-NEXT: movzbl %al, %eax
897 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
898 ; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
899 ; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx
900 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
901 ; AVX512BW-NEXT: shrb %cl, %al
902 ; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx
903 ; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx
904 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
905 ; AVX512BW-NEXT: shrb %cl, %dl
906 ; AVX512BW-NEXT: movzbl %al, %eax
907 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
908 ; AVX512BW-NEXT: movzbl %dl, %eax
909 ; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx
910 ; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx
911 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
912 ; AVX512BW-NEXT: shrb %cl, %dl
913 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
914 ; AVX512BW-NEXT: movzbl %dl, %eax
915 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
916 ; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax
917 ; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx
918 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
919 ; AVX512BW-NEXT: shrb %cl, %al
920 ; AVX512BW-NEXT: movzbl %al, %eax
921 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
922 ; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax
923 ; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx
924 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
925 ; AVX512BW-NEXT: shrb %cl, %al
926 ; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx
927 ; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx
928 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
929 ; AVX512BW-NEXT: shrb %cl, %dl
930 ; AVX512BW-NEXT: movzbl %al, %eax
931 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
932 ; AVX512BW-NEXT: movzbl %dl, %eax
933 ; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx
934 ; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx
935 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
936 ; AVX512BW-NEXT: shrb %cl, %dl
937 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
938 ; AVX512BW-NEXT: movzbl %dl, %eax
939 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
940 ; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax
941 ; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx
942 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
943 ; AVX512BW-NEXT: shrb %cl, %al
944 ; AVX512BW-NEXT: movzbl %al, %eax
945 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0
946 ; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
947 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
177 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm2
178 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
179 ; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
180 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
181 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
182 ; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm2
183 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
184 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
185 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
186 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
187 ; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm2
188 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
189 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
190 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
191 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
948192 ; AVX512BW-NEXT: retq
949193 %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
950194 %shift = lshr <64 x i8> %a, %splat
1025269 ;
1026270 ; AVX512BW-LABEL: constant_shift_v64i8:
1027271 ; AVX512BW: # BB#0:
1028 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1
1029 ; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax
1030 ; AVX512BW-NEXT: vmovd %eax, %xmm2
1031 ; AVX512BW-NEXT: vpextrb $1, %xmm1, %eax
1032 ; AVX512BW-NEXT: shrb %al
1033 ; AVX512BW-NEXT: movzbl %al, %eax
1034 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
1035 ; AVX512BW-NEXT: vpextrb $2, %xmm1, %eax
1036 ; AVX512BW-NEXT: shrb $2, %al
1037 ; AVX512BW-NEXT: movzbl %al, %eax
1038 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
1039 ; AVX512BW-NEXT: vpextrb $3, %xmm1, %eax
1040 ; AVX512BW-NEXT: shrb $3, %al
1041 ; AVX512BW-NEXT: movzbl %al, %eax
1042 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
1043 ; AVX512BW-NEXT: vpextrb $4, %xmm1, %eax
1044 ; AVX512BW-NEXT: shrb $4, %al
1045 ; AVX512BW-NEXT: movzbl %al, %eax
1046 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
1047 ; AVX512BW-NEXT: vpextrb $5, %xmm1, %eax
1048 ; AVX512BW-NEXT: shrb $5, %al
1049 ; AVX512BW-NEXT: movzbl %al, %eax
1050 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
1051 ; AVX512BW-NEXT: vpextrb $6, %xmm1, %eax
1052 ; AVX512BW-NEXT: shrb $6, %al
1053 ; AVX512BW-NEXT: movzbl %al, %eax
1054 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
1055 ; AVX512BW-NEXT: vpextrb $7, %xmm1, %eax
1056 ; AVX512BW-NEXT: shrb $7, %al
1057 ; AVX512BW-NEXT: movzbl %al, %eax
1058 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
1059 ; AVX512BW-NEXT: vpextrb $8, %xmm1, %eax
1060 ; AVX512BW-NEXT: shrb $7, %al
1061 ; AVX512BW-NEXT: movzbl %al, %eax
1062 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
1063 ; AVX512BW-NEXT: vpextrb $9, %xmm1, %eax
1064 ; AVX512BW-NEXT: shrb $6, %al
1065 ; AVX512BW-NEXT: movzbl %al, %eax
1066 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
1067 ; AVX512BW-NEXT: vpextrb $10, %xmm1, %eax
1068 ; AVX512BW-NEXT: shrb $5, %al
1069 ; AVX512BW-NEXT: movzbl %al, %eax
1070 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
1071 ; AVX512BW-NEXT: vpextrb $11, %xmm1, %eax
1072 ; AVX512BW-NEXT: shrb $4, %al
1073 ; AVX512BW-NEXT: movzbl %al, %eax
1074 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
1075 ; AVX512BW-NEXT: vpextrb $12, %xmm1, %eax
1076 ; AVX512BW-NEXT: shrb $3, %al
1077 ; AVX512BW-NEXT: movzbl %al, %eax
1078 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
1079 ; AVX512BW-NEXT: vpextrb $13, %xmm1, %eax
1080 ; AVX512BW-NEXT: shrb $2, %al
1081 ; AVX512BW-NEXT: movzbl %al, %eax
1082 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
1083 ; AVX512BW-NEXT: vpextrb $14, %xmm1, %eax
1084 ; AVX512BW-NEXT: shrb %al
1085 ; AVX512BW-NEXT: movzbl %al, %eax
1086 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
1087 ; AVX512BW-NEXT: vpextrb $15, %xmm1, %eax
1088 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
1089 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2
1090 ; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
1091 ; AVX512BW-NEXT: vmovd %eax, %xmm3
1092 ; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
1093 ; AVX512BW-NEXT: shrb %al
1094 ; AVX512BW-NEXT: movzbl %al, %eax
1095 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
1096 ; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
1097 ; AVX512BW-NEXT: shrb $2, %al
1098 ; AVX512BW-NEXT: movzbl %al, %eax
1099 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
1100 ; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax
1101 ; AVX512BW-NEXT: shrb $3, %al
1102 ; AVX512BW-NEXT: movzbl %al, %eax
1103 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
1104 ; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax
1105 ; AVX512BW-NEXT: shrb $4, %al
1106 ; AVX512BW-NEXT: movzbl %al, %eax
1107 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
1108 ; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
1109 ; AVX512BW-NEXT: shrb $5, %al
1110 ; AVX512BW-NEXT: movzbl %al, %eax
1111 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
1112 ; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
1113 ; AVX512BW-NEXT: shrb $6, %al
1114 ; AVX512BW-NEXT: movzbl %al, %eax
1115 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
1116 ; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax
1117 ; AVX512BW-NEXT: shrb $7, %al
1118 ; AVX512BW-NEXT: movzbl %al, %eax
1119 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
1120 ; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax
1121 ; AVX512BW-NEXT: shrb $7, %al
1122 ; AVX512BW-NEXT: movzbl %al, %eax
1123 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
1124 ; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
1125 ; AVX512BW-NEXT: shrb $6, %al
1126 ; AVX512BW-NEXT: movzbl %al, %eax
1127 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
1128 ; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
1129 ; AVX512BW-NEXT: shrb $5, %al
1130 ; AVX512BW-NEXT: movzbl %al, %eax
1131 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
1132 ; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax
1133 ; AVX512BW-NEXT: shrb $4, %al
1134 ; AVX512BW-NEXT: movzbl %al, %eax
1135 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
1136 ; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax
1137 ; AVX512BW-NEXT: shrb $3, %al
1138 ; AVX512BW-NEXT: movzbl %al, %eax
1139 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
1140 ; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
1141 ; AVX512BW-NEXT: shrb $2, %al
1142 ; AVX512BW-NEXT: movzbl %al, %eax
1143 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
1144 ; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
1145 ; AVX512BW-NEXT: shrb %al
1146 ; AVX512BW-NEXT: movzbl %al, %eax
1147 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
1148 ; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax
1149 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
1150 ; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
1151 ; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm2
1152 ; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
1153 ; AVX512BW-NEXT: vmovd %eax, %xmm3
1154 ; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
1155 ; AVX512BW-NEXT: shrb %al
1156 ; AVX512BW-NEXT: movzbl %al, %eax
1157 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
1158 ; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
1159 ; AVX512BW-NEXT: shrb $2, %al
1160 ; AVX512BW-NEXT: movzbl %al, %eax
1161 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
1162 ; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax
1163 ; AVX512BW-NEXT: shrb $3, %al
1164 ; AVX512BW-NEXT: movzbl %al, %eax
1165 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
1166 ; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax
1167 ; AVX512BW-NEXT: shrb $4, %al
1168 ; AVX512BW-NEXT: movzbl %al, %eax
1169 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
1170 ; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
1171 ; AVX512BW-NEXT: shrb $5, %al
1172 ; AVX512BW-NEXT: movzbl %al, %eax
1173 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
1174 ; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
1175 ; AVX512BW-NEXT: shrb $6, %al
1176 ; AVX512BW-NEXT: movzbl %al, %eax
1177 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
1178 ; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax
1179 ; AVX512BW-NEXT: shrb $7, %al
1180 ; AVX512BW-NEXT: movzbl %al, %eax
1181 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
1182 ; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax
1183 ; AVX512BW-NEXT: shrb $7, %al
1184 ; AVX512BW-NEXT: movzbl %al, %eax
1185 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
1186 ; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
1187 ; AVX512BW-NEXT: shrb $6, %al
1188 ; AVX512BW-NEXT: movzbl %al, %eax
1189 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
1190 ; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
1191 ; AVX512BW-NEXT: shrb $5, %al
1192 ; AVX512BW-NEXT: movzbl %al, %eax
1193 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
1194 ; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax
1195 ; AVX512BW-NEXT: shrb $4, %al
1196 ; AVX512BW-NEXT: movzbl %al, %eax
1197 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
1198 ; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax
1199 ; AVX512BW-NEXT: shrb $3, %al
1200 ; AVX512BW-NEXT: movzbl %al, %eax
1201 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
1202 ; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
1203 ; AVX512BW-NEXT: shrb $2, %al
1204 ; AVX512BW-NEXT: movzbl %al, %eax
1205 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
1206 ; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
1207 ; AVX512BW-NEXT: shrb %al
1208 ; AVX512BW-NEXT: movzbl %al, %eax
1209 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
1210 ; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax
1211 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
1212 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
1213 ; AVX512BW-NEXT: vmovd %eax, %xmm3
1214 ; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax
1215 ; AVX512BW-NEXT: shrb %al
1216 ; AVX512BW-NEXT: movzbl %al, %eax
1217 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
1218 ; AVX512BW-NEXT: vpextrb $2, %xmm0, %eax
1219 ; AVX512BW-NEXT: shrb $2, %al
1220 ; AVX512BW-NEXT: movzbl %al, %eax
1221 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
1222 ; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax
1223 ; AVX512BW-NEXT: shrb $3, %al
1224 ; AVX512BW-NEXT: movzbl %al, %eax
1225 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
1226 ; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax
1227 ; AVX512BW-NEXT: shrb $4, %al
1228 ; AVX512BW-NEXT: movzbl %al, %eax
1229 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
1230 ; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax
1231 ; AVX512BW-NEXT: shrb $5, %al
1232 ; AVX512BW-NEXT: movzbl %al, %eax
1233 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
1234 ; AVX512BW-NEXT: vpextrb $6, %xmm0, %eax
1235 ; AVX512BW-NEXT: shrb $6, %al
1236 ; AVX512BW-NEXT: movzbl %al, %eax
1237 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
1238 ; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax
1239 ; AVX512BW-NEXT: shrb $7, %al
1240 ; AVX512BW-NEXT: movzbl %al, %eax
1241 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
1242 ; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
1243 ; AVX512BW-NEXT: shrb $7, %al
1244 ; AVX512BW-NEXT: movzbl %al, %eax
1245 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
1246 ; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax
1247 ; AVX512BW-NEXT: shrb $6, %al
1248 ; AVX512BW-NEXT: movzbl %al, %eax
1249 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
1250 ; AVX512BW-NEXT: vpextrb $10, %xmm0, %eax
1251 ; AVX512BW-NEXT: shrb $5, %al
1252 ; AVX512BW-NEXT: movzbl %al, %eax
1253 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
1254 ; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax
1255 ; AVX512BW-NEXT: shrb $4, %al
1256 ; AVX512BW-NEXT: movzbl %al, %eax
1257 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
1258 ; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax
1259 ; AVX512BW-NEXT: shrb $3, %al
1260 ; AVX512BW-NEXT: movzbl %al, %eax
1261 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
1262 ; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax
1263 ; AVX512BW-NEXT: shrb $2, %al
1264 ; AVX512BW-NEXT: movzbl %al, %eax
1265 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
1266 ; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax
1267 ; AVX512BW-NEXT: shrb %al
1268 ; AVX512BW-NEXT: movzbl %al, %eax
1269 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
1270 ; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax
1271 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0
1272 ; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1273 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
272 ; AVX512BW-NEXT: vpsllw $5, {{.*}}(%rip), %zmm1
273 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
274 ; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm2
275 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
276 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
277 ; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm2
278 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
279 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
280 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
281 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
282 ; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm2
283 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
284 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
285 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
286 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
1274287 ; AVX512BW-NEXT: retq
1275288 %shift = lshr <64 x i8> %a,
1276289 ret <64 x i8> %shift
7575 ;
7676 ; AVX512BW-LABEL: var_shift_v64i8:
7777 ; AVX512BW: # BB#0:
78 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2
79 ; AVX512BW-NEXT: vpextrb $1, %xmm2, %ecx
80 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm3
81 ; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax
82 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
83 ; AVX512BW-NEXT: shlb %cl, %al
84 ; AVX512BW-NEXT: movzbl %al, %eax
85 ; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx
86 ; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx
87 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
88 ; AVX512BW-NEXT: shlb %cl, %dl
89 ; AVX512BW-NEXT: movzbl %dl, %ecx
90 ; AVX512BW-NEXT: vmovd %ecx, %xmm4
91 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
92 ; AVX512BW-NEXT: vpextrb $2, %xmm2, %ecx
93 ; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax
94 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
95 ; AVX512BW-NEXT: shlb %cl, %al
96 ; AVX512BW-NEXT: vpextrb $3, %xmm2, %ecx
97 ; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx
98 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
99 ; AVX512BW-NEXT: shlb %cl, %dl
100 ; AVX512BW-NEXT: movzbl %al, %eax
101 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
102 ; AVX512BW-NEXT: movzbl %dl, %eax
103 ; AVX512BW-NEXT: vpextrb $4, %xmm2, %ecx
104 ; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx
105 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
106 ; AVX512BW-NEXT: shlb %cl, %dl
107 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
108 ; AVX512BW-NEXT: movzbl %dl, %eax
109 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
110 ; AVX512BW-NEXT: vpextrb $5, %xmm2, %ecx
111 ; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax
112 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
113 ; AVX512BW-NEXT: shlb %cl, %al
114 ; AVX512BW-NEXT: movzbl %al, %eax
115 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
116 ; AVX512BW-NEXT: vpextrb $6, %xmm2, %ecx
117 ; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax
118 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
119 ; AVX512BW-NEXT: shlb %cl, %al
120 ; AVX512BW-NEXT: vpextrb $7, %xmm2, %ecx
121 ; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx
122 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
123 ; AVX512BW-NEXT: shlb %cl, %dl
124 ; AVX512BW-NEXT: movzbl %al, %eax
125 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
126 ; AVX512BW-NEXT: movzbl %dl, %eax
127 ; AVX512BW-NEXT: vpextrb $8, %xmm2, %ecx
128 ; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx
129 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
130 ; AVX512BW-NEXT: shlb %cl, %dl
131 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
132 ; AVX512BW-NEXT: movzbl %dl, %eax
133 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
134 ; AVX512BW-NEXT: vpextrb $9, %xmm2, %ecx
135 ; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax
136 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
137 ; AVX512BW-NEXT: shlb %cl, %al
138 ; AVX512BW-NEXT: movzbl %al, %eax
139 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
140 ; AVX512BW-NEXT: vpextrb $10, %xmm2, %ecx
141 ; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax
142 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
143 ; AVX512BW-NEXT: shlb %cl, %al
144 ; AVX512BW-NEXT: vpextrb $11, %xmm2, %ecx
145 ; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx
146 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
147 ; AVX512BW-NEXT: shlb %cl, %dl
148 ; AVX512BW-NEXT: movzbl %al, %eax
149 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
150 ; AVX512BW-NEXT: movzbl %dl, %eax
151 ; AVX512BW-NEXT: vpextrb $12, %xmm2, %ecx
152 ; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx
153 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
154 ; AVX512BW-NEXT: shlb %cl, %dl
155 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
156 ; AVX512BW-NEXT: movzbl %dl, %eax
157 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
158 ; AVX512BW-NEXT: vpextrb $13, %xmm2, %ecx
159 ; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax
160 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
161 ; AVX512BW-NEXT: shlb %cl, %al
162 ; AVX512BW-NEXT: movzbl %al, %eax
163 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
164 ; AVX512BW-NEXT: vpextrb $14, %xmm2, %ecx
165 ; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax
166 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
167 ; AVX512BW-NEXT: shlb %cl, %al
168 ; AVX512BW-NEXT: vpextrb $15, %xmm2, %ecx
169 ; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx
170 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
171 ; AVX512BW-NEXT: shlb %cl, %dl
172 ; AVX512BW-NEXT: movzbl %al, %eax
173 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2
174 ; AVX512BW-NEXT: movzbl %dl, %eax
175 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
176 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3
177 ; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx
178 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm4
179 ; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax
180 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
181 ; AVX512BW-NEXT: shlb %cl, %al
182 ; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx
183 ; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx
184 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
185 ; AVX512BW-NEXT: shlb %cl, %dl
186 ; AVX512BW-NEXT: movzbl %al, %eax
187 ; AVX512BW-NEXT: movzbl %dl, %ecx
188 ; AVX512BW-NEXT: vmovd %ecx, %xmm5
189 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
190 ; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx
191 ; AVX512BW-NEXT: vpextrb $2, %xmm4, %eax
192 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
193 ; AVX512BW-NEXT: shlb %cl, %al
194 ; AVX512BW-NEXT: movzbl %al, %eax
195 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
196 ; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx
197 ; AVX512BW-NEXT: vpextrb $3, %xmm4, %eax
198 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
199 ; AVX512BW-NEXT: shlb %cl, %al
200 ; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx
201 ; AVX512BW-NEXT: vpextrb $4, %xmm4, %edx
202 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
203 ; AVX512BW-NEXT: shlb %cl, %dl
204 ; AVX512BW-NEXT: movzbl %al, %eax
205 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
206 ; AVX512BW-NEXT: movzbl %dl, %eax
207 ; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx
208 ; AVX512BW-NEXT: vpextrb $5, %xmm4, %edx
209 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
210 ; AVX512BW-NEXT: shlb %cl, %dl
211 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
212 ; AVX512BW-NEXT: movzbl %dl, %eax
213 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
214 ; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx
215 ; AVX512BW-NEXT: vpextrb $6, %xmm4, %eax
216 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
217 ; AVX512BW-NEXT: shlb %cl, %al
218 ; AVX512BW-NEXT: movzbl %al, %eax
219 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
220 ; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx
221 ; AVX512BW-NEXT: vpextrb $7, %xmm4, %eax
222 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
223 ; AVX512BW-NEXT: shlb %cl, %al
224 ; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx
225 ; AVX512BW-NEXT: vpextrb $8, %xmm4, %edx
226 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
227 ; AVX512BW-NEXT: shlb %cl, %dl
228 ; AVX512BW-NEXT: movzbl %al, %eax
229 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
230 ; AVX512BW-NEXT: movzbl %dl, %eax
231 ; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx
232 ; AVX512BW-NEXT: vpextrb $9, %xmm4, %edx
233 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
234 ; AVX512BW-NEXT: shlb %cl, %dl
235 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
236 ; AVX512BW-NEXT: movzbl %dl, %eax
237 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
238 ; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx
239 ; AVX512BW-NEXT: vpextrb $10, %xmm4, %eax
240 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
241 ; AVX512BW-NEXT: shlb %cl, %al
242 ; AVX512BW-NEXT: movzbl %al, %eax
243 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
244 ; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx
245 ; AVX512BW-NEXT: vpextrb $11, %xmm4, %eax
246 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
247 ; AVX512BW-NEXT: shlb %cl, %al
248 ; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx
249 ; AVX512BW-NEXT: vpextrb $12, %xmm4, %edx
250 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
251 ; AVX512BW-NEXT: shlb %cl, %dl
252 ; AVX512BW-NEXT: movzbl %al, %eax
253 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
254 ; AVX512BW-NEXT: movzbl %dl, %eax
255 ; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx
256 ; AVX512BW-NEXT: vpextrb $13, %xmm4, %edx
257 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
258 ; AVX512BW-NEXT: shlb %cl, %dl
259 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
260 ; AVX512BW-NEXT: movzbl %dl, %eax
261 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
262 ; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx
263 ; AVX512BW-NEXT: vpextrb $14, %xmm4, %eax
264 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
265 ; AVX512BW-NEXT: shlb %cl, %al
266 ; AVX512BW-NEXT: movzbl %al, %eax
267 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
268 ; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx
269 ; AVX512BW-NEXT: vpextrb $15, %xmm4, %eax
270 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
271 ; AVX512BW-NEXT: shlb %cl, %al
272 ; AVX512BW-NEXT: movzbl %al, %eax
273 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5
274 ; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm3
275 ; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx
276 ; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm4
277 ; AVX512BW-NEXT: vpextrb $1, %xmm4, %eax
278 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
279 ; AVX512BW-NEXT: shlb %cl, %al
280 ; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2
281 ; AVX512BW-NEXT: movzbl %al, %eax
282 ; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx
283 ; AVX512BW-NEXT: vpextrb $0, %xmm4, %edx
284 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
285 ; AVX512BW-NEXT: shlb %cl, %dl
286 ; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx
287 ; AVX512BW-NEXT: vpextrb $2, %xmm4, %esi
288 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
289 ; AVX512BW-NEXT: shlb %cl, %sil
290 ; AVX512BW-NEXT: movzbl %dl, %ecx
291 ; AVX512BW-NEXT: vmovd %ecx, %xmm5
292 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
293 ; AVX512BW-NEXT: movzbl %sil, %eax
294 ; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx
295 ; AVX512BW-NEXT: vpextrb $3, %xmm4, %edx
296 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
297 ; AVX512BW-NEXT: shlb %cl, %dl
298 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
299 ; AVX512BW-NEXT: movzbl %dl, %eax
300 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
301 ; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx
302 ; AVX512BW-NEXT: vpextrb $4, %xmm4, %eax
303 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
304 ; AVX512BW-NEXT: shlb %cl, %al
305 ; AVX512BW-NEXT: movzbl %al, %eax
306 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
307 ; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx
308 ; AVX512BW-NEXT: vpextrb $5, %xmm4, %eax
309 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
310 ; AVX512BW-NEXT: shlb %cl, %al
311 ; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx
312 ; AVX512BW-NEXT: vpextrb $6, %xmm4, %edx
313 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
314 ; AVX512BW-NEXT: shlb %cl, %dl
315 ; AVX512BW-NEXT: movzbl %al, %eax
316 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
317 ; AVX512BW-NEXT: movzbl %dl, %eax
318 ; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx
319 ; AVX512BW-NEXT: vpextrb $7, %xmm4, %edx
320 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
321 ; AVX512BW-NEXT: shlb %cl, %dl
322 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
323 ; AVX512BW-NEXT: movzbl %dl, %eax
324 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
325 ; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx
326 ; AVX512BW-NEXT: vpextrb $8, %xmm4, %eax
327 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
328 ; AVX512BW-NEXT: shlb %cl, %al
329 ; AVX512BW-NEXT: movzbl %al, %eax
330 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
331 ; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx
332 ; AVX512BW-NEXT: vpextrb $9, %xmm4, %eax
333 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
334 ; AVX512BW-NEXT: shlb %cl, %al
335 ; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx
336 ; AVX512BW-NEXT: vpextrb $10, %xmm4, %edx
337 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
338 ; AVX512BW-NEXT: shlb %cl, %dl
339 ; AVX512BW-NEXT: movzbl %al, %eax
340 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
341 ; AVX512BW-NEXT: movzbl %dl, %eax
342 ; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx
343 ; AVX512BW-NEXT: vpextrb $11, %xmm4, %edx
344 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
345 ; AVX512BW-NEXT: shlb %cl, %dl
346 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
347 ; AVX512BW-NEXT: movzbl %dl, %eax
348 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
349 ; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx
350 ; AVX512BW-NEXT: vpextrb $12, %xmm4, %eax
351 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
352 ; AVX512BW-NEXT: shlb %cl, %al
353 ; AVX512BW-NEXT: movzbl %al, %eax
354 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
355 ; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx
356 ; AVX512BW-NEXT: vpextrb $13, %xmm4, %eax
357 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
358 ; AVX512BW-NEXT: shlb %cl, %al
359 ; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx
360 ; AVX512BW-NEXT: vpextrb $14, %xmm4, %edx
361 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
362 ; AVX512BW-NEXT: shlb %cl, %dl
363 ; AVX512BW-NEXT: movzbl %al, %eax
364 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
365 ; AVX512BW-NEXT: movzbl %dl, %eax
366 ; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx
367 ; AVX512BW-NEXT: vpextrb $15, %xmm4, %edx
368 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
369 ; AVX512BW-NEXT: shlb %cl, %dl
370 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3
371 ; AVX512BW-NEXT: movzbl %dl, %eax
372 ; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx
373 ; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx
374 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
375 ; AVX512BW-NEXT: shlb %cl, %dl
376 ; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx
377 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi
378 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
379 ; AVX512BW-NEXT: shlb %cl, %sil
380 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
381 ; AVX512BW-NEXT: movzbl %dl, %eax
382 ; AVX512BW-NEXT: movzbl %sil, %ecx
383 ; AVX512BW-NEXT: vmovd %ecx, %xmm4
384 ; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx
385 ; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx
386 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
387 ; AVX512BW-NEXT: shlb %cl, %dl
388 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
389 ; AVX512BW-NEXT: movzbl %dl, %eax
390 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
391 ; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx
392 ; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax
393 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
394 ; AVX512BW-NEXT: shlb %cl, %al
395 ; AVX512BW-NEXT: movzbl %al, %eax
396 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
397 ; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx
398 ; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax
399 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
400 ; AVX512BW-NEXT: shlb %cl, %al
401 ; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx
402 ; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx
403 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
404 ; AVX512BW-NEXT: shlb %cl, %dl
405 ; AVX512BW-NEXT: movzbl %al, %eax
406 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
407 ; AVX512BW-NEXT: movzbl %dl, %eax
408 ; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx
409 ; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx
410 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
411 ; AVX512BW-NEXT: shlb %cl, %dl
412 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
413 ; AVX512BW-NEXT: movzbl %dl, %eax
414 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
415 ; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx
416 ; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax
417 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
418 ; AVX512BW-NEXT: shlb %cl, %al
419 ; AVX512BW-NEXT: movzbl %al, %eax
420 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
421 ; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx
422 ; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
423 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
424 ; AVX512BW-NEXT: shlb %cl, %al
425 ; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx
426 ; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx
427 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
428 ; AVX512BW-NEXT: shlb %cl, %dl
429 ; AVX512BW-NEXT: movzbl %al, %eax
430 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
431 ; AVX512BW-NEXT: movzbl %dl, %eax
432 ; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx
433 ; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx
434 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
435 ; AVX512BW-NEXT: shlb %cl, %dl
436 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
437 ; AVX512BW-NEXT: movzbl %dl, %eax
438 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
439 ; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx
440 ; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax
441 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
442 ; AVX512BW-NEXT: shlb %cl, %al
443 ; AVX512BW-NEXT: movzbl %al, %eax
444 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
445 ; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx
446 ; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax
447 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
448 ; AVX512BW-NEXT: shlb %cl, %al
449 ; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx
450 ; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx
451 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
452 ; AVX512BW-NEXT: shlb %cl, %dl
453 ; AVX512BW-NEXT: movzbl %al, %eax
454 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
455 ; AVX512BW-NEXT: movzbl %dl, %eax
456 ; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx
457 ; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx
458 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
459 ; AVX512BW-NEXT: shlb %cl, %dl
460 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
461 ; AVX512BW-NEXT: movzbl %dl, %eax
462 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
463 ; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx
464 ; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax
465 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
466 ; AVX512BW-NEXT: shlb %cl, %al
467 ; AVX512BW-NEXT: movzbl %al, %eax
468 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0
469 ; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
470 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
78 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
79 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
80 ; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
81 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
82 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
83 ; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm2
84 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
85 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
86 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
87 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
88 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
89 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
90 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
47191 ; AVX512BW-NEXT: retq
47292 %shift = shl <64 x i8> %a, %b
47393 ret <64 x i8> %shift
546166 ; AVX512BW-LABEL: splatvar_shift_v64i8:
547167 ; AVX512BW: # BB#0:
548168 ; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
549 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm2
550 ; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
551 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm3
552 ; AVX512BW-NEXT: vpextrb $1, %xmm3, %ecx
553 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
554 ; AVX512BW-NEXT: shlb %cl, %al
555 ; AVX512BW-NEXT: movzbl %al, %eax
556 ; AVX512BW-NEXT: vpextrb $0, %xmm2, %edx
557 ; AVX512BW-NEXT: vpextrb $0, %xmm3, %ecx
558 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
559 ; AVX512BW-NEXT: shlb %cl, %dl
560 ; AVX512BW-NEXT: movzbl %dl, %ecx
561 ; AVX512BW-NEXT: vmovd %ecx, %xmm4
562 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
563 ; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
564 ; AVX512BW-NEXT: vpextrb $2, %xmm3, %ecx
565 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
566 ; AVX512BW-NEXT: shlb %cl, %al
567 ; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx
568 ; AVX512BW-NEXT: vpextrb $3, %xmm3, %ecx
569 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
570 ; AVX512BW-NEXT: shlb %cl, %dl
571 ; AVX512BW-NEXT: movzbl %al, %eax
572 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
573 ; AVX512BW-NEXT: movzbl %dl, %eax
574 ; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx
575 ; AVX512BW-NEXT: vpextrb $4, %xmm3, %ecx
576 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
577 ; AVX512BW-NEXT: shlb %cl, %dl
578 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
579 ; AVX512BW-NEXT: movzbl %dl, %eax
580 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
581 ; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
582 ; AVX512BW-NEXT: vpextrb $5, %xmm3, %ecx
583 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
584 ; AVX512BW-NEXT: shlb %cl, %al
585 ; AVX512BW-NEXT: movzbl %al, %eax
586 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
587 ; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
588 ; AVX512BW-NEXT: vpextrb $6, %xmm3, %ecx
589 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
590 ; AVX512BW-NEXT: shlb %cl, %al
591 ; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx
592 ; AVX512BW-NEXT: vpextrb $7, %xmm3, %ecx
593 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
594 ; AVX512BW-NEXT: shlb %cl, %dl
595 ; AVX512BW-NEXT: movzbl %al, %eax
596 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
597 ; AVX512BW-NEXT: movzbl %dl, %eax
598 ; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx
599 ; AVX512BW-NEXT: vpextrb $8, %xmm3, %ecx
600 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
601 ; AVX512BW-NEXT: shlb %cl, %dl
602 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
603 ; AVX512BW-NEXT: movzbl %dl, %eax
604 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
605 ; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
606 ; AVX512BW-NEXT: vpextrb $9, %xmm3, %ecx
607 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
608 ; AVX512BW-NEXT: shlb %cl, %al
609 ; AVX512BW-NEXT: movzbl %al, %eax
610 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
611 ; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
612 ; AVX512BW-NEXT: vpextrb $10, %xmm3, %ecx
613 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
614 ; AVX512BW-NEXT: shlb %cl, %al
615 ; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx
616 ; AVX512BW-NEXT: vpextrb $11, %xmm3, %ecx
617 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
618 ; AVX512BW-NEXT: shlb %cl, %dl
619 ; AVX512BW-NEXT: movzbl %al, %eax
620 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
621 ; AVX512BW-NEXT: movzbl %dl, %eax
622 ; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx
623 ; AVX512BW-NEXT: vpextrb $12, %xmm3, %ecx
624 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
625 ; AVX512BW-NEXT: shlb %cl, %dl
626 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
627 ; AVX512BW-NEXT: movzbl %dl, %eax
628 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
629 ; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
630 ; AVX512BW-NEXT: vpextrb $13, %xmm3, %ecx
631 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
632 ; AVX512BW-NEXT: shlb %cl, %al
633 ; AVX512BW-NEXT: movzbl %al, %eax
634 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
635 ; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
636 ; AVX512BW-NEXT: vpextrb $14, %xmm3, %ecx
637 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
638 ; AVX512BW-NEXT: shlb %cl, %al
639 ; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx
640 ; AVX512BW-NEXT: vpextrb $15, %xmm3, %ecx
641 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
642 ; AVX512BW-NEXT: shlb %cl, %dl
643 ; AVX512BW-NEXT: movzbl %al, %eax
644 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm2
645 ; AVX512BW-NEXT: movzbl %dl, %eax
646 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
647 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm3
648 ; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax
649 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm4
650 ; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx
651 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
652 ; AVX512BW-NEXT: shlb %cl, %al
653 ; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx
654 ; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx
655 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
656 ; AVX512BW-NEXT: shlb %cl, %dl
657 ; AVX512BW-NEXT: movzbl %al, %eax
658 ; AVX512BW-NEXT: movzbl %dl, %ecx
659 ; AVX512BW-NEXT: vmovd %ecx, %xmm5
660 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
661 ; AVX512BW-NEXT: vpextrb $2, %xmm3, %eax
662 ; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx
663 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
664 ; AVX512BW-NEXT: shlb %cl, %al
665 ; AVX512BW-NEXT: movzbl %al, %eax
666 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
667 ; AVX512BW-NEXT: vpextrb $3, %xmm3, %eax
668 ; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx
669 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
670 ; AVX512BW-NEXT: shlb %cl, %al
671 ; AVX512BW-NEXT: vpextrb $4, %xmm3, %edx
672 ; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx
673 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
674 ; AVX512BW-NEXT: shlb %cl, %dl
675 ; AVX512BW-NEXT: movzbl %al, %eax
676 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
677 ; AVX512BW-NEXT: movzbl %dl, %eax
678 ; AVX512BW-NEXT: vpextrb $5, %xmm3, %edx
679 ; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx
680 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
681 ; AVX512BW-NEXT: shlb %cl, %dl
682 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
683 ; AVX512BW-NEXT: movzbl %dl, %eax
684 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
685 ; AVX512BW-NEXT: vpextrb $6, %xmm3, %eax
686 ; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx
687 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
688 ; AVX512BW-NEXT: shlb %cl, %al
689 ; AVX512BW-NEXT: movzbl %al, %eax
690 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
691 ; AVX512BW-NEXT: vpextrb $7, %xmm3, %eax
692 ; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx
693 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
694 ; AVX512BW-NEXT: shlb %cl, %al
695 ; AVX512BW-NEXT: vpextrb $8, %xmm3, %edx
696 ; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx
697 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
698 ; AVX512BW-NEXT: shlb %cl, %dl
699 ; AVX512BW-NEXT: movzbl %al, %eax
700 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
701 ; AVX512BW-NEXT: movzbl %dl, %eax
702 ; AVX512BW-NEXT: vpextrb $9, %xmm3, %edx
703 ; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx
704 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
705 ; AVX512BW-NEXT: shlb %cl, %dl
706 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
707 ; AVX512BW-NEXT: movzbl %dl, %eax
708 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
709 ; AVX512BW-NEXT: vpextrb $10, %xmm3, %eax
710 ; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx
711 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
712 ; AVX512BW-NEXT: shlb %cl, %al
713 ; AVX512BW-NEXT: movzbl %al, %eax
714 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
715 ; AVX512BW-NEXT: vpextrb $11, %xmm3, %eax
716 ; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx
717 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
718 ; AVX512BW-NEXT: shlb %cl, %al
719 ; AVX512BW-NEXT: vpextrb $12, %xmm3, %edx
720 ; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx
721 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
722 ; AVX512BW-NEXT: shlb %cl, %dl
723 ; AVX512BW-NEXT: movzbl %al, %eax
724 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
725 ; AVX512BW-NEXT: movzbl %dl, %eax
726 ; AVX512BW-NEXT: vpextrb $13, %xmm3, %edx
727 ; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx
728 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
729 ; AVX512BW-NEXT: shlb %cl, %dl
730 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
731 ; AVX512BW-NEXT: movzbl %dl, %eax
732 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
733 ; AVX512BW-NEXT: vpextrb $14, %xmm3, %eax
734 ; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx
735 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
736 ; AVX512BW-NEXT: shlb %cl, %al
737 ; AVX512BW-NEXT: movzbl %al, %eax
738 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
739 ; AVX512BW-NEXT: vpextrb $15, %xmm3, %eax
740 ; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx
741 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
742 ; AVX512BW-NEXT: shlb %cl, %al
743 ; AVX512BW-NEXT: movzbl %al, %eax
744 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5
745 ; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm3
746 ; AVX512BW-NEXT: vpextrb $1, %xmm3, %eax
747 ; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm4
748 ; AVX512BW-NEXT: vpextrb $1, %xmm4, %ecx
749 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
750 ; AVX512BW-NEXT: shlb %cl, %al
751 ; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2
752 ; AVX512BW-NEXT: movzbl %al, %eax
753 ; AVX512BW-NEXT: vpextrb $0, %xmm3, %edx
754 ; AVX512BW-NEXT: vpextrb $0, %xmm4, %ecx
755 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
756 ; AVX512BW-NEXT: shlb %cl, %dl
757 ; AVX512BW-NEXT: vpextrb $2, %xmm3, %esi
758 ; AVX512BW-NEXT: vpextrb $2, %xmm4, %ecx
759 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
760 ; AVX512BW-NEXT: shlb %cl, %sil
761 ; AVX512BW-NEXT: movzbl %dl, %ecx
762 ; AVX512BW-NEXT: vmovd %ecx, %xmm5
763 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
764 ; AVX512BW-NEXT: movzbl %sil, %eax
765 ; AVX512BW-NEXT: vpextrb $3, %xmm3, %edx
766 ; AVX512BW-NEXT: vpextrb $3, %xmm4, %ecx
767 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
768 ; AVX512BW-NEXT: shlb %cl, %dl
769 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
770 ; AVX512BW-NEXT: movzbl %dl, %eax
771 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
772 ; AVX512BW-NEXT: vpextrb $4, %xmm3, %eax
773 ; AVX512BW-NEXT: vpextrb $4, %xmm4, %ecx
774 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
775 ; AVX512BW-NEXT: shlb %cl, %al
776 ; AVX512BW-NEXT: movzbl %al, %eax
777 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
778 ; AVX512BW-NEXT: vpextrb $5, %xmm3, %eax
779 ; AVX512BW-NEXT: vpextrb $5, %xmm4, %ecx
780 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
781 ; AVX512BW-NEXT: shlb %cl, %al
782 ; AVX512BW-NEXT: vpextrb $6, %xmm3, %edx
783 ; AVX512BW-NEXT: vpextrb $6, %xmm4, %ecx
784 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
785 ; AVX512BW-NEXT: shlb %cl, %dl
786 ; AVX512BW-NEXT: movzbl %al, %eax
787 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
788 ; AVX512BW-NEXT: movzbl %dl, %eax
789 ; AVX512BW-NEXT: vpextrb $7, %xmm3, %edx
790 ; AVX512BW-NEXT: vpextrb $7, %xmm4, %ecx
791 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
792 ; AVX512BW-NEXT: shlb %cl, %dl
793 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
794 ; AVX512BW-NEXT: movzbl %dl, %eax
795 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
796 ; AVX512BW-NEXT: vpextrb $8, %xmm3, %eax
797 ; AVX512BW-NEXT: vpextrb $8, %xmm4, %ecx
798 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
799 ; AVX512BW-NEXT: shlb %cl, %al
800 ; AVX512BW-NEXT: movzbl %al, %eax
801 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
802 ; AVX512BW-NEXT: vpextrb $9, %xmm3, %eax
803 ; AVX512BW-NEXT: vpextrb $9, %xmm4, %ecx
804 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
805 ; AVX512BW-NEXT: shlb %cl, %al
806 ; AVX512BW-NEXT: vpextrb $10, %xmm3, %edx
807 ; AVX512BW-NEXT: vpextrb $10, %xmm4, %ecx
808 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
809 ; AVX512BW-NEXT: shlb %cl, %dl
810 ; AVX512BW-NEXT: movzbl %al, %eax
811 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
812 ; AVX512BW-NEXT: movzbl %dl, %eax
813 ; AVX512BW-NEXT: vpextrb $11, %xmm3, %edx
814 ; AVX512BW-NEXT: vpextrb $11, %xmm4, %ecx
815 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
816 ; AVX512BW-NEXT: shlb %cl, %dl
817 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
818 ; AVX512BW-NEXT: movzbl %dl, %eax
819 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
820 ; AVX512BW-NEXT: vpextrb $12, %xmm3, %eax
821 ; AVX512BW-NEXT: vpextrb $12, %xmm4, %ecx
822 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
823 ; AVX512BW-NEXT: shlb %cl, %al
824 ; AVX512BW-NEXT: movzbl %al, %eax
825 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
826 ; AVX512BW-NEXT: vpextrb $13, %xmm3, %eax
827 ; AVX512BW-NEXT: vpextrb $13, %xmm4, %ecx
828 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
829 ; AVX512BW-NEXT: shlb %cl, %al
830 ; AVX512BW-NEXT: vpextrb $14, %xmm3, %edx
831 ; AVX512BW-NEXT: vpextrb $14, %xmm4, %ecx
832 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
833 ; AVX512BW-NEXT: shlb %cl, %dl
834 ; AVX512BW-NEXT: movzbl %al, %eax
835 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
836 ; AVX512BW-NEXT: movzbl %dl, %eax
837 ; AVX512BW-NEXT: vpextrb $15, %xmm3, %edx
838 ; AVX512BW-NEXT: vpextrb $15, %xmm4, %ecx
839 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
840 ; AVX512BW-NEXT: shlb %cl, %dl
841 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm5, %xmm3
842 ; AVX512BW-NEXT: movzbl %dl, %eax
843 ; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx
844 ; AVX512BW-NEXT: vpextrb $1, %xmm1, %ecx
845 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
846 ; AVX512BW-NEXT: shlb %cl, %dl
847 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi
848 ; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx
849 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
850 ; AVX512BW-NEXT: shlb %cl, %sil
851 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
852 ; AVX512BW-NEXT: movzbl %dl, %eax
853 ; AVX512BW-NEXT: movzbl %sil, %ecx
854 ; AVX512BW-NEXT: vmovd %ecx, %xmm4
855 ; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx
856 ; AVX512BW-NEXT: vpextrb $2, %xmm1, %ecx
857 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
858 ; AVX512BW-NEXT: shlb %cl, %dl
859 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
860 ; AVX512BW-NEXT: movzbl %dl, %eax
861 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
862 ; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax
863 ; AVX512BW-NEXT: vpextrb $3, %xmm1, %ecx
864 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
865 ; AVX512BW-NEXT: shlb %cl, %al
866 ; AVX512BW-NEXT: movzbl %al, %eax
867 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
868 ; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax
869 ; AVX512BW-NEXT: vpextrb $4, %xmm1, %ecx
870 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
871 ; AVX512BW-NEXT: shlb %cl, %al
872 ; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx
873 ; AVX512BW-NEXT: vpextrb $5, %xmm1, %ecx
874 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
875 ; AVX512BW-NEXT: shlb %cl, %dl
876 ; AVX512BW-NEXT: movzbl %al, %eax
877 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
878 ; AVX512BW-NEXT: movzbl %dl, %eax
879 ; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx
880 ; AVX512BW-NEXT: vpextrb $6, %xmm1, %ecx
881 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
882 ; AVX512BW-NEXT: shlb %cl, %dl
883 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
884 ; AVX512BW-NEXT: movzbl %dl, %eax
885 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
886 ; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax
887 ; AVX512BW-NEXT: vpextrb $7, %xmm1, %ecx
888 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
889 ; AVX512BW-NEXT: shlb %cl, %al
890 ; AVX512BW-NEXT: movzbl %al, %eax
891 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
892 ; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
893 ; AVX512BW-NEXT: vpextrb $8, %xmm1, %ecx
894 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
895 ; AVX512BW-NEXT: shlb %cl, %al
896 ; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx
897 ; AVX512BW-NEXT: vpextrb $9, %xmm1, %ecx
898 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
899 ; AVX512BW-NEXT: shlb %cl, %dl
900 ; AVX512BW-NEXT: movzbl %al, %eax
901 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
902 ; AVX512BW-NEXT: movzbl %dl, %eax
903 ; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx
904 ; AVX512BW-NEXT: vpextrb $10, %xmm1, %ecx
905 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
906 ; AVX512BW-NEXT: shlb %cl, %dl
907 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
908 ; AVX512BW-NEXT: movzbl %dl, %eax
909 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
910 ; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax
911 ; AVX512BW-NEXT: vpextrb $11, %xmm1, %ecx
912 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
913 ; AVX512BW-NEXT: shlb %cl, %al
914 ; AVX512BW-NEXT: movzbl %al, %eax
915 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
916 ; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax
917 ; AVX512BW-NEXT: vpextrb $12, %xmm1, %ecx
918 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
919 ; AVX512BW-NEXT: shlb %cl, %al
920 ; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx
921 ; AVX512BW-NEXT: vpextrb $13, %xmm1, %ecx
922 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
923 ; AVX512BW-NEXT: shlb %cl, %dl
924 ; AVX512BW-NEXT: movzbl %al, %eax
925 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
926 ; AVX512BW-NEXT: movzbl %dl, %eax
927 ; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx
928 ; AVX512BW-NEXT: vpextrb $14, %xmm1, %ecx
929 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
930 ; AVX512BW-NEXT: shlb %cl, %dl
931 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
932 ; AVX512BW-NEXT: movzbl %dl, %eax
933 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
934 ; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax
935 ; AVX512BW-NEXT: vpextrb $15, %xmm1, %ecx
936 ; AVX512BW-NEXT: # kill: %CL %CL %ECX
937 ; AVX512BW-NEXT: shlb %cl, %al
938 ; AVX512BW-NEXT: movzbl %al, %eax
939 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm0
940 ; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
941 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
169 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
170 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
171 ; AVX512BW-NEXT: vpsllw $5, %zmm1, %zmm1
172 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
173 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
174 ; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm2
175 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
176 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
177 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
178 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
179 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
180 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
181 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
942182 ; AVX512BW-NEXT: retq
943183 %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
944184 %shift = shl <64 x i8> %a, %splat
1012252 ;
1013253 ; AVX512BW-LABEL: constant_shift_v64i8:
1014254 ; AVX512BW: # BB#0:
1015 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1
1016 ; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax
1017 ; AVX512BW-NEXT: vmovd %eax, %xmm2
1018 ; AVX512BW-NEXT: vpextrb $1, %xmm1, %eax
1019 ; AVX512BW-NEXT: addb %al, %al
1020 ; AVX512BW-NEXT: movzbl %al, %eax
1021 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
1022 ; AVX512BW-NEXT: vpextrb $2, %xmm1, %eax
1023 ; AVX512BW-NEXT: shlb $2, %al
1024 ; AVX512BW-NEXT: movzbl %al, %eax
1025 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
1026 ; AVX512BW-NEXT: vpextrb $3, %xmm1, %eax
1027 ; AVX512BW-NEXT: shlb $3, %al
1028 ; AVX512BW-NEXT: movzbl %al, %eax
1029 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
1030 ; AVX512BW-NEXT: vpextrb $4, %xmm1, %eax
1031 ; AVX512BW-NEXT: shlb $4, %al
1032 ; AVX512BW-NEXT: movzbl %al, %eax
1033 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
1034 ; AVX512BW-NEXT: vpextrb $5, %xmm1, %eax
1035 ; AVX512BW-NEXT: shlb $5, %al
1036 ; AVX512BW-NEXT: movzbl %al, %eax
1037 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
1038 ; AVX512BW-NEXT: vpextrb $6, %xmm1, %eax
1039 ; AVX512BW-NEXT: shlb $6, %al
1040 ; AVX512BW-NEXT: movzbl %al, %eax
1041 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
1042 ; AVX512BW-NEXT: vpextrb $7, %xmm1, %eax
1043 ; AVX512BW-NEXT: shlb $7, %al
1044 ; AVX512BW-NEXT: movzbl %al, %eax
1045 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
1046 ; AVX512BW-NEXT: vpextrb $8, %xmm1, %eax
1047 ; AVX512BW-NEXT: shlb $7, %al
1048 ; AVX512BW-NEXT: movzbl %al, %eax
1049 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
1050 ; AVX512BW-NEXT: vpextrb $9, %xmm1, %eax
1051 ; AVX512BW-NEXT: shlb $6, %al
1052 ; AVX512BW-NEXT: movzbl %al, %eax
1053 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
1054 ; AVX512BW-NEXT: vpextrb $10, %xmm1, %eax
1055 ; AVX512BW-NEXT: shlb $5, %al
1056 ; AVX512BW-NEXT: movzbl %al, %eax
1057 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
1058 ; AVX512BW-NEXT: vpextrb $11, %xmm1, %eax
1059 ; AVX512BW-NEXT: shlb $4, %al
1060 ; AVX512BW-NEXT: movzbl %al, %eax
1061 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
1062 ; AVX512BW-NEXT: vpextrb $12, %xmm1, %eax
1063 ; AVX512BW-NEXT: shlb $3, %al
1064 ; AVX512BW-NEXT: movzbl %al, %eax
1065 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
1066 ; AVX512BW-NEXT: vpextrb $13, %xmm1, %eax
1067 ; AVX512BW-NEXT: shlb $2, %al
1068 ; AVX512BW-NEXT: movzbl %al, %eax
1069 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
1070 ; AVX512BW-NEXT: vpextrb $14, %xmm1, %eax
1071 ; AVX512BW-NEXT: addb %al, %al
1072 ; AVX512BW-NEXT: movzbl %al, %eax
1073 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
1074 ; AVX512BW-NEXT: vpextrb $15, %xmm1, %eax
1075 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
1076 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2
1077 ; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
1078 ; AVX512BW-NEXT: vmovd %eax, %xmm3
1079 ; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
1080 ; AVX512BW-NEXT: addb %al, %al
1081 ; AVX512BW-NEXT: movzbl %al, %eax
1082 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
1083 ; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
1084 ; AVX512BW-NEXT: shlb $2, %al
1085 ; AVX512BW-NEXT: movzbl %al, %eax
1086 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
1087 ; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax
1088 ; AVX512BW-NEXT: shlb $3, %al
1089 ; AVX512BW-NEXT: movzbl %al, %eax
1090 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
1091 ; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax
1092 ; AVX512BW-NEXT: shlb $4, %al
1093 ; AVX512BW-NEXT: movzbl %al, %eax
1094 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
1095 ; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
1096 ; AVX512BW-NEXT: shlb $5, %al
1097 ; AVX512BW-NEXT: movzbl %al, %eax
1098 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
1099 ; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
1100 ; AVX512BW-NEXT: shlb $6, %al
1101 ; AVX512BW-NEXT: movzbl %al, %eax
1102 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
1103 ; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax
1104 ; AVX512BW-NEXT: shlb $7, %al
1105 ; AVX512BW-NEXT: movzbl %al, %eax
1106 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
1107 ; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax
1108 ; AVX512BW-NEXT: shlb $7, %al
1109 ; AVX512BW-NEXT: movzbl %al, %eax
1110 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
1111 ; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
1112 ; AVX512BW-NEXT: shlb $6, %al
1113 ; AVX512BW-NEXT: movzbl %al, %eax
1114 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
1115 ; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
1116 ; AVX512BW-NEXT: shlb $5, %al
1117 ; AVX512BW-NEXT: movzbl %al, %eax
1118 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
1119 ; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax
1120 ; AVX512BW-NEXT: shlb $4, %al
1121 ; AVX512BW-NEXT: movzbl %al, %eax
1122 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
1123 ; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax
1124 ; AVX512BW-NEXT: shlb $3, %al
1125 ; AVX512BW-NEXT: movzbl %al, %eax
1126 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
1127 ; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
1128 ; AVX512BW-NEXT: shlb $2, %al
1129 ; AVX512BW-NEXT: movzbl %al, %eax
1130 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
1131 ; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
1132 ; AVX512BW-NEXT: addb %al, %al
1133 ; AVX512BW-NEXT: movzbl %al, %eax
1134 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
1135 ; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax
1136 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
1137 ; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
1138 ; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm2
1139 ; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax
1140 ; AVX512BW-NEXT: vmovd %eax, %xmm3
1141 ; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
1142 ; AVX512BW-NEXT: addb %al, %al
1143 ; AVX512BW-NEXT: movzbl %al, %eax
1144 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
1145 ; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax
1146 ; AVX512BW-NEXT: shlb $2, %al
1147 ; AVX512BW-NEXT: movzbl %al, %eax
1148 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
1149 ; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax
1150 ; AVX512BW-NEXT: shlb $3, %al
1151 ; AVX512BW-NEXT: movzbl %al, %eax
1152 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
1153 ; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax
1154 ; AVX512BW-NEXT: shlb $4, %al
1155 ; AVX512BW-NEXT: movzbl %al, %eax
1156 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
1157 ; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
1158 ; AVX512BW-NEXT: shlb $5, %al
1159 ; AVX512BW-NEXT: movzbl %al, %eax
1160 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
1161 ; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax
1162 ; AVX512BW-NEXT: shlb $6, %al
1163 ; AVX512BW-NEXT: movzbl %al, %eax
1164 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
1165 ; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax
1166 ; AVX512BW-NEXT: shlb $7, %al
1167 ; AVX512BW-NEXT: movzbl %al, %eax
1168 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
1169 ; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax
1170 ; AVX512BW-NEXT: shlb $7, %al
1171 ; AVX512BW-NEXT: movzbl %al, %eax
1172 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
1173 ; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
1174 ; AVX512BW-NEXT: shlb $6, %al
1175 ; AVX512BW-NEXT: movzbl %al, %eax
1176 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
1177 ; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax
1178 ; AVX512BW-NEXT: shlb $5, %al
1179 ; AVX512BW-NEXT: movzbl %al, %eax
1180 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
1181 ; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax
1182 ; AVX512BW-NEXT: shlb $4, %al
1183 ; AVX512BW-NEXT: movzbl %al, %eax
1184 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
1185 ; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax
1186 ; AVX512BW-NEXT: shlb $3, %al
1187 ; AVX512BW-NEXT: movzbl %al, %eax
1188 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
1189 ; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
1190 ; AVX512BW-NEXT: shlb $2, %al
1191 ; AVX512BW-NEXT: movzbl %al, %eax
1192 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
1193 ; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax
1194 ; AVX512BW-NEXT: addb %al, %al
1195 ; AVX512BW-NEXT: movzbl %al, %eax
1196 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
1197 ; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax
1198 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2
1199 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
1200 ; AVX512BW-NEXT: vmovd %eax, %xmm3
1201 ; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax
1202 ; AVX512BW-NEXT: addb %al, %al
1203 ; AVX512BW-NEXT: movzbl %al, %eax
1204 ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
1205 ; AVX512BW-NEXT: vpextrb $2, %xmm0, %eax
1206 ; AVX512BW-NEXT: shlb $2, %al
1207 ; AVX512BW-NEXT: movzbl %al, %eax
1208 ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
1209 ; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax
1210 ; AVX512BW-NEXT: shlb $3, %al
1211 ; AVX512BW-NEXT: movzbl %al, %eax
1212 ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
1213 ; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax
1214 ; AVX512BW-NEXT: shlb $4, %al
1215 ; AVX512BW-NEXT: movzbl %al, %eax
1216 ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
1217 ; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax
1218 ; AVX512BW-NEXT: shlb $5, %al
1219 ; AVX512BW-NEXT: movzbl %al, %eax
1220 ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
1221 ; AVX512BW-NEXT: vpextrb $6, %xmm0, %eax
1222 ; AVX512BW-NEXT: shlb $6, %al
1223 ; AVX512BW-NEXT: movzbl %al, %eax
1224 ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
1225 ; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax
1226 ; AVX512BW-NEXT: shlb $7, %al
1227 ; AVX512BW-NEXT: movzbl %al, %eax
1228 ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
1229 ; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
1230 ; AVX512BW-NEXT: shlb $7, %al
1231 ; AVX512BW-NEXT: movzbl %al, %eax
1232 ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
1233 ; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax
1234 ; AVX512BW-NEXT: shlb $6, %al
1235 ; AVX512BW-NEXT: movzbl %al, %eax
1236 ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
1237 ; AVX512BW-NEXT: vpextrb $10, %xmm0, %eax
1238 ; AVX512BW-NEXT: shlb $5, %al
1239 ; AVX512BW-NEXT: movzbl %al, %eax
1240 ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
1241 ; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax
1242 ; AVX512BW-NEXT: shlb $4, %al
1243 ; AVX512BW-NEXT: movzbl %al, %eax
1244 ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
1245 ; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax
1246 ; AVX512BW-NEXT: shlb $3, %al
1247 ; AVX512BW-NEXT: movzbl %al, %eax
1248 ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
1249 ; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax
1250 ; AVX512BW-NEXT: shlb $2, %al
1251 ; AVX512BW-NEXT: movzbl %al, %eax
1252 ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
1253 ; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax
1254 ; AVX512BW-NEXT: addb %al, %al
1255 ; AVX512BW-NEXT: movzbl %al, %eax
1256 ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
1257 ; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax
1258 ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0
1259 ; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1260 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
255 ; AVX512BW-NEXT: vpsllw $5, {{.*}}(%rip), %zmm1
256 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
257 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
258 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
259 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
260 ; AVX512BW-NEXT: vpsllw $2, %zmm0, %zmm2
261 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2
262 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
263 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
264 ; AVX512BW-NEXT: vmovdqu8 %zmm2, %zmm0 {%k1}
265 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm1, %zmm1
266 ; AVX512BW-NEXT: vpmovb2m %zmm1, %k1
267 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm0, %zmm0 {%k1}
1261268 ; AVX512BW-NEXT: retq
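1262269 %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>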
1263270 ret <64 x i8> %shift