llvm.org GIT mirror llvm / 4c690f3
[X86] Teach X86FixupBWInsts to promote MOV8rr/MOV16rr to MOV32rr.

This re-applies r268760, reverted in r268794.
Fixes http://llvm.org/PR27670

The original imp-defs assertion was way overzealous: forward all implicit operands, except imp-defs of the new super-reg def (r268787 for GR64, but also possible for GR16->GR32), or imp-uses of the new super-reg use.
While there, mark the source use as Undef, and add an imp-use of the old source reg: that should cover any case of dead super-regs.

At the stage the pass runs, flags are unlikely to matter anyway; still, let's be as correct as possible.

Also add MIR tests for the various interesting cases.

Original commit message:

Codesize is less (16) or equal (8), and we avoid partial dependencies.

Differential Revision: http://reviews.llvm.org/D19999

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268831 91177308-0d34-0410-b5e6-96231b3b80d8

Ahmed Bougacha 3 years ago
18 changed file(s) with 503 addition(s) and 227 deletion(s). Raw diff Collapse all Expand all
8989 /// OK, otherwise return nullptr.
9090 MachineInstr *tryReplaceLoad(unsigned New32BitOpcode, MachineInstr *MI) const;
9191
92 /// Change the MachineInstr \p MI into the equivalent 32-bit copy if it is
93 /// safe to do so. Return the replacement instruction if OK, otherwise return
94 /// nullptr.
95 MachineInstr *tryReplaceCopy(MachineInstr *MI) const;
96
9297 public:
9398 static char ID;
9499
225230 return MIB;
226231 }
227232
// Attempt to turn the register-to-register copy \p MI into a MOV32rr.
// Returns the newly built instruction, or nullptr if any of the checks below
// rejects the transform.
// NOTE(review): the new instruction is only built here; inserting it and
// removing \p MI is presumably the caller's job -- confirm.
233 MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const {
// Exactly two explicit operands are expected: operand 0 is treated as the
// destination and operand 1 as the source.
234 assert(MI->getNumExplicitOperands() == 2);
235 auto &OldDest = MI->getOperand(0);
236 auto &OldSrc = MI->getOperand(1);
237
// Bail out unless getSuperRegDestIfDead succeeds; on success it fills in
// NewDestReg (presumably the super-register of the old destination, which
// must be dead for a wider write to be safe -- verify against that helper).
238 unsigned NewDestReg;
239 if (!getSuperRegDestIfDead(MI, NewDestReg))
240 return nullptr;
241
// Look up the 32-bit counterpart of the source register.
242 unsigned NewSrcReg = getX86SubSuperRegister(OldSrc.getReg(), 32);
243
244 // This is only correct if we access the same subregister index: otherwise,
245 // we could try to replace "movb %ah, %al" with "movl %eax, %eax".
246 auto *TRI = &TII->getRegisterInfo();
247 if (TRI->getSubRegIndex(NewSrcReg, OldSrc.getReg()) !=
248 TRI->getSubRegIndex(NewDestReg, OldDest.getReg()))
249 return nullptr;
250
251 // Safe to change the instruction.
252 // Don't set src flags, as we don't know if we're also killing the superreg.
253 // However, the superregister might not be defined; make it explicit that
254 // we don't care about the higher bits by reading it as Undef, and adding
255 // an imp-use on the original subregister.
// Operand order matters: the new source comes first, then the implicit use of
// the old source, then any forwarded implicit operands from the loop below.
256 MachineInstrBuilder MIB =
257 BuildMI(*MF, MI->getDebugLoc(), TII->get(X86::MOV32rr), NewDestReg)
258 .addReg(NewSrcReg, RegState::Undef)
259 .addReg(OldSrc.getReg(), RegState::Implicit);
260
261 // Drop imp-defs/uses that would be redundant with the new def/use.
// Concretely: an implicit def is forwarded unless it names NewDestReg, and an
// implicit use is forwarded unless it names NewSrcReg.
262 for (auto &Op : MI->implicit_operands())
263 if (Op.getReg() != (Op.isDef() ? NewDestReg : NewSrcReg))
264 MIB.addOperand(Op);
265
266 return MIB;
267 }
268
228269 void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
229270 MachineBasicBlock &MBB) {
230271
271312 NewMI = tryReplaceLoad(X86::MOVZX32rm16, MI);
272313 break;
273314
315 case X86::MOV8rr:
316 case X86::MOV16rr:
317 // Always try to replace 8/16 bit copies with a 32 bit copy.
318 // Code size is either less (16) or equal (8), and there is sometimes a
319 // perf advantage from eliminating a false dependence on the upper portion
320 // of the register.
321 NewMI = tryReplaceCopy(MI);
322 break;
323
274324 default:
275325 // nothing to do here.
276326 break;
55 define i8 @f(i8 %v1, i8 %v2) nounwind {
66 entry:
77 ; CHECK: callq
8 ; CHECK: movb %{{.*}}, %al
8 ; CHECK: movl %{{.*}}, %eax
99 ; CHECK: mulb
1010 ; CHECK: mulb
1111 %rval = tail call i8 @bar() nounwind
3838 ; X64-LABEL: bar:
3939 ; X64: # BB#0:
4040 ; X64-NEXT: xorl %edx, %edx
41 ; X64-NEXT: movw %di, %ax
41 ; X64-NEXT: movl %edi, %eax
4242 ; X64-NEXT: divw %si
4343 ; X64-NEXT: andl $1, %eax
4444 ; X64-NEXT: retq
460460 ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
461461 ; KNL_X32-NEXT: movl %edi, (%esp)
462462 ; KNL_X32-NEXT: calll _test11
463 ; KNL_X32-NEXT: movb %al, %bl
463 ; KNL_X32-NEXT: movl %eax, %ebx
464464 ; KNL_X32-NEXT: movzbl %bl, %eax
465465 ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
466466 ; KNL_X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
8080 ; CHECK-NEXT: xorl %esi, %eax
8181 ; CHECK-NEXT: andl %esi, %edi
8282 ; CHECK-NEXT: orl %eax, %edi
83 ; CHECK-NEXT: movw %di, %ax
83 ; CHECK-NEXT: movl %edi, %eax
8484 ; CHECK-NEXT: retq
8585 %ma = bitcast i16 %x to <16 x i1>
8686 %mb = bitcast i16 %y to <16 x i1>
7171 ; CHECK-LABEL: select05:
7272 ; CHECK: ## BB#0:
7373 ; CHECK-NEXT: orl %esi, %edi
74 ; CHECK-NEXT: movb %dil, %al
74 ; CHECK-NEXT: movl %edi, %eax
7575 ; CHECK-NEXT: retq
7676 %mask = bitcast i8 %m to <8 x i1>
7777 %a = bitcast i8 %a.0 to <8 x i1>
101101 ; CHECK-LABEL: select06:
102102 ; CHECK: ## BB#0:
103103 ; CHECK-NEXT: andl %esi, %edi
104 ; CHECK-NEXT: movb %dil, %al
104 ; CHECK-NEXT: movl %edi, %eax
105105 ; CHECK-NEXT: retq
106106 %mask = bitcast i8 %m to <8 x i1>
107107 %a = bitcast i8 %a.0 to <8 x i1>
3535 ; CHECK-NEXT: xorl %esi, %eax
3636 ; CHECK-NEXT: andl %esi, %edi
3737 ; CHECK-NEXT: orl %eax, %edi
38 ; CHECK-NEXT: movb %dil, %al
38 ; CHECK-NEXT: movl %edi, %eax
3939 ; CHECK-NEXT: retq
4040 %ma = bitcast i8 %x to <8 x i1>
4141 %mb = bitcast i8 %y to <8 x i1>
249249 ; CMOV-DAG: movb $20, %al
250250 ; CMOV-DAG: movb $20, %dl
251251 ; CMOV: jl [[BB0:.LBB[0-9_]+]]
252 ; CMOV: movb %cl, %dl
252 ; CMOV: movl %ecx, %edx
253253 ; CMOV: [[BB0]]:
254254 ; CMOV: jg [[BB1:.LBB[0-9_]+]]
255 ; CMOV: movb %dl, %al
255 ; CMOV: movl %edx, %eax
256256 ; CMOV: [[BB1]]:
257257 ; CMOV: testl %edi, %edi
258258 ; CMOV: je [[BB2:.LBB[0-9_]+]]
259 ; CMOV: movb %dl, %al
259 ; CMOV: movl %edx, %eax
260260 ; CMOV: [[BB2]]:
261261 ; CMOV: movb %al, g8(%rip)
262262 ; CMOV: retq
0 ; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
1 ; RUN: llc -verify-machineinstrs -fixup-byte-word-insts=1 -mtriple=x86_64-- < %s | FileCheck --check-prefix=X64 --check-prefix=BWON64 %s
2 ; RUN: llc -verify-machineinstrs -fixup-byte-word-insts=0 -mtriple=x86_64-- < %s | FileCheck --check-prefix=X64 --check-prefix=BWOFF64 %s
3 ; RUN: llc -verify-machineinstrs -fixup-byte-word-insts=1 -mtriple=i386-- < %s | FileCheck --check-prefix=X32 --check-prefix=BWON32 %s
4 ; RUN: llc -verify-machineinstrs -fixup-byte-word-insts=0 -mtriple=i386-- < %s | FileCheck --check-prefix=X32 --check-prefix=BWOFF32 %s
5
6 target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
7
8 define i8 @test_movb(i8 %a0) {
9 ; BWON64-LABEL: test_movb:
10 ; BWON64: # BB#0:
11 ; BWON64-NEXT: movl %edi, %eax
12 ; BWON64-NEXT: retq
13 ;
14 ; BWOFF64-LABEL: test_movb:
15 ; BWOFF64: # BB#0:
16 ; BWOFF64-NEXT: movb %dil, %al
17 ; BWOFF64-NEXT: retq
18 ;
19 ; X32-LABEL: test_movb:
20 ; X32: # BB#0:
21 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
22 ; X32-NEXT: retl
23 ret i8 %a0
24 }
25
26 define i16 @test_movw(i16 %a0) {
27 ; BWON64-LABEL: test_movw:
28 ; BWON64: # BB#0:
29 ; BWON64-NEXT: movl %edi, %eax
30 ; BWON64-NEXT: retq
31 ;
32 ; BWOFF64-LABEL: test_movw:
33 ; BWOFF64: # BB#0:
34 ; BWOFF64-NEXT: movw %di, %ax
35 ; BWOFF64-NEXT: retq
36 ;
37 ; BWON32-LABEL: test_movw:
38 ; BWON32: # BB#0:
39 ; BWON32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
40 ; BWON32-NEXT: retl
41 ;
42 ; BWOFF32-LABEL: test_movw:
43 ; BWOFF32: # BB#0:
44 ; BWOFF32-NEXT: movw {{[0-9]+}}(%esp), %ax
45 ; BWOFF32-NEXT: retl
46 ret i16 %a0
47 }
48
49 ; Verify we don't mess with H-reg copies (only generated in 32-bit mode).
50 define i8 @test_movb_hreg(i16 %a0) {
51 ; X64-LABEL: test_movb_hreg:
52 ; X64: # BB#0:
53 ; X64-NEXT: movl %edi, %eax
54 ; X64-NEXT: shrl $8, %eax
55 ; X64-NEXT: addb %dil, %al
56 ; X64-NEXT: retq
57 ;
58 ; X32-LABEL: test_movb_hreg:
59 ; X32: # BB#0:
60 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
61 ; X32-NEXT: addb %al, %ah
62 ; X32-NEXT: movb %ah, %al
63 ; X32-NEXT: retl
64 %tmp0 = trunc i16 %a0 to i8
65 %tmp1 = lshr i16 %a0, 8
66 %tmp2 = trunc i16 %tmp1 to i8
67 %tmp3 = add i8 %tmp0, %tmp2
68 ret i8 %tmp3
69 }
0 # RUN: llc -run-pass x86-fixup-bw-insts -mtriple=x86_64-- -o /dev/null %s 2>&1 | FileCheck %s
1
2 # Verify that we correctly deal with the flag edge cases when replacing
3 # copies by bigger copies, which is a pretty unusual transform.
4
5 --- |
6 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
7
8 define i8 @test_movb_killed(i8 %a0) {
9 ret i8 %a0
10 }
11
12 define i8 @test_movb_impuse(i8 %a0) {
13 ret i8 %a0
14 }
15
16 define i8 @test_movb_impdef_gr64(i8 %a0) {
17 ret i8 %a0
18 }
19
20 define i8 @test_movb_impdef_gr32(i8 %a0) {
21 ret i8 %a0
22 }
23
24 define i8 @test_movb_impdef_gr16(i8 %a0) {
25 ret i8 %a0
26 }
27
28 define i16 @test_movw_impdef_gr32(i16 %a0) {
29 ret i16 %a0
30 }
31
32 define i16 @test_movw_impdef_gr64(i16 %a0) {
33 ret i16 %a0
34 }
35
36 ...
37
38 ---
39 name: test_movb_killed
40 allVRegsAllocated: true
41 isSSA: false
42 tracksRegLiveness: true
43 liveins:
44 - { reg: '%edi' }
45 body: |
46 bb.0 (%ir-block.0):
47 liveins: %edi
48
49 ; CHECK: %eax = MOV32rr undef %edi, implicit %dil
50 %al = MOV8rr killed %dil
51 RETQ killed %al
52
53 ...
54
55 ---
56 name: test_movb_impuse
57 allVRegsAllocated: true
58 isSSA: false
59 tracksRegLiveness: true
60 liveins:
61 - { reg: '%edi' }
62 body: |
63 bb.0 (%ir-block.0):
64 liveins: %edi
65
66 ; CHECK: %eax = MOV32rr undef %edi, implicit %dil
67 %al = MOV8rr %dil, implicit %edi
68 RETQ killed %al
69
70 ...
71
72 ---
73 name: test_movb_impdef_gr64
74 allVRegsAllocated: true
75 isSSA: false
76 tracksRegLiveness: true
77 liveins:
78 - { reg: '%edi' }
79 body: |
80 bb.0 (%ir-block.0):
81 liveins: %edi
82
83 ; CHECK: %eax = MOV32rr undef %edi, implicit %dil, implicit-def %rax
84 %al = MOV8rr %dil, implicit-def %rax
85 RETQ killed %al
86
87 ...
88
89 ---
90 name: test_movb_impdef_gr32
91 allVRegsAllocated: true
92 isSSA: false
93 tracksRegLiveness: true
94 liveins:
95 - { reg: '%edi' }
96 body: |
97 bb.0 (%ir-block.0):
98 liveins: %edi
99
100 ; CHECK: %eax = MOV32rr undef %edi, implicit %dil
101 %al = MOV8rr %dil, implicit-def %eax
102 RETQ killed %al
103
104 ...
105
106 ---
107 name: test_movb_impdef_gr16
108 allVRegsAllocated: true
109 isSSA: false
110 tracksRegLiveness: true
111 liveins:
112 - { reg: '%edi' }
113 body: |
114 bb.0 (%ir-block.0):
115 liveins: %edi
116
117 ; CHECK: %eax = MOV32rr undef %edi, implicit %dil
118 %al = MOV8rr %dil, implicit-def %ax
119 RETQ killed %al
120
121 ...
122
123 ---
124 name: test_movw_impdef_gr32
125 allVRegsAllocated: true
126 isSSA: false
127 tracksRegLiveness: true
128 liveins:
129 - { reg: '%edi' }
130 body: |
131 bb.0 (%ir-block.0):
132 liveins: %edi
133
134 ; CHECK: %eax = MOV32rr undef %edi, implicit %di
135 %ax = MOV16rr %di, implicit-def %eax
136 RETQ killed %ax
137
138 ...
139
140 ---
141 name: test_movw_impdef_gr64
142 allVRegsAllocated: true
143 isSSA: false
144 tracksRegLiveness: true
145 liveins:
146 - { reg: '%edi' }
147 body: |
148 bb.0 (%ir-block.0):
149 liveins: %edi
150
151 ; CHECK: %eax = MOV32rr undef %edi, implicit %di, implicit-def %rax
152 %ax = MOV16rr %di, implicit-def %rax
153 RETQ killed %ax
154
155 ...
2020
2121 ; CHECK-LABEL: bar
2222 ; CHECK-NOT: cvt
23 ; CHECK: movb
23 ; CHECK: movl
2424 define zeroext i8 @bar(i8 zeroext %a) #0 {
2525 %conv = uitofp i8 %a to float
2626 %conv1 = fptoui float %conv to i8
5959 ; CHECK-NEXT: subb %sil, %dil
6060 ; CHECK-NEXT: andb %cl, %dl
6161 ; CHECK-NEXT: andb %dil, %dl
62 ; CHECK-NEXT: movb %dl, %al
62 ; CHECK-NEXT: movl %edx, %eax
6363 ; CHECK-NEXT: retq
6464 %t0 = sub i8 %x0, %x1
6565 %t1 = and i8 %x2, %t0
106106 ; CHECK-NEXT: subb %sil, %dil
107107 ; CHECK-NEXT: orb %cl, %dl
108108 ; CHECK-NEXT: orb %dil, %dl
109 ; CHECK-NEXT: movb %dl, %al
109 ; CHECK-NEXT: movl %edx, %eax
110110 ; CHECK-NEXT: retq
111111 %t0 = sub i8 %x0, %x1
112112 %t1 = or i8 %x2, %t0
153153 ; CHECK-NEXT: subb %sil, %dil
154154 ; CHECK-NEXT: xorb %cl, %dl
155155 ; CHECK-NEXT: xorb %dil, %dl
156 ; CHECK-NEXT: movb %dl, %al
156 ; CHECK-NEXT: movl %edx, %eax
157157 ; CHECK-NEXT: retq
158158 %t0 = sub i8 %x0, %x1
159159 %t1 = xor i8 %x2, %t0
11
22 ; This test should get one and only one register to register mov.
33 ; CHECK-LABEL: t:
4 ; CHECK: movw
5 ; CHECK-NOT: movw
4 ; CHECK: movl
5 ; CHECK-NOT: mov
66 ; CHECK: ret
77
88 define signext i16 @t() {
88 ; CHECK-LABEL: f:
99 ; CHECK: addb %dil, %dil
1010 ; CHECK-NEXT: orb $1, %dil
11 ; CHECK-NEXT: movb %dil, %al
11 ; CHECK-NEXT: movl %edi, %eax
1212 ; CHECK-NEXT: retq
1313 }
88 define i8 @test_bitreverse_i8(i8 %a) nounwind {
99 ; SSE-LABEL: test_bitreverse_i8:
1010 ; SSE: # BB#0:
11 ; SSE-NEXT: movb %dil, %al
11 ; SSE-NEXT: movl %edi, %eax
1212 ; SSE-NEXT: shlb $7, %al
13 ; SSE-NEXT: movb %dil, %cl
13 ; SSE-NEXT: movl %edi, %ecx
1414 ; SSE-NEXT: shlb $5, %cl
1515 ; SSE-NEXT: andb $64, %cl
16 ; SSE-NEXT: movb %dil, %dl
16 ; SSE-NEXT: movl %edi, %edx
1717 ; SSE-NEXT: shlb $3, %dl
1818 ; SSE-NEXT: andb $32, %dl
1919 ; SSE-NEXT: orb %cl, %dl
20 ; SSE-NEXT: movb %dil, %cl
20 ; SSE-NEXT: movl %edi, %ecx
2121 ; SSE-NEXT: addb %cl, %cl
2222 ; SSE-NEXT: andb $16, %cl
2323 ; SSE-NEXT: orb %dl, %cl
24 ; SSE-NEXT: movb %dil, %dl
24 ; SSE-NEXT: movl %edi, %edx
2525 ; SSE-NEXT: shrb %dl
2626 ; SSE-NEXT: andb $8, %dl
2727 ; SSE-NEXT: orb %cl, %dl
28 ; SSE-NEXT: movb %dil, %cl
28 ; SSE-NEXT: movl %edi, %ecx
2929 ; SSE-NEXT: shrb $3, %cl
3030 ; SSE-NEXT: andb $4, %cl
3131 ; SSE-NEXT: orb %dl, %cl
32 ; SSE-NEXT: movb %dil, %dl
32 ; SSE-NEXT: movl %edi, %edx
3333 ; SSE-NEXT: shrb $5, %dl
3434 ; SSE-NEXT: andb $2, %dl
3535 ; SSE-NEXT: orb %cl, %dl
3636 ; SSE-NEXT: shrb $7, %dil
3737 ; SSE-NEXT: orb %dl, %dil
3838 ; SSE-NEXT: orb %al, %dil
39 ; SSE-NEXT: movb %dil, %al
39 ; SSE-NEXT: movl %edi, %eax
4040 ; SSE-NEXT: retq
4141 ;
4242 ; AVX-LABEL: test_bitreverse_i8:
4343 ; AVX: # BB#0:
44 ; AVX-NEXT: movb %dil, %al
44 ; AVX-NEXT: movl %edi, %eax
4545 ; AVX-NEXT: shlb $7, %al
46 ; AVX-NEXT: movb %dil, %cl
46 ; AVX-NEXT: movl %edi, %ecx
4747 ; AVX-NEXT: shlb $5, %cl
4848 ; AVX-NEXT: andb $64, %cl
49 ; AVX-NEXT: movb %dil, %dl
49 ; AVX-NEXT: movl %edi, %edx
5050 ; AVX-NEXT: shlb $3, %dl
5151 ; AVX-NEXT: andb $32, %dl
5252 ; AVX-NEXT: orb %cl, %dl
53 ; AVX-NEXT: movb %dil, %cl
53 ; AVX-NEXT: movl %edi, %ecx
5454 ; AVX-NEXT: addb %cl, %cl
5555 ; AVX-NEXT: andb $16, %cl
5656 ; AVX-NEXT: orb %dl, %cl
57 ; AVX-NEXT: movb %dil, %dl
57 ; AVX-NEXT: movl %edi, %edx
5858 ; AVX-NEXT: shrb %dl
5959 ; AVX-NEXT: andb $8, %dl
6060 ; AVX-NEXT: orb %cl, %dl
61 ; AVX-NEXT: movb %dil, %cl
61 ; AVX-NEXT: movl %edi, %ecx
6262 ; AVX-NEXT: shrb $3, %cl
6363 ; AVX-NEXT: andb $4, %cl
6464 ; AVX-NEXT: orb %dl, %cl
65 ; AVX-NEXT: movb %dil, %dl
65 ; AVX-NEXT: movl %edi, %edx
6666 ; AVX-NEXT: shrb $5, %dl
6767 ; AVX-NEXT: andb $2, %dl
6868 ; AVX-NEXT: orb %cl, %dl
6969 ; AVX-NEXT: shrb $7, %dil
7070 ; AVX-NEXT: orb %dl, %dil
7171 ; AVX-NEXT: orb %al, %dil
72 ; AVX-NEXT: movb %dil, %al
72 ; AVX-NEXT: movl %edi, %eax
7373 ; AVX-NEXT: retq
7474 ;
7575 ; XOP-LABEL: test_bitreverse_i8:
335335 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
336336 ; AVX512BW-NEXT: shrl $8, %ecx
337337 ; AVX512BW-NEXT: addb %cl, %al
338 ; AVX512BW-NEXT: movb %al, %cl
338 ; AVX512BW-NEXT: movl %eax, %ecx
339339 ; AVX512BW-NEXT: shrb $7, %cl
340340 ; AVX512BW-NEXT: sarb $2, %al
341341 ; AVX512BW-NEXT: addb %cl, %al
345345 ; AVX512BW-NEXT: imull $-109, %ecx, %edx
346346 ; AVX512BW-NEXT: shrl $8, %edx
347347 ; AVX512BW-NEXT: addb %dl, %cl
348 ; AVX512BW-NEXT: movb %cl, %dl
348 ; AVX512BW-NEXT: movl %ecx, %edx
349349 ; AVX512BW-NEXT: shrb $7, %dl
350350 ; AVX512BW-NEXT: sarb $2, %cl
351351 ; AVX512BW-NEXT: addb %dl, %cl
357357 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
358358 ; AVX512BW-NEXT: shrl $8, %ecx
359359 ; AVX512BW-NEXT: addb %cl, %al
360 ; AVX512BW-NEXT: movb %al, %cl
360 ; AVX512BW-NEXT: movl %eax, %ecx
361361 ; AVX512BW-NEXT: shrb $7, %cl
362362 ; AVX512BW-NEXT: sarb $2, %al
363363 ; AVX512BW-NEXT: addb %cl, %al
368368 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
369369 ; AVX512BW-NEXT: shrl $8, %ecx
370370 ; AVX512BW-NEXT: addb %cl, %al
371 ; AVX512BW-NEXT: movb %al, %cl
371 ; AVX512BW-NEXT: movl %eax, %ecx
372372 ; AVX512BW-NEXT: shrb $7, %cl
373373 ; AVX512BW-NEXT: sarb $2, %al
374374 ; AVX512BW-NEXT: addb %cl, %al
379379 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
380380 ; AVX512BW-NEXT: shrl $8, %ecx
381381 ; AVX512BW-NEXT: addb %cl, %al
382 ; AVX512BW-NEXT: movb %al, %cl
382 ; AVX512BW-NEXT: movl %eax, %ecx
383383 ; AVX512BW-NEXT: shrb $7, %cl
384384 ; AVX512BW-NEXT: sarb $2, %al
385385 ; AVX512BW-NEXT: addb %cl, %al
390390 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
391391 ; AVX512BW-NEXT: shrl $8, %ecx
392392 ; AVX512BW-NEXT: addb %cl, %al
393 ; AVX512BW-NEXT: movb %al, %cl
393 ; AVX512BW-NEXT: movl %eax, %ecx
394394 ; AVX512BW-NEXT: shrb $7, %cl
395395 ; AVX512BW-NEXT: sarb $2, %al
396396 ; AVX512BW-NEXT: addb %cl, %al
401401 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
402402 ; AVX512BW-NEXT: shrl $8, %ecx
403403 ; AVX512BW-NEXT: addb %cl, %al
404 ; AVX512BW-NEXT: movb %al, %cl
404 ; AVX512BW-NEXT: movl %eax, %ecx
405405 ; AVX512BW-NEXT: shrb $7, %cl
406406 ; AVX512BW-NEXT: sarb $2, %al
407407 ; AVX512BW-NEXT: addb %cl, %al
412412 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
413413 ; AVX512BW-NEXT: shrl $8, %ecx
414414 ; AVX512BW-NEXT: addb %cl, %al
415 ; AVX512BW-NEXT: movb %al, %cl
415 ; AVX512BW-NEXT: movl %eax, %ecx
416416 ; AVX512BW-NEXT: shrb $7, %cl
417417 ; AVX512BW-NEXT: sarb $2, %al
418418 ; AVX512BW-NEXT: addb %cl, %al
423423 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
424424 ; AVX512BW-NEXT: shrl $8, %ecx
425425 ; AVX512BW-NEXT: addb %cl, %al
426 ; AVX512BW-NEXT: movb %al, %cl
426 ; AVX512BW-NEXT: movl %eax, %ecx
427427 ; AVX512BW-NEXT: shrb $7, %cl
428428 ; AVX512BW-NEXT: sarb $2, %al
429429 ; AVX512BW-NEXT: addb %cl, %al
434434 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
435435 ; AVX512BW-NEXT: shrl $8, %ecx
436436 ; AVX512BW-NEXT: addb %cl, %al
437 ; AVX512BW-NEXT: movb %al, %cl
437 ; AVX512BW-NEXT: movl %eax, %ecx
438438 ; AVX512BW-NEXT: shrb $7, %cl
439439 ; AVX512BW-NEXT: sarb $2, %al
440440 ; AVX512BW-NEXT: addb %cl, %al
445445 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
446446 ; AVX512BW-NEXT: shrl $8, %ecx
447447 ; AVX512BW-NEXT: addb %cl, %al
448 ; AVX512BW-NEXT: movb %al, %cl
448 ; AVX512BW-NEXT: movl %eax, %ecx
449449 ; AVX512BW-NEXT: shrb $7, %cl
450450 ; AVX512BW-NEXT: sarb $2, %al
451451 ; AVX512BW-NEXT: addb %cl, %al
456456 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
457457 ; AVX512BW-NEXT: shrl $8, %ecx
458458 ; AVX512BW-NEXT: addb %cl, %al
459 ; AVX512BW-NEXT: movb %al, %cl
459 ; AVX512BW-NEXT: movl %eax, %ecx
460460 ; AVX512BW-NEXT: shrb $7, %cl
461461 ; AVX512BW-NEXT: sarb $2, %al
462462 ; AVX512BW-NEXT: addb %cl, %al
467467 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
468468 ; AVX512BW-NEXT: shrl $8, %ecx
469469 ; AVX512BW-NEXT: addb %cl, %al
470 ; AVX512BW-NEXT: movb %al, %cl
470 ; AVX512BW-NEXT: movl %eax, %ecx
471471 ; AVX512BW-NEXT: shrb $7, %cl
472472 ; AVX512BW-NEXT: sarb $2, %al
473473 ; AVX512BW-NEXT: addb %cl, %al
478478 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
479479 ; AVX512BW-NEXT: shrl $8, %ecx
480480 ; AVX512BW-NEXT: addb %cl, %al
481 ; AVX512BW-NEXT: movb %al, %cl
481 ; AVX512BW-NEXT: movl %eax, %ecx
482482 ; AVX512BW-NEXT: shrb $7, %cl
483483 ; AVX512BW-NEXT: sarb $2, %al
484484 ; AVX512BW-NEXT: addb %cl, %al
489489 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
490490 ; AVX512BW-NEXT: shrl $8, %ecx
491491 ; AVX512BW-NEXT: addb %cl, %al
492 ; AVX512BW-NEXT: movb %al, %cl
492 ; AVX512BW-NEXT: movl %eax, %ecx
493493 ; AVX512BW-NEXT: shrb $7, %cl
494494 ; AVX512BW-NEXT: sarb $2, %al
495495 ; AVX512BW-NEXT: addb %cl, %al
500500 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
501501 ; AVX512BW-NEXT: shrl $8, %ecx
502502 ; AVX512BW-NEXT: addb %cl, %al
503 ; AVX512BW-NEXT: movb %al, %cl
503 ; AVX512BW-NEXT: movl %eax, %ecx
504504 ; AVX512BW-NEXT: shrb $7, %cl
505505 ; AVX512BW-NEXT: sarb $2, %al
506506 ; AVX512BW-NEXT: addb %cl, %al
512512 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
513513 ; AVX512BW-NEXT: shrl $8, %ecx
514514 ; AVX512BW-NEXT: addb %cl, %al
515 ; AVX512BW-NEXT: movb %al, %cl
515 ; AVX512BW-NEXT: movl %eax, %ecx
516516 ; AVX512BW-NEXT: shrb $7, %cl
517517 ; AVX512BW-NEXT: sarb $2, %al
518518 ; AVX512BW-NEXT: addb %cl, %al
522522 ; AVX512BW-NEXT: imull $-109, %ecx, %edx
523523 ; AVX512BW-NEXT: shrl $8, %edx
524524 ; AVX512BW-NEXT: addb %dl, %cl
525 ; AVX512BW-NEXT: movb %cl, %dl
525 ; AVX512BW-NEXT: movl %ecx, %edx
526526 ; AVX512BW-NEXT: shrb $7, %dl
527527 ; AVX512BW-NEXT: sarb $2, %cl
528528 ; AVX512BW-NEXT: addb %dl, %cl
534534 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
535535 ; AVX512BW-NEXT: shrl $8, %ecx
536536 ; AVX512BW-NEXT: addb %cl, %al
537 ; AVX512BW-NEXT: movb %al, %cl
537 ; AVX512BW-NEXT: movl %eax, %ecx
538538 ; AVX512BW-NEXT: shrb $7, %cl
539539 ; AVX512BW-NEXT: sarb $2, %al
540540 ; AVX512BW-NEXT: addb %cl, %al
545545 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
546546 ; AVX512BW-NEXT: shrl $8, %ecx
547547 ; AVX512BW-NEXT: addb %cl, %al
548 ; AVX512BW-NEXT: movb %al, %cl
548 ; AVX512BW-NEXT: movl %eax, %ecx
549549 ; AVX512BW-NEXT: shrb $7, %cl
550550 ; AVX512BW-NEXT: sarb $2, %al
551551 ; AVX512BW-NEXT: addb %cl, %al
556556 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
557557 ; AVX512BW-NEXT: shrl $8, %ecx
558558 ; AVX512BW-NEXT: addb %cl, %al
559 ; AVX512BW-NEXT: movb %al, %cl
559 ; AVX512BW-NEXT: movl %eax, %ecx
560560 ; AVX512BW-NEXT: shrb $7, %cl
561561 ; AVX512BW-NEXT: sarb $2, %al
562562 ; AVX512BW-NEXT: addb %cl, %al
567567 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
568568 ; AVX512BW-NEXT: shrl $8, %ecx
569569 ; AVX512BW-NEXT: addb %cl, %al
570 ; AVX512BW-NEXT: movb %al, %cl
570 ; AVX512BW-NEXT: movl %eax, %ecx
571571 ; AVX512BW-NEXT: shrb $7, %cl
572572 ; AVX512BW-NEXT: sarb $2, %al
573573 ; AVX512BW-NEXT: addb %cl, %al
578578 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
579579 ; AVX512BW-NEXT: shrl $8, %ecx
580580 ; AVX512BW-NEXT: addb %cl, %al
581 ; AVX512BW-NEXT: movb %al, %cl
581 ; AVX512BW-NEXT: movl %eax, %ecx
582582 ; AVX512BW-NEXT: shrb $7, %cl
583583 ; AVX512BW-NEXT: sarb $2, %al
584584 ; AVX512BW-NEXT: addb %cl, %al
589589 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
590590 ; AVX512BW-NEXT: shrl $8, %ecx
591591 ; AVX512BW-NEXT: addb %cl, %al
592 ; AVX512BW-NEXT: movb %al, %cl
592 ; AVX512BW-NEXT: movl %eax, %ecx
593593 ; AVX512BW-NEXT: shrb $7, %cl
594594 ; AVX512BW-NEXT: sarb $2, %al
595595 ; AVX512BW-NEXT: addb %cl, %al
600600 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
601601 ; AVX512BW-NEXT: shrl $8, %ecx
602602 ; AVX512BW-NEXT: addb %cl, %al
603 ; AVX512BW-NEXT: movb %al, %cl
603 ; AVX512BW-NEXT: movl %eax, %ecx
604604 ; AVX512BW-NEXT: shrb $7, %cl
605605 ; AVX512BW-NEXT: sarb $2, %al
606606 ; AVX512BW-NEXT: addb %cl, %al
611611 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
612612 ; AVX512BW-NEXT: shrl $8, %ecx
613613 ; AVX512BW-NEXT: addb %cl, %al
614 ; AVX512BW-NEXT: movb %al, %cl
614 ; AVX512BW-NEXT: movl %eax, %ecx
615615 ; AVX512BW-NEXT: shrb $7, %cl
616616 ; AVX512BW-NEXT: sarb $2, %al
617617 ; AVX512BW-NEXT: addb %cl, %al
622622 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
623623 ; AVX512BW-NEXT: shrl $8, %ecx
624624 ; AVX512BW-NEXT: addb %cl, %al
625 ; AVX512BW-NEXT: movb %al, %cl
625 ; AVX512BW-NEXT: movl %eax, %ecx
626626 ; AVX512BW-NEXT: shrb $7, %cl
627627 ; AVX512BW-NEXT: sarb $2, %al
628628 ; AVX512BW-NEXT: addb %cl, %al
633633 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
634634 ; AVX512BW-NEXT: shrl $8, %ecx
635635 ; AVX512BW-NEXT: addb %cl, %al
636 ; AVX512BW-NEXT: movb %al, %cl
636 ; AVX512BW-NEXT: movl %eax, %ecx
637637 ; AVX512BW-NEXT: shrb $7, %cl
638638 ; AVX512BW-NEXT: sarb $2, %al
639639 ; AVX512BW-NEXT: addb %cl, %al
644644 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
645645 ; AVX512BW-NEXT: shrl $8, %ecx
646646 ; AVX512BW-NEXT: addb %cl, %al
647 ; AVX512BW-NEXT: movb %al, %cl
647 ; AVX512BW-NEXT: movl %eax, %ecx
648648 ; AVX512BW-NEXT: shrb $7, %cl
649649 ; AVX512BW-NEXT: sarb $2, %al
650650 ; AVX512BW-NEXT: addb %cl, %al
655655 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
656656 ; AVX512BW-NEXT: shrl $8, %ecx
657657 ; AVX512BW-NEXT: addb %cl, %al
658 ; AVX512BW-NEXT: movb %al, %cl
658 ; AVX512BW-NEXT: movl %eax, %ecx
659659 ; AVX512BW-NEXT: shrb $7, %cl
660660 ; AVX512BW-NEXT: sarb $2, %al
661661 ; AVX512BW-NEXT: addb %cl, %al
666666 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
667667 ; AVX512BW-NEXT: shrl $8, %ecx
668668 ; AVX512BW-NEXT: addb %cl, %al
669 ; AVX512BW-NEXT: movb %al, %cl
669 ; AVX512BW-NEXT: movl %eax, %ecx
670670 ; AVX512BW-NEXT: shrb $7, %cl
671671 ; AVX512BW-NEXT: sarb $2, %al
672672 ; AVX512BW-NEXT: addb %cl, %al
677677 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
678678 ; AVX512BW-NEXT: shrl $8, %ecx
679679 ; AVX512BW-NEXT: addb %cl, %al
680 ; AVX512BW-NEXT: movb %al, %cl
680 ; AVX512BW-NEXT: movl %eax, %ecx
681681 ; AVX512BW-NEXT: shrb $7, %cl
682682 ; AVX512BW-NEXT: sarb $2, %al
683683 ; AVX512BW-NEXT: addb %cl, %al
690690 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
691691 ; AVX512BW-NEXT: shrl $8, %ecx
692692 ; AVX512BW-NEXT: addb %cl, %al
693 ; AVX512BW-NEXT: movb %al, %cl
693 ; AVX512BW-NEXT: movl %eax, %ecx
694694 ; AVX512BW-NEXT: shrb $7, %cl
695695 ; AVX512BW-NEXT: sarb $2, %al
696696 ; AVX512BW-NEXT: addb %cl, %al
700700 ; AVX512BW-NEXT: imull $-109, %ecx, %edx
701701 ; AVX512BW-NEXT: shrl $8, %edx
702702 ; AVX512BW-NEXT: addb %dl, %cl
703 ; AVX512BW-NEXT: movb %cl, %dl
703 ; AVX512BW-NEXT: movl %ecx, %edx
704704 ; AVX512BW-NEXT: shrb $7, %dl
705705 ; AVX512BW-NEXT: sarb $2, %cl
706706 ; AVX512BW-NEXT: addb %dl, %cl
712712 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
713713 ; AVX512BW-NEXT: shrl $8, %ecx
714714 ; AVX512BW-NEXT: addb %cl, %al
715 ; AVX512BW-NEXT: movb %al, %cl
715 ; AVX512BW-NEXT: movl %eax, %ecx
716716 ; AVX512BW-NEXT: shrb $7, %cl
717717 ; AVX512BW-NEXT: sarb $2, %al
718718 ; AVX512BW-NEXT: addb %cl, %al
723723 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
724724 ; AVX512BW-NEXT: shrl $8, %ecx
725725 ; AVX512BW-NEXT: addb %cl, %al
726 ; AVX512BW-NEXT: movb %al, %cl
726 ; AVX512BW-NEXT: movl %eax, %ecx
727727 ; AVX512BW-NEXT: shrb $7, %cl
728728 ; AVX512BW-NEXT: sarb $2, %al
729729 ; AVX512BW-NEXT: addb %cl, %al
734734 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
735735 ; AVX512BW-NEXT: shrl $8, %ecx
736736 ; AVX512BW-NEXT: addb %cl, %al
737 ; AVX512BW-NEXT: movb %al, %cl
737 ; AVX512BW-NEXT: movl %eax, %ecx
738738 ; AVX512BW-NEXT: shrb $7, %cl
739739 ; AVX512BW-NEXT: sarb $2, %al
740740 ; AVX512BW-NEXT: addb %cl, %al
745745 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
746746 ; AVX512BW-NEXT: shrl $8, %ecx
747747 ; AVX512BW-NEXT: addb %cl, %al
748 ; AVX512BW-NEXT: movb %al, %cl
748 ; AVX512BW-NEXT: movl %eax, %ecx
749749 ; AVX512BW-NEXT: shrb $7, %cl
750750 ; AVX512BW-NEXT: sarb $2, %al
751751 ; AVX512BW-NEXT: addb %cl, %al
756756 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
757757 ; AVX512BW-NEXT: shrl $8, %ecx
758758 ; AVX512BW-NEXT: addb %cl, %al
759 ; AVX512BW-NEXT: movb %al, %cl
759 ; AVX512BW-NEXT: movl %eax, %ecx
760760 ; AVX512BW-NEXT: shrb $7, %cl
761761 ; AVX512BW-NEXT: sarb $2, %al
762762 ; AVX512BW-NEXT: addb %cl, %al
767767 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
768768 ; AVX512BW-NEXT: shrl $8, %ecx
769769 ; AVX512BW-NEXT: addb %cl, %al
770 ; AVX512BW-NEXT: movb %al, %cl
770 ; AVX512BW-NEXT: movl %eax, %ecx
771771 ; AVX512BW-NEXT: shrb $7, %cl
772772 ; AVX512BW-NEXT: sarb $2, %al
773773 ; AVX512BW-NEXT: addb %cl, %al
778778 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
779779 ; AVX512BW-NEXT: shrl $8, %ecx
780780 ; AVX512BW-NEXT: addb %cl, %al
781 ; AVX512BW-NEXT: movb %al, %cl
781 ; AVX512BW-NEXT: movl %eax, %ecx
782782 ; AVX512BW-NEXT: shrb $7, %cl
783783 ; AVX512BW-NEXT: sarb $2, %al
784784 ; AVX512BW-NEXT: addb %cl, %al
789789 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
790790 ; AVX512BW-NEXT: shrl $8, %ecx
791791 ; AVX512BW-NEXT: addb %cl, %al
792 ; AVX512BW-NEXT: movb %al, %cl
792 ; AVX512BW-NEXT: movl %eax, %ecx
793793 ; AVX512BW-NEXT: shrb $7, %cl
794794 ; AVX512BW-NEXT: sarb $2, %al
795795 ; AVX512BW-NEXT: addb %cl, %al
800800 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
801801 ; AVX512BW-NEXT: shrl $8, %ecx
802802 ; AVX512BW-NEXT: addb %cl, %al
803 ; AVX512BW-NEXT: movb %al, %cl
803 ; AVX512BW-NEXT: movl %eax, %ecx
804804 ; AVX512BW-NEXT: shrb $7, %cl
805805 ; AVX512BW-NEXT: sarb $2, %al
806806 ; AVX512BW-NEXT: addb %cl, %al
811811 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
812812 ; AVX512BW-NEXT: shrl $8, %ecx
813813 ; AVX512BW-NEXT: addb %cl, %al
814 ; AVX512BW-NEXT: movb %al, %cl
814 ; AVX512BW-NEXT: movl %eax, %ecx
815815 ; AVX512BW-NEXT: shrb $7, %cl
816816 ; AVX512BW-NEXT: sarb $2, %al
817817 ; AVX512BW-NEXT: addb %cl, %al
822822 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
823823 ; AVX512BW-NEXT: shrl $8, %ecx
824824 ; AVX512BW-NEXT: addb %cl, %al
825 ; AVX512BW-NEXT: movb %al, %cl
825 ; AVX512BW-NEXT: movl %eax, %ecx
826826 ; AVX512BW-NEXT: shrb $7, %cl
827827 ; AVX512BW-NEXT: sarb $2, %al
828828 ; AVX512BW-NEXT: addb %cl, %al
833833 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
834834 ; AVX512BW-NEXT: shrl $8, %ecx
835835 ; AVX512BW-NEXT: addb %cl, %al
836 ; AVX512BW-NEXT: movb %al, %cl
836 ; AVX512BW-NEXT: movl %eax, %ecx
837837 ; AVX512BW-NEXT: shrb $7, %cl
838838 ; AVX512BW-NEXT: sarb $2, %al
839839 ; AVX512BW-NEXT: addb %cl, %al
844844 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
845845 ; AVX512BW-NEXT: shrl $8, %ecx
846846 ; AVX512BW-NEXT: addb %cl, %al
847 ; AVX512BW-NEXT: movb %al, %cl
847 ; AVX512BW-NEXT: movl %eax, %ecx
848848 ; AVX512BW-NEXT: shrb $7, %cl
849849 ; AVX512BW-NEXT: sarb $2, %al
850850 ; AVX512BW-NEXT: addb %cl, %al
855855 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
856856 ; AVX512BW-NEXT: shrl $8, %ecx
857857 ; AVX512BW-NEXT: addb %cl, %al
858 ; AVX512BW-NEXT: movb %al, %cl
858 ; AVX512BW-NEXT: movl %eax, %ecx
859859 ; AVX512BW-NEXT: shrb $7, %cl
860860 ; AVX512BW-NEXT: sarb $2, %al
861861 ; AVX512BW-NEXT: addb %cl, %al
866866 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
867867 ; AVX512BW-NEXT: shrl $8, %ecx
868868 ; AVX512BW-NEXT: addb %cl, %al
869 ; AVX512BW-NEXT: movb %al, %cl
869 ; AVX512BW-NEXT: movl %eax, %ecx
870870 ; AVX512BW-NEXT: shrb $7, %cl
871871 ; AVX512BW-NEXT: sarb $2, %al
872872 ; AVX512BW-NEXT: addb %cl, %al
876876 ; AVX512BW-NEXT: imull $-109, %ecx, %edx
877877 ; AVX512BW-NEXT: shrl $8, %edx
878878 ; AVX512BW-NEXT: addb %dl, %cl
879 ; AVX512BW-NEXT: movb %cl, %dl
879 ; AVX512BW-NEXT: movl %ecx, %edx
880880 ; AVX512BW-NEXT: shrb $7, %dl
881881 ; AVX512BW-NEXT: sarb $2, %cl
882882 ; AVX512BW-NEXT: addb %dl, %cl
888888 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
889889 ; AVX512BW-NEXT: shrl $8, %ecx
890890 ; AVX512BW-NEXT: addb %cl, %al
891 ; AVX512BW-NEXT: movb %al, %cl
891 ; AVX512BW-NEXT: movl %eax, %ecx
892892 ; AVX512BW-NEXT: shrb $7, %cl
893893 ; AVX512BW-NEXT: sarb $2, %al
894894 ; AVX512BW-NEXT: addb %cl, %al
899899 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
900900 ; AVX512BW-NEXT: shrl $8, %ecx
901901 ; AVX512BW-NEXT: addb %cl, %al
902 ; AVX512BW-NEXT: movb %al, %cl
902 ; AVX512BW-NEXT: movl %eax, %ecx
903903 ; AVX512BW-NEXT: shrb $7, %cl
904904 ; AVX512BW-NEXT: sarb $2, %al
905905 ; AVX512BW-NEXT: addb %cl, %al
910910 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
911911 ; AVX512BW-NEXT: shrl $8, %ecx
912912 ; AVX512BW-NEXT: addb %cl, %al
913 ; AVX512BW-NEXT: movb %al, %cl
913 ; AVX512BW-NEXT: movl %eax, %ecx
914914 ; AVX512BW-NEXT: shrb $7, %cl
915915 ; AVX512BW-NEXT: sarb $2, %al
916916 ; AVX512BW-NEXT: addb %cl, %al
921921 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
922922 ; AVX512BW-NEXT: shrl $8, %ecx
923923 ; AVX512BW-NEXT: addb %cl, %al
924 ; AVX512BW-NEXT: movb %al, %cl
924 ; AVX512BW-NEXT: movl %eax, %ecx
925925 ; AVX512BW-NEXT: shrb $7, %cl
926926 ; AVX512BW-NEXT: sarb $2, %al
927927 ; AVX512BW-NEXT: addb %cl, %al
932932 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
933933 ; AVX512BW-NEXT: shrl $8, %ecx
934934 ; AVX512BW-NEXT: addb %cl, %al
935 ; AVX512BW-NEXT: movb %al, %cl
935 ; AVX512BW-NEXT: movl %eax, %ecx
936936 ; AVX512BW-NEXT: shrb $7, %cl
937937 ; AVX512BW-NEXT: sarb $2, %al
938938 ; AVX512BW-NEXT: addb %cl, %al
943943 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
944944 ; AVX512BW-NEXT: shrl $8, %ecx
945945 ; AVX512BW-NEXT: addb %cl, %al
946 ; AVX512BW-NEXT: movb %al, %cl
946 ; AVX512BW-NEXT: movl %eax, %ecx
947947 ; AVX512BW-NEXT: shrb $7, %cl
948948 ; AVX512BW-NEXT: sarb $2, %al
949949 ; AVX512BW-NEXT: addb %cl, %al
954954 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
955955 ; AVX512BW-NEXT: shrl $8, %ecx
956956 ; AVX512BW-NEXT: addb %cl, %al
957 ; AVX512BW-NEXT: movb %al, %cl
957 ; AVX512BW-NEXT: movl %eax, %ecx
958958 ; AVX512BW-NEXT: shrb $7, %cl
959959 ; AVX512BW-NEXT: sarb $2, %al
960960 ; AVX512BW-NEXT: addb %cl, %al
965965 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
966966 ; AVX512BW-NEXT: shrl $8, %ecx
967967 ; AVX512BW-NEXT: addb %cl, %al
968 ; AVX512BW-NEXT: movb %al, %cl
968 ; AVX512BW-NEXT: movl %eax, %ecx
969969 ; AVX512BW-NEXT: shrb $7, %cl
970970 ; AVX512BW-NEXT: sarb $2, %al
971971 ; AVX512BW-NEXT: addb %cl, %al
976976 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
977977 ; AVX512BW-NEXT: shrl $8, %ecx
978978 ; AVX512BW-NEXT: addb %cl, %al
979 ; AVX512BW-NEXT: movb %al, %cl
979 ; AVX512BW-NEXT: movl %eax, %ecx
980980 ; AVX512BW-NEXT: shrb $7, %cl
981981 ; AVX512BW-NEXT: sarb $2, %al
982982 ; AVX512BW-NEXT: addb %cl, %al
987987 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
988988 ; AVX512BW-NEXT: shrl $8, %ecx
989989 ; AVX512BW-NEXT: addb %cl, %al
990 ; AVX512BW-NEXT: movb %al, %cl
990 ; AVX512BW-NEXT: movl %eax, %ecx
991991 ; AVX512BW-NEXT: shrb $7, %cl
992992 ; AVX512BW-NEXT: sarb $2, %al
993993 ; AVX512BW-NEXT: addb %cl, %al
998998 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
999999 ; AVX512BW-NEXT: shrl $8, %ecx
10001000 ; AVX512BW-NEXT: addb %cl, %al
1001 ; AVX512BW-NEXT: movb %al, %cl
1001 ; AVX512BW-NEXT: movl %eax, %ecx
10021002 ; AVX512BW-NEXT: shrb $7, %cl
10031003 ; AVX512BW-NEXT: sarb $2, %al
10041004 ; AVX512BW-NEXT: addb %cl, %al
10091009 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
10101010 ; AVX512BW-NEXT: shrl $8, %ecx
10111011 ; AVX512BW-NEXT: addb %cl, %al
1012 ; AVX512BW-NEXT: movb %al, %cl
1012 ; AVX512BW-NEXT: movl %eax, %ecx
10131013 ; AVX512BW-NEXT: shrb $7, %cl
10141014 ; AVX512BW-NEXT: sarb $2, %al
10151015 ; AVX512BW-NEXT: addb %cl, %al
10201020 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
10211021 ; AVX512BW-NEXT: shrl $8, %ecx
10221022 ; AVX512BW-NEXT: addb %cl, %al
1023 ; AVX512BW-NEXT: movb %al, %cl
1023 ; AVX512BW-NEXT: movl %eax, %ecx
10241024 ; AVX512BW-NEXT: shrb $7, %cl
10251025 ; AVX512BW-NEXT: sarb $2, %al
10261026 ; AVX512BW-NEXT: addb %cl, %al
10311031 ; AVX512BW-NEXT: imull $-109, %eax, %ecx
10321032 ; AVX512BW-NEXT: shrl $8, %ecx
10331033 ; AVX512BW-NEXT: addb %cl, %al
1034 ; AVX512BW-NEXT: movb %al, %cl
1034 ; AVX512BW-NEXT: movl %eax, %ecx
10351035 ; AVX512BW-NEXT: shrb $7, %cl
10361036 ; AVX512BW-NEXT: sarb $2, %al
10371037 ; AVX512BW-NEXT: addb %cl, %al
14881488 ; AVX512BW-NEXT: imull $-109, %edx, %eax
14891489 ; AVX512BW-NEXT: shrl $8, %eax
14901490 ; AVX512BW-NEXT: addb %dl, %al
1491 ; AVX512BW-NEXT: movb %al, %cl
1491 ; AVX512BW-NEXT: movl %eax, %ecx
14921492 ; AVX512BW-NEXT: shrb $7, %cl
14931493 ; AVX512BW-NEXT: sarb $2, %al
14941494 ; AVX512BW-NEXT: addb %cl, %al
15011501 ; AVX512BW-NEXT: imull $-109, %esi, %eax
15021502 ; AVX512BW-NEXT: shrl $8, %eax
15031503 ; AVX512BW-NEXT: addb %sil, %al
1504 ; AVX512BW-NEXT: movb %al, %cl
1504 ; AVX512BW-NEXT: movl %eax, %ecx
15051505 ; AVX512BW-NEXT: shrb $7, %cl
15061506 ; AVX512BW-NEXT: sarb $2, %al
15071507 ; AVX512BW-NEXT: addb %cl, %al
15151515 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
15161516 ; AVX512BW-NEXT: shrl $8, %eax
15171517 ; AVX512BW-NEXT: addb %cl, %al
1518 ; AVX512BW-NEXT: movb %al, %dl
1518 ; AVX512BW-NEXT: movl %eax, %edx
15191519 ; AVX512BW-NEXT: shrb $7, %dl
15201520 ; AVX512BW-NEXT: sarb $2, %al
15211521 ; AVX512BW-NEXT: addb %dl, %al
15281528 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
15291529 ; AVX512BW-NEXT: shrl $8, %eax
15301530 ; AVX512BW-NEXT: addb %cl, %al
1531 ; AVX512BW-NEXT: movb %al, %dl
1531 ; AVX512BW-NEXT: movl %eax, %edx
15321532 ; AVX512BW-NEXT: shrb $7, %dl
15331533 ; AVX512BW-NEXT: sarb $2, %al
15341534 ; AVX512BW-NEXT: addb %dl, %al
15411541 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
15421542 ; AVX512BW-NEXT: shrl $8, %eax
15431543 ; AVX512BW-NEXT: addb %cl, %al
1544 ; AVX512BW-NEXT: movb %al, %dl
1544 ; AVX512BW-NEXT: movl %eax, %edx
15451545 ; AVX512BW-NEXT: shrb $7, %dl
15461546 ; AVX512BW-NEXT: sarb $2, %al
15471547 ; AVX512BW-NEXT: addb %dl, %al
15541554 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
15551555 ; AVX512BW-NEXT: shrl $8, %eax
15561556 ; AVX512BW-NEXT: addb %cl, %al
1557 ; AVX512BW-NEXT: movb %al, %dl
1557 ; AVX512BW-NEXT: movl %eax, %edx
15581558 ; AVX512BW-NEXT: shrb $7, %dl
15591559 ; AVX512BW-NEXT: sarb $2, %al
15601560 ; AVX512BW-NEXT: addb %dl, %al
15671567 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
15681568 ; AVX512BW-NEXT: shrl $8, %eax
15691569 ; AVX512BW-NEXT: addb %cl, %al
1570 ; AVX512BW-NEXT: movb %al, %dl
1570 ; AVX512BW-NEXT: movl %eax, %edx
15711571 ; AVX512BW-NEXT: shrb $7, %dl
15721572 ; AVX512BW-NEXT: sarb $2, %al
15731573 ; AVX512BW-NEXT: addb %dl, %al
15801580 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
15811581 ; AVX512BW-NEXT: shrl $8, %eax
15821582 ; AVX512BW-NEXT: addb %cl, %al
1583 ; AVX512BW-NEXT: movb %al, %dl
1583 ; AVX512BW-NEXT: movl %eax, %edx
15841584 ; AVX512BW-NEXT: shrb $7, %dl
15851585 ; AVX512BW-NEXT: sarb $2, %al
15861586 ; AVX512BW-NEXT: addb %dl, %al
15931593 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
15941594 ; AVX512BW-NEXT: shrl $8, %eax
15951595 ; AVX512BW-NEXT: addb %cl, %al
1596 ; AVX512BW-NEXT: movb %al, %dl
1596 ; AVX512BW-NEXT: movl %eax, %edx
15971597 ; AVX512BW-NEXT: shrb $7, %dl
15981598 ; AVX512BW-NEXT: sarb $2, %al
15991599 ; AVX512BW-NEXT: addb %dl, %al
16061606 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
16071607 ; AVX512BW-NEXT: shrl $8, %eax
16081608 ; AVX512BW-NEXT: addb %cl, %al
1609 ; AVX512BW-NEXT: movb %al, %dl
1609 ; AVX512BW-NEXT: movl %eax, %edx
16101610 ; AVX512BW-NEXT: shrb $7, %dl
16111611 ; AVX512BW-NEXT: sarb $2, %al
16121612 ; AVX512BW-NEXT: addb %dl, %al
16191619 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
16201620 ; AVX512BW-NEXT: shrl $8, %eax
16211621 ; AVX512BW-NEXT: addb %cl, %al
1622 ; AVX512BW-NEXT: movb %al, %dl
1622 ; AVX512BW-NEXT: movl %eax, %edx
16231623 ; AVX512BW-NEXT: shrb $7, %dl
16241624 ; AVX512BW-NEXT: sarb $2, %al
16251625 ; AVX512BW-NEXT: addb %dl, %al
16321632 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
16331633 ; AVX512BW-NEXT: shrl $8, %eax
16341634 ; AVX512BW-NEXT: addb %cl, %al
1635 ; AVX512BW-NEXT: movb %al, %dl
1635 ; AVX512BW-NEXT: movl %eax, %edx
16361636 ; AVX512BW-NEXT: shrb $7, %dl
16371637 ; AVX512BW-NEXT: sarb $2, %al
16381638 ; AVX512BW-NEXT: addb %dl, %al
16451645 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
16461646 ; AVX512BW-NEXT: shrl $8, %eax
16471647 ; AVX512BW-NEXT: addb %cl, %al
1648 ; AVX512BW-NEXT: movb %al, %dl
1648 ; AVX512BW-NEXT: movl %eax, %edx
16491649 ; AVX512BW-NEXT: shrb $7, %dl
16501650 ; AVX512BW-NEXT: sarb $2, %al
16511651 ; AVX512BW-NEXT: addb %dl, %al
16581658 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
16591659 ; AVX512BW-NEXT: shrl $8, %eax
16601660 ; AVX512BW-NEXT: addb %cl, %al
1661 ; AVX512BW-NEXT: movb %al, %dl
1661 ; AVX512BW-NEXT: movl %eax, %edx
16621662 ; AVX512BW-NEXT: shrb $7, %dl
16631663 ; AVX512BW-NEXT: sarb $2, %al
16641664 ; AVX512BW-NEXT: addb %dl, %al
16711671 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
16721672 ; AVX512BW-NEXT: shrl $8, %eax
16731673 ; AVX512BW-NEXT: addb %cl, %al
1674 ; AVX512BW-NEXT: movb %al, %dl
1674 ; AVX512BW-NEXT: movl %eax, %edx
16751675 ; AVX512BW-NEXT: shrb $7, %dl
16761676 ; AVX512BW-NEXT: sarb $2, %al
16771677 ; AVX512BW-NEXT: addb %dl, %al
16841684 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
16851685 ; AVX512BW-NEXT: shrl $8, %eax
16861686 ; AVX512BW-NEXT: addb %cl, %al
1687 ; AVX512BW-NEXT: movb %al, %dl
1687 ; AVX512BW-NEXT: movl %eax, %edx
16881688 ; AVX512BW-NEXT: shrb $7, %dl
16891689 ; AVX512BW-NEXT: sarb $2, %al
16901690 ; AVX512BW-NEXT: addb %dl, %al
16981698 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
16991699 ; AVX512BW-NEXT: shrl $8, %eax
17001700 ; AVX512BW-NEXT: addb %cl, %al
1701 ; AVX512BW-NEXT: movb %al, %dl
1701 ; AVX512BW-NEXT: movl %eax, %edx
17021702 ; AVX512BW-NEXT: shrb $7, %dl
17031703 ; AVX512BW-NEXT: sarb $2, %al
17041704 ; AVX512BW-NEXT: addb %dl, %al
17101710 ; AVX512BW-NEXT: imull $-109, %edx, %eax
17111711 ; AVX512BW-NEXT: shrl $8, %eax
17121712 ; AVX512BW-NEXT: addb %dl, %al
1713 ; AVX512BW-NEXT: movb %al, %cl
1713 ; AVX512BW-NEXT: movl %eax, %ecx
17141714 ; AVX512BW-NEXT: shrb $7, %cl
17151715 ; AVX512BW-NEXT: sarb $2, %al
17161716 ; AVX512BW-NEXT: addb %cl, %al
17241724 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
17251725 ; AVX512BW-NEXT: shrl $8, %eax
17261726 ; AVX512BW-NEXT: addb %cl, %al
1727 ; AVX512BW-NEXT: movb %al, %dl
1727 ; AVX512BW-NEXT: movl %eax, %edx
17281728 ; AVX512BW-NEXT: shrb $7, %dl
17291729 ; AVX512BW-NEXT: sarb $2, %al
17301730 ; AVX512BW-NEXT: addb %dl, %al
17371737 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
17381738 ; AVX512BW-NEXT: shrl $8, %eax
17391739 ; AVX512BW-NEXT: addb %cl, %al
1740 ; AVX512BW-NEXT: movb %al, %dl
1740 ; AVX512BW-NEXT: movl %eax, %edx
17411741 ; AVX512BW-NEXT: shrb $7, %dl
17421742 ; AVX512BW-NEXT: sarb $2, %al
17431743 ; AVX512BW-NEXT: addb %dl, %al
17501750 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
17511751 ; AVX512BW-NEXT: shrl $8, %eax
17521752 ; AVX512BW-NEXT: addb %cl, %al
1753 ; AVX512BW-NEXT: movb %al, %dl
1753 ; AVX512BW-NEXT: movl %eax, %edx
17541754 ; AVX512BW-NEXT: shrb $7, %dl
17551755 ; AVX512BW-NEXT: sarb $2, %al
17561756 ; AVX512BW-NEXT: addb %dl, %al
17631763 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
17641764 ; AVX512BW-NEXT: shrl $8, %eax
17651765 ; AVX512BW-NEXT: addb %cl, %al
1766 ; AVX512BW-NEXT: movb %al, %dl
1766 ; AVX512BW-NEXT: movl %eax, %edx
17671767 ; AVX512BW-NEXT: shrb $7, %dl
17681768 ; AVX512BW-NEXT: sarb $2, %al
17691769 ; AVX512BW-NEXT: addb %dl, %al
17761776 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
17771777 ; AVX512BW-NEXT: shrl $8, %eax
17781778 ; AVX512BW-NEXT: addb %cl, %al
1779 ; AVX512BW-NEXT: movb %al, %dl
1779 ; AVX512BW-NEXT: movl %eax, %edx
17801780 ; AVX512BW-NEXT: shrb $7, %dl
17811781 ; AVX512BW-NEXT: sarb $2, %al
17821782 ; AVX512BW-NEXT: addb %dl, %al
17891789 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
17901790 ; AVX512BW-NEXT: shrl $8, %eax
17911791 ; AVX512BW-NEXT: addb %cl, %al
1792 ; AVX512BW-NEXT: movb %al, %dl
1792 ; AVX512BW-NEXT: movl %eax, %edx
17931793 ; AVX512BW-NEXT: shrb $7, %dl
17941794 ; AVX512BW-NEXT: sarb $2, %al
17951795 ; AVX512BW-NEXT: addb %dl, %al
18021802 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
18031803 ; AVX512BW-NEXT: shrl $8, %eax
18041804 ; AVX512BW-NEXT: addb %cl, %al
1805 ; AVX512BW-NEXT: movb %al, %dl
1805 ; AVX512BW-NEXT: movl %eax, %edx
18061806 ; AVX512BW-NEXT: shrb $7, %dl
18071807 ; AVX512BW-NEXT: sarb $2, %al
18081808 ; AVX512BW-NEXT: addb %dl, %al
18151815 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
18161816 ; AVX512BW-NEXT: shrl $8, %eax
18171817 ; AVX512BW-NEXT: addb %cl, %al
1818 ; AVX512BW-NEXT: movb %al, %dl
1818 ; AVX512BW-NEXT: movl %eax, %edx
18191819 ; AVX512BW-NEXT: shrb $7, %dl
18201820 ; AVX512BW-NEXT: sarb $2, %al
18211821 ; AVX512BW-NEXT: addb %dl, %al
18281828 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
18291829 ; AVX512BW-NEXT: shrl $8, %eax
18301830 ; AVX512BW-NEXT: addb %cl, %al
1831 ; AVX512BW-NEXT: movb %al, %dl
1831 ; AVX512BW-NEXT: movl %eax, %edx
18321832 ; AVX512BW-NEXT: shrb $7, %dl
18331833 ; AVX512BW-NEXT: sarb $2, %al
18341834 ; AVX512BW-NEXT: addb %dl, %al
18411841 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
18421842 ; AVX512BW-NEXT: shrl $8, %eax
18431843 ; AVX512BW-NEXT: addb %cl, %al
1844 ; AVX512BW-NEXT: movb %al, %dl
1844 ; AVX512BW-NEXT: movl %eax, %edx
18451845 ; AVX512BW-NEXT: shrb $7, %dl
18461846 ; AVX512BW-NEXT: sarb $2, %al
18471847 ; AVX512BW-NEXT: addb %dl, %al
18541854 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
18551855 ; AVX512BW-NEXT: shrl $8, %eax
18561856 ; AVX512BW-NEXT: addb %cl, %al
1857 ; AVX512BW-NEXT: movb %al, %dl
1857 ; AVX512BW-NEXT: movl %eax, %edx
18581858 ; AVX512BW-NEXT: shrb $7, %dl
18591859 ; AVX512BW-NEXT: sarb $2, %al
18601860 ; AVX512BW-NEXT: addb %dl, %al
18671867 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
18681868 ; AVX512BW-NEXT: shrl $8, %eax
18691869 ; AVX512BW-NEXT: addb %cl, %al
1870 ; AVX512BW-NEXT: movb %al, %dl
1870 ; AVX512BW-NEXT: movl %eax, %edx
18711871 ; AVX512BW-NEXT: shrb $7, %dl
18721872 ; AVX512BW-NEXT: sarb $2, %al
18731873 ; AVX512BW-NEXT: addb %dl, %al
18801880 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
18811881 ; AVX512BW-NEXT: shrl $8, %eax
18821882 ; AVX512BW-NEXT: addb %cl, %al
1883 ; AVX512BW-NEXT: movb %al, %dl
1883 ; AVX512BW-NEXT: movl %eax, %edx
18841884 ; AVX512BW-NEXT: shrb $7, %dl
18851885 ; AVX512BW-NEXT: sarb $2, %al
18861886 ; AVX512BW-NEXT: addb %dl, %al
18931893 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
18941894 ; AVX512BW-NEXT: shrl $8, %eax
18951895 ; AVX512BW-NEXT: addb %cl, %al
1896 ; AVX512BW-NEXT: movb %al, %dl
1896 ; AVX512BW-NEXT: movl %eax, %edx
18971897 ; AVX512BW-NEXT: shrb $7, %dl
18981898 ; AVX512BW-NEXT: sarb $2, %al
18991899 ; AVX512BW-NEXT: addb %dl, %al
19081908 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
19091909 ; AVX512BW-NEXT: shrl $8, %eax
19101910 ; AVX512BW-NEXT: addb %cl, %al
1911 ; AVX512BW-NEXT: movb %al, %dl
1911 ; AVX512BW-NEXT: movl %eax, %edx
19121912 ; AVX512BW-NEXT: shrb $7, %dl
19131913 ; AVX512BW-NEXT: sarb $2, %al
19141914 ; AVX512BW-NEXT: addb %dl, %al
19201920 ; AVX512BW-NEXT: imull $-109, %edx, %eax
19211921 ; AVX512BW-NEXT: shrl $8, %eax
19221922 ; AVX512BW-NEXT: addb %dl, %al
1923 ; AVX512BW-NEXT: movb %al, %cl
1923 ; AVX512BW-NEXT: movl %eax, %ecx
19241924 ; AVX512BW-NEXT: shrb $7, %cl
19251925 ; AVX512BW-NEXT: sarb $2, %al
19261926 ; AVX512BW-NEXT: addb %cl, %al
19341934 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
19351935 ; AVX512BW-NEXT: shrl $8, %eax
19361936 ; AVX512BW-NEXT: addb %cl, %al
1937 ; AVX512BW-NEXT: movb %al, %dl
1937 ; AVX512BW-NEXT: movl %eax, %edx
19381938 ; AVX512BW-NEXT: shrb $7, %dl
19391939 ; AVX512BW-NEXT: sarb $2, %al
19401940 ; AVX512BW-NEXT: addb %dl, %al
19471947 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
19481948 ; AVX512BW-NEXT: shrl $8, %eax
19491949 ; AVX512BW-NEXT: addb %cl, %al
1950 ; AVX512BW-NEXT: movb %al, %dl
1950 ; AVX512BW-NEXT: movl %eax, %edx
19511951 ; AVX512BW-NEXT: shrb $7, %dl
19521952 ; AVX512BW-NEXT: sarb $2, %al
19531953 ; AVX512BW-NEXT: addb %dl, %al
19601960 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
19611961 ; AVX512BW-NEXT: shrl $8, %eax
19621962 ; AVX512BW-NEXT: addb %cl, %al
1963 ; AVX512BW-NEXT: movb %al, %dl
1963 ; AVX512BW-NEXT: movl %eax, %edx
19641964 ; AVX512BW-NEXT: shrb $7, %dl
19651965 ; AVX512BW-NEXT: sarb $2, %al
19661966 ; AVX512BW-NEXT: addb %dl, %al
19731973 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
19741974 ; AVX512BW-NEXT: shrl $8, %eax
19751975 ; AVX512BW-NEXT: addb %cl, %al
1976 ; AVX512BW-NEXT: movb %al, %dl
1976 ; AVX512BW-NEXT: movl %eax, %edx
19771977 ; AVX512BW-NEXT: shrb $7, %dl
19781978 ; AVX512BW-NEXT: sarb $2, %al
19791979 ; AVX512BW-NEXT: addb %dl, %al
19861986 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
19871987 ; AVX512BW-NEXT: shrl $8, %eax
19881988 ; AVX512BW-NEXT: addb %cl, %al
1989 ; AVX512BW-NEXT: movb %al, %dl
1989 ; AVX512BW-NEXT: movl %eax, %edx
19901990 ; AVX512BW-NEXT: shrb $7, %dl
19911991 ; AVX512BW-NEXT: sarb $2, %al
19921992 ; AVX512BW-NEXT: addb %dl, %al
19991999 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
20002000 ; AVX512BW-NEXT: shrl $8, %eax
20012001 ; AVX512BW-NEXT: addb %cl, %al
2002 ; AVX512BW-NEXT: movb %al, %dl
2002 ; AVX512BW-NEXT: movl %eax, %edx
20032003 ; AVX512BW-NEXT: shrb $7, %dl
20042004 ; AVX512BW-NEXT: sarb $2, %al
20052005 ; AVX512BW-NEXT: addb %dl, %al
20122012 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
20132013 ; AVX512BW-NEXT: shrl $8, %eax
20142014 ; AVX512BW-NEXT: addb %cl, %al
2015 ; AVX512BW-NEXT: movb %al, %dl
2015 ; AVX512BW-NEXT: movl %eax, %edx
20162016 ; AVX512BW-NEXT: shrb $7, %dl
20172017 ; AVX512BW-NEXT: sarb $2, %al
20182018 ; AVX512BW-NEXT: addb %dl, %al
20252025 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
20262026 ; AVX512BW-NEXT: shrl $8, %eax
20272027 ; AVX512BW-NEXT: addb %cl, %al
2028 ; AVX512BW-NEXT: movb %al, %dl
2028 ; AVX512BW-NEXT: movl %eax, %edx
20292029 ; AVX512BW-NEXT: shrb $7, %dl
20302030 ; AVX512BW-NEXT: sarb $2, %al
20312031 ; AVX512BW-NEXT: addb %dl, %al
20382038 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
20392039 ; AVX512BW-NEXT: shrl $8, %eax
20402040 ; AVX512BW-NEXT: addb %cl, %al
2041 ; AVX512BW-NEXT: movb %al, %dl
2041 ; AVX512BW-NEXT: movl %eax, %edx
20422042 ; AVX512BW-NEXT: shrb $7, %dl
20432043 ; AVX512BW-NEXT: sarb $2, %al
20442044 ; AVX512BW-NEXT: addb %dl, %al
20512051 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
20522052 ; AVX512BW-NEXT: shrl $8, %eax
20532053 ; AVX512BW-NEXT: addb %cl, %al
2054 ; AVX512BW-NEXT: movb %al, %dl
2054 ; AVX512BW-NEXT: movl %eax, %edx
20552055 ; AVX512BW-NEXT: shrb $7, %dl
20562056 ; AVX512BW-NEXT: sarb $2, %al
20572057 ; AVX512BW-NEXT: addb %dl, %al
20642064 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
20652065 ; AVX512BW-NEXT: shrl $8, %eax
20662066 ; AVX512BW-NEXT: addb %cl, %al
2067 ; AVX512BW-NEXT: movb %al, %dl
2067 ; AVX512BW-NEXT: movl %eax, %edx
20682068 ; AVX512BW-NEXT: shrb $7, %dl
20692069 ; AVX512BW-NEXT: sarb $2, %al
20702070 ; AVX512BW-NEXT: addb %dl, %al
20772077 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
20782078 ; AVX512BW-NEXT: shrl $8, %eax
20792079 ; AVX512BW-NEXT: addb %cl, %al
2080 ; AVX512BW-NEXT: movb %al, %dl
2080 ; AVX512BW-NEXT: movl %eax, %edx
20812081 ; AVX512BW-NEXT: shrb $7, %dl
20822082 ; AVX512BW-NEXT: sarb $2, %al
20832083 ; AVX512BW-NEXT: addb %dl, %al
20902090 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
20912091 ; AVX512BW-NEXT: shrl $8, %eax
20922092 ; AVX512BW-NEXT: addb %cl, %al
2093 ; AVX512BW-NEXT: movb %al, %dl
2093 ; AVX512BW-NEXT: movl %eax, %edx
20942094 ; AVX512BW-NEXT: shrb $7, %dl
20952095 ; AVX512BW-NEXT: sarb $2, %al
20962096 ; AVX512BW-NEXT: addb %dl, %al
21032103 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
21042104 ; AVX512BW-NEXT: shrl $8, %eax
21052105 ; AVX512BW-NEXT: addb %cl, %al
2106 ; AVX512BW-NEXT: movb %al, %dl
2106 ; AVX512BW-NEXT: movl %eax, %edx
21072107 ; AVX512BW-NEXT: shrb $7, %dl
21082108 ; AVX512BW-NEXT: sarb $2, %al
21092109 ; AVX512BW-NEXT: addb %dl, %al
21162116 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
21172117 ; AVX512BW-NEXT: shrl $8, %eax
21182118 ; AVX512BW-NEXT: addb %cl, %al
2119 ; AVX512BW-NEXT: movb %al, %dl
2119 ; AVX512BW-NEXT: movl %eax, %edx
21202120 ; AVX512BW-NEXT: shrb $7, %dl
21212121 ; AVX512BW-NEXT: sarb $2, %al
21222122 ; AVX512BW-NEXT: addb %dl, %al
21282128 ; AVX512BW-NEXT: imull $-109, %edx, %eax
21292129 ; AVX512BW-NEXT: shrl $8, %eax
21302130 ; AVX512BW-NEXT: addb %dl, %al
2131 ; AVX512BW-NEXT: movb %al, %cl
2131 ; AVX512BW-NEXT: movl %eax, %ecx
21322132 ; AVX512BW-NEXT: shrb $7, %cl
21332133 ; AVX512BW-NEXT: sarb $2, %al
21342134 ; AVX512BW-NEXT: addb %cl, %al
21422142 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
21432143 ; AVX512BW-NEXT: shrl $8, %eax
21442144 ; AVX512BW-NEXT: addb %cl, %al
2145 ; AVX512BW-NEXT: movb %al, %dl
2145 ; AVX512BW-NEXT: movl %eax, %edx
21462146 ; AVX512BW-NEXT: shrb $7, %dl
21472147 ; AVX512BW-NEXT: sarb $2, %al
21482148 ; AVX512BW-NEXT: addb %dl, %al
21552155 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
21562156 ; AVX512BW-NEXT: shrl $8, %eax
21572157 ; AVX512BW-NEXT: addb %cl, %al
2158 ; AVX512BW-NEXT: movb %al, %dl
2158 ; AVX512BW-NEXT: movl %eax, %edx
21592159 ; AVX512BW-NEXT: shrb $7, %dl
21602160 ; AVX512BW-NEXT: sarb $2, %al
21612161 ; AVX512BW-NEXT: addb %dl, %al
21682168 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
21692169 ; AVX512BW-NEXT: shrl $8, %eax
21702170 ; AVX512BW-NEXT: addb %cl, %al
2171 ; AVX512BW-NEXT: movb %al, %dl
2171 ; AVX512BW-NEXT: movl %eax, %edx
21722172 ; AVX512BW-NEXT: shrb $7, %dl
21732173 ; AVX512BW-NEXT: sarb $2, %al
21742174 ; AVX512BW-NEXT: addb %dl, %al
21812181 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
21822182 ; AVX512BW-NEXT: shrl $8, %eax
21832183 ; AVX512BW-NEXT: addb %cl, %al
2184 ; AVX512BW-NEXT: movb %al, %dl
2184 ; AVX512BW-NEXT: movl %eax, %edx
21852185 ; AVX512BW-NEXT: shrb $7, %dl
21862186 ; AVX512BW-NEXT: sarb $2, %al
21872187 ; AVX512BW-NEXT: addb %dl, %al
21942194 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
21952195 ; AVX512BW-NEXT: shrl $8, %eax
21962196 ; AVX512BW-NEXT: addb %cl, %al
2197 ; AVX512BW-NEXT: movb %al, %dl
2197 ; AVX512BW-NEXT: movl %eax, %edx
21982198 ; AVX512BW-NEXT: shrb $7, %dl
21992199 ; AVX512BW-NEXT: sarb $2, %al
22002200 ; AVX512BW-NEXT: addb %dl, %al
22072207 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
22082208 ; AVX512BW-NEXT: shrl $8, %eax
22092209 ; AVX512BW-NEXT: addb %cl, %al
2210 ; AVX512BW-NEXT: movb %al, %dl
2210 ; AVX512BW-NEXT: movl %eax, %edx
22112211 ; AVX512BW-NEXT: shrb $7, %dl
22122212 ; AVX512BW-NEXT: sarb $2, %al
22132213 ; AVX512BW-NEXT: addb %dl, %al
22202220 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
22212221 ; AVX512BW-NEXT: shrl $8, %eax
22222222 ; AVX512BW-NEXT: addb %cl, %al
2223 ; AVX512BW-NEXT: movb %al, %dl
2223 ; AVX512BW-NEXT: movl %eax, %edx
22242224 ; AVX512BW-NEXT: shrb $7, %dl
22252225 ; AVX512BW-NEXT: sarb $2, %al
22262226 ; AVX512BW-NEXT: addb %dl, %al
22332233 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
22342234 ; AVX512BW-NEXT: shrl $8, %eax
22352235 ; AVX512BW-NEXT: addb %cl, %al
2236 ; AVX512BW-NEXT: movb %al, %dl
2236 ; AVX512BW-NEXT: movl %eax, %edx
22372237 ; AVX512BW-NEXT: shrb $7, %dl
22382238 ; AVX512BW-NEXT: sarb $2, %al
22392239 ; AVX512BW-NEXT: addb %dl, %al
22462246 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
22472247 ; AVX512BW-NEXT: shrl $8, %eax
22482248 ; AVX512BW-NEXT: addb %cl, %al
2249 ; AVX512BW-NEXT: movb %al, %dl
2249 ; AVX512BW-NEXT: movl %eax, %edx
22502250 ; AVX512BW-NEXT: shrb $7, %dl
22512251 ; AVX512BW-NEXT: sarb $2, %al
22522252 ; AVX512BW-NEXT: addb %dl, %al
22592259 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
22602260 ; AVX512BW-NEXT: shrl $8, %eax
22612261 ; AVX512BW-NEXT: addb %cl, %al
2262 ; AVX512BW-NEXT: movb %al, %dl
2262 ; AVX512BW-NEXT: movl %eax, %edx
22632263 ; AVX512BW-NEXT: shrb $7, %dl
22642264 ; AVX512BW-NEXT: sarb $2, %al
22652265 ; AVX512BW-NEXT: addb %dl, %al
22722272 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
22732273 ; AVX512BW-NEXT: shrl $8, %eax
22742274 ; AVX512BW-NEXT: addb %cl, %al
2275 ; AVX512BW-NEXT: movb %al, %dl
2275 ; AVX512BW-NEXT: movl %eax, %edx
22762276 ; AVX512BW-NEXT: shrb $7, %dl
22772277 ; AVX512BW-NEXT: sarb $2, %al
22782278 ; AVX512BW-NEXT: addb %dl, %al
22852285 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
22862286 ; AVX512BW-NEXT: shrl $8, %eax
22872287 ; AVX512BW-NEXT: addb %cl, %al
2288 ; AVX512BW-NEXT: movb %al, %dl
2288 ; AVX512BW-NEXT: movl %eax, %edx
22892289 ; AVX512BW-NEXT: shrb $7, %dl
22902290 ; AVX512BW-NEXT: sarb $2, %al
22912291 ; AVX512BW-NEXT: addb %dl, %al
22982298 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
22992299 ; AVX512BW-NEXT: shrl $8, %eax
23002300 ; AVX512BW-NEXT: addb %cl, %al
2301 ; AVX512BW-NEXT: movb %al, %dl
2301 ; AVX512BW-NEXT: movl %eax, %edx
23022302 ; AVX512BW-NEXT: shrb $7, %dl
23032303 ; AVX512BW-NEXT: sarb $2, %al
23042304 ; AVX512BW-NEXT: addb %dl, %al
23112311 ; AVX512BW-NEXT: imull $-109, %ecx, %eax
23122312 ; AVX512BW-NEXT: shrl $8, %eax
23132313 ; AVX512BW-NEXT: addb %cl, %al
2314 ; AVX512BW-NEXT: movb %al, %dl
2314 ; AVX512BW-NEXT: movl %eax, %edx
23152315 ; AVX512BW-NEXT: shrb $7, %dl
23162316 ; AVX512BW-NEXT: sarb $2, %al
23172317 ; AVX512BW-NEXT: addb %dl, %al
13221322 ; AVX512BW-NEXT: vpextrb $1, %xmm1, %edx
13231323 ; AVX512BW-NEXT: imull $37, %edx, %ecx
13241324 ; AVX512BW-NEXT: shrl $8, %ecx
1325 ; AVX512BW-NEXT: movb %dl, %al
1325 ; AVX512BW-NEXT: movl %edx, %eax
13261326 ; AVX512BW-NEXT: subb %cl, %al
13271327 ; AVX512BW-NEXT: shrb %al
13281328 ; AVX512BW-NEXT: addb %cl, %al
13341334 ; AVX512BW-NEXT: vpextrb $0, %xmm1, %esi
13351335 ; AVX512BW-NEXT: imull $37, %esi, %edi
13361336 ; AVX512BW-NEXT: shrl $8, %edi
1337 ; AVX512BW-NEXT: movb %sil, %al
1337 ; AVX512BW-NEXT: movl %esi, %eax
13381338 ; AVX512BW-NEXT: subb %dil, %al
13391339 ; AVX512BW-NEXT: shrb %al
13401340 ; AVX512BW-NEXT: addb %dil, %al
13471347 ; AVX512BW-NEXT: vpextrb $2, %xmm1, %edx
13481348 ; AVX512BW-NEXT: imull $37, %edx, %esi
13491349 ; AVX512BW-NEXT: shrl $8, %esi
1350 ; AVX512BW-NEXT: movb %dl, %al
1350 ; AVX512BW-NEXT: movl %edx, %eax
13511351 ; AVX512BW-NEXT: subb %sil, %al
13521352 ; AVX512BW-NEXT: shrb %al
13531353 ; AVX512BW-NEXT: addb %sil, %al
13591359 ; AVX512BW-NEXT: vpextrb $3, %xmm1, %edx
13601360 ; AVX512BW-NEXT: imull $37, %edx, %esi
13611361 ; AVX512BW-NEXT: shrl $8, %esi
1362 ; AVX512BW-NEXT: movb %dl, %al
1362 ; AVX512BW-NEXT: movl %edx, %eax
13631363 ; AVX512BW-NEXT: subb %sil, %al
13641364 ; AVX512BW-NEXT: shrb %al
13651365 ; AVX512BW-NEXT: addb %sil, %al
13711371 ; AVX512BW-NEXT: vpextrb $4, %xmm1, %edx
13721372 ; AVX512BW-NEXT: imull $37, %edx, %esi
13731373 ; AVX512BW-NEXT: shrl $8, %esi
1374 ; AVX512BW-NEXT: movb %dl, %al
1374 ; AVX512BW-NEXT: movl %edx, %eax
13751375 ; AVX512BW-NEXT: subb %sil, %al
13761376 ; AVX512BW-NEXT: shrb %al
13771377 ; AVX512BW-NEXT: addb %sil, %al
13831383 ; AVX512BW-NEXT: vpextrb $5, %xmm1, %edx
13841384 ; AVX512BW-NEXT: imull $37, %edx, %esi
13851385 ; AVX512BW-NEXT: shrl $8, %esi
1386 ; AVX512BW-NEXT: movb %dl, %al
1386 ; AVX512BW-NEXT: movl %edx, %eax
13871387 ; AVX512BW-NEXT: subb %sil, %al
13881388 ; AVX512BW-NEXT: shrb %al
13891389 ; AVX512BW-NEXT: addb %sil, %al
13951395 ; AVX512BW-NEXT: vpextrb $6, %xmm1, %edx
13961396 ; AVX512BW-NEXT: imull $37, %edx, %esi
13971397 ; AVX512BW-NEXT: shrl $8, %esi
1398 ; AVX512BW-NEXT: movb %dl, %al
1398 ; AVX512BW-NEXT: movl %edx, %eax
13991399 ; AVX512BW-NEXT: subb %sil, %al
14001400 ; AVX512BW-NEXT: shrb %al
14011401 ; AVX512BW-NEXT: addb %sil, %al
14071407 ; AVX512BW-NEXT: vpextrb $7, %xmm1, %edx
14081408 ; AVX512BW-NEXT: imull $37, %edx, %esi
14091409 ; AVX512BW-NEXT: shrl $8, %esi
1410 ; AVX512BW-NEXT: movb %dl, %al
1410 ; AVX512BW-NEXT: movl %edx, %eax
14111411 ; AVX512BW-NEXT: subb %sil, %al
14121412 ; AVX512BW-NEXT: shrb %al
14131413 ; AVX512BW-NEXT: addb %sil, %al
14191419 ; AVX512BW-NEXT: vpextrb $8, %xmm1, %edx
14201420 ; AVX512BW-NEXT: imull $37, %edx, %esi
14211421 ; AVX512BW-NEXT: shrl $8, %esi
1422 ; AVX512BW-NEXT: movb %dl, %al
1422 ; AVX512BW-NEXT: movl %edx, %eax
14231423 ; AVX512BW-NEXT: subb %sil, %al
14241424 ; AVX512BW-NEXT: shrb %al
14251425 ; AVX512BW-NEXT: addb %sil, %al
14311431 ; AVX512BW-NEXT: vpextrb $9, %xmm1, %edx
14321432 ; AVX512BW-NEXT: imull $37, %edx, %esi
14331433 ; AVX512BW-NEXT: shrl $8, %esi
1434 ; AVX512BW-NEXT: movb %dl, %al
1434 ; AVX512BW-NEXT: movl %edx, %eax
14351435 ; AVX512BW-NEXT: subb %sil, %al
14361436 ; AVX512BW-NEXT: shrb %al
14371437 ; AVX512BW-NEXT: addb %sil, %al
14431443 ; AVX512BW-NEXT: vpextrb $10, %xmm1, %edx
14441444 ; AVX512BW-NEXT: imull $37, %edx, %esi
14451445 ; AVX512BW-NEXT: shrl $8, %esi
1446 ; AVX512BW-NEXT: movb %dl, %al
1446 ; AVX512BW-NEXT: movl %edx, %eax
14471447 ; AVX512BW-NEXT: subb %sil, %al
14481448 ; AVX512BW-NEXT: shrb %al
14491449 ; AVX512BW-NEXT: addb %sil, %al
14551455 ; AVX512BW-NEXT: vpextrb $11, %xmm1, %edx
14561456 ; AVX512BW-NEXT: imull $37, %edx, %esi
14571457 ; AVX512BW-NEXT: shrl $8, %esi
1458 ; AVX512BW-NEXT: movb %dl, %al
1458 ; AVX512BW-NEXT: movl %edx, %eax
14591459 ; AVX512BW-NEXT: subb %sil, %al
14601460 ; AVX512BW-NEXT: shrb %al
14611461 ; AVX512BW-NEXT: addb %sil, %al
14671467 ; AVX512BW-NEXT: vpextrb $12, %xmm1, %edx
14681468 ; AVX512BW-NEXT: imull $37, %edx, %esi
14691469 ; AVX512BW-NEXT: shrl $8, %esi
1470 ; AVX512BW-NEXT: movb %dl, %al
1470 ; AVX512BW-NEXT: movl %edx, %eax
14711471 ; AVX512BW-NEXT: subb %sil, %al
14721472 ; AVX512BW-NEXT: shrb %al
14731473 ; AVX512BW-NEXT: addb %sil, %al
14791479 ; AVX512BW-NEXT: vpextrb $13, %xmm1, %edx
14801480 ; AVX512BW-NEXT: imull $37, %edx, %esi
14811481 ; AVX512BW-NEXT: shrl $8, %esi
1482 ; AVX512BW-NEXT: movb %dl, %al
1482 ; AVX512BW-NEXT: movl %edx, %eax
14831483 ; AVX512BW-NEXT: subb %sil, %al
14841484 ; AVX512BW-NEXT: shrb %al
14851485 ; AVX512BW-NEXT: addb %sil, %al
14911491 ; AVX512BW-NEXT: vpextrb $14, %xmm1, %edx
14921492 ; AVX512BW-NEXT: imull $37, %edx, %esi
14931493 ; AVX512BW-NEXT: shrl $8, %esi
1494 ; AVX512BW-NEXT: movb %dl, %al
1494 ; AVX512BW-NEXT: movl %edx, %eax
14951495 ; AVX512BW-NEXT: subb %sil, %al
14961496 ; AVX512BW-NEXT: shrb %al
14971497 ; AVX512BW-NEXT: addb %sil, %al
15031503 ; AVX512BW-NEXT: vpextrb $15, %xmm1, %edx
15041504 ; AVX512BW-NEXT: imull $37, %edx, %esi
15051505 ; AVX512BW-NEXT: shrl $8, %esi
1506 ; AVX512BW-NEXT: movb %dl, %al
1506 ; AVX512BW-NEXT: movl %edx, %eax
15071507 ; AVX512BW-NEXT: subb %sil, %al
15081508 ; AVX512BW-NEXT: shrb %al
15091509 ; AVX512BW-NEXT: addb %sil, %al
15161516 ; AVX512BW-NEXT: vpextrb $1, %xmm2, %edx
15171517 ; AVX512BW-NEXT: imull $37, %edx, %esi
15181518 ; AVX512BW-NEXT: shrl $8, %esi
1519 ; AVX512BW-NEXT: movb %dl, %al
1519 ; AVX512BW-NEXT: movl %edx, %eax
15201520 ; AVX512BW-NEXT: subb %sil, %al
15211521 ; AVX512BW-NEXT: shrb %al
15221522 ; AVX512BW-NEXT: addb %sil, %al
15271527 ; AVX512BW-NEXT: vpextrb $0, %xmm2, %esi
15281528 ; AVX512BW-NEXT: imull $37, %esi, %edi
15291529 ; AVX512BW-NEXT: shrl $8, %edi
1530 ; AVX512BW-NEXT: movb %sil, %al
1530 ; AVX512BW-NEXT: movl %esi, %eax
15311531 ; AVX512BW-NEXT: subb %dil, %al
15321532 ; AVX512BW-NEXT: shrb %al
15331533 ; AVX512BW-NEXT: addb %dil, %al
15401540 ; AVX512BW-NEXT: vpextrb $2, %xmm2, %edx
15411541 ; AVX512BW-NEXT: imull $37, %edx, %esi
15421542 ; AVX512BW-NEXT: shrl $8, %esi
1543 ; AVX512BW-NEXT: movb %dl, %al
1543 ; AVX512BW-NEXT: movl %edx, %eax
15441544 ; AVX512BW-NEXT: subb %sil, %al
15451545 ; AVX512BW-NEXT: shrb %al
15461546 ; AVX512BW-NEXT: addb %sil, %al
15521552 ; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx
15531553 ; AVX512BW-NEXT: imull $37, %edx, %esi
15541554 ; AVX512BW-NEXT: shrl $8, %esi
1555 ; AVX512BW-NEXT: movb %dl, %al
1555 ; AVX512BW-NEXT: movl %edx, %eax
15561556 ; AVX512BW-NEXT: subb %sil, %al
15571557 ; AVX512BW-NEXT: shrb %al
15581558 ; AVX512BW-NEXT: addb %sil, %al
15641564 ; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx
15651565 ; AVX512BW-NEXT: imull $37, %edx, %esi
15661566 ; AVX512BW-NEXT: shrl $8, %esi
1567 ; AVX512BW-NEXT: movb %dl, %al
1567 ; AVX512BW-NEXT: movl %edx, %eax
15681568 ; AVX512BW-NEXT: subb %sil, %al
15691569 ; AVX512BW-NEXT: shrb %al
15701570 ; AVX512BW-NEXT: addb %sil, %al
15761576 ; AVX512BW-NEXT: vpextrb $5, %xmm2, %edx
15771577 ; AVX512BW-NEXT: imull $37, %edx, %esi
15781578 ; AVX512BW-NEXT: shrl $8, %esi
1579 ; AVX512BW-NEXT: movb %dl, %al
1579 ; AVX512BW-NEXT: movl %edx, %eax
15801580 ; AVX512BW-NEXT: subb %sil, %al
15811581 ; AVX512BW-NEXT: shrb %al
15821582 ; AVX512BW-NEXT: addb %sil, %al
15881588 ; AVX512BW-NEXT: vpextrb $6, %xmm2, %edx
15891589 ; AVX512BW-NEXT: imull $37, %edx, %esi
15901590 ; AVX512BW-NEXT: shrl $8, %esi
1591 ; AVX512BW-NEXT: movb %dl, %al
1591 ; AVX512BW-NEXT: movl %edx, %eax
15921592 ; AVX512BW-NEXT: subb %sil, %al
15931593 ; AVX512BW-NEXT: shrb %al
15941594 ; AVX512BW-NEXT: addb %sil, %al
16001600 ; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx
16011601 ; AVX512BW-NEXT: imull $37, %edx, %esi
16021602 ; AVX512BW-NEXT: shrl $8, %esi
1603 ; AVX512BW-NEXT: movb %dl, %al
1603 ; AVX512BW-NEXT: movl %edx, %eax
16041604 ; AVX512BW-NEXT: subb %sil, %al
16051605 ; AVX512BW-NEXT: shrb %al
16061606 ; AVX512BW-NEXT: addb %sil, %al
16121612 ; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx
16131613 ; AVX512BW-NEXT: imull $37, %edx, %esi
16141614 ; AVX512BW-NEXT: shrl $8, %esi
1615 ; AVX512BW-NEXT: movb %dl, %al
1615 ; AVX512BW-NEXT: movl %edx, %eax
16161616 ; AVX512BW-NEXT: subb %sil, %al
16171617 ; AVX512BW-NEXT: shrb %al
16181618 ; AVX512BW-NEXT: addb %sil, %al
16241624 ; AVX512BW-NEXT: vpextrb $9, %xmm2, %edx
16251625 ; AVX512BW-NEXT: imull $37, %edx, %esi
16261626 ; AVX512BW-NEXT: shrl $8, %esi
1627 ; AVX512BW-NEXT: movb %dl, %al
1627 ; AVX512BW-NEXT: movl %edx, %eax
16281628 ; AVX512BW-NEXT: subb %sil, %al
16291629 ; AVX512BW-NEXT: shrb %al
16301630 ; AVX512BW-NEXT: addb %sil, %al
16361636 ; AVX512BW-NEXT: vpextrb $10, %xmm2, %edx
16371637 ; AVX512BW-NEXT: imull $37, %edx, %esi
16381638 ; AVX512BW-NEXT: shrl $8, %esi
1639 ; AVX512BW-NEXT: movb %dl, %al
1639 ; AVX512BW-NEXT: movl %edx, %eax
16401640 ; AVX512BW-NEXT: subb %sil, %al
16411641 ; AVX512BW-NEXT: shrb %al
16421642 ; AVX512BW-NEXT: addb %sil, %al
16481648 ; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx
16491649 ; AVX512BW-NEXT: imull $37, %edx, %esi
16501650 ; AVX512BW-NEXT: shrl $8, %esi
1651 ; AVX512BW-NEXT: movb %dl, %al
1651 ; AVX512BW-NEXT: movl %edx, %eax
16521652 ; AVX512BW-NEXT: subb %sil, %al
16531653 ; AVX512BW-NEXT: shrb %al
16541654 ; AVX512BW-NEXT: addb %sil, %al
16601660 ; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx
16611661 ; AVX512BW-NEXT: imull $37, %edx, %esi
16621662 ; AVX512BW-NEXT: shrl $8, %esi
1663 ; AVX512BW-NEXT: movb %dl, %al
1663 ; AVX512BW-NEXT: movl %edx, %eax
16641664 ; AVX512BW-NEXT: subb %sil, %al
16651665 ; AVX512BW-NEXT: shrb %al
16661666 ; AVX512BW-NEXT: addb %sil, %al
16721672 ; AVX512BW-NEXT: vpextrb $13, %xmm2, %edx
16731673 ; AVX512BW-NEXT: imull $37, %edx, %esi
16741674 ; AVX512BW-NEXT: shrl $8, %esi
1675 ; AVX512BW-NEXT: movb %dl, %al
1675 ; AVX512BW-NEXT: movl %edx, %eax
16761676 ; AVX512BW-NEXT: subb %sil, %al
16771677 ; AVX512BW-NEXT: shrb %al
16781678 ; AVX512BW-NEXT: addb %sil, %al
16841684 ; AVX512BW-NEXT: vpextrb $14, %xmm2, %edx
16851685 ; AVX512BW-NEXT: imull $37, %edx, %esi
16861686 ; AVX512BW-NEXT: shrl $8, %esi
1687 ; AVX512BW-NEXT: movb %dl, %al
1687 ; AVX512BW-NEXT: movl %edx, %eax
16881688 ; AVX512BW-NEXT: subb %sil, %al
16891689 ; AVX512BW-NEXT: shrb %al
16901690 ; AVX512BW-NEXT: addb %sil, %al
16961696 ; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx
16971697 ; AVX512BW-NEXT: imull $37, %edx, %esi
16981698 ; AVX512BW-NEXT: shrl $8, %esi
1699 ; AVX512BW-NEXT: movb %dl, %al
1699 ; AVX512BW-NEXT: movl %edx, %eax
17001700 ; AVX512BW-NEXT: subb %sil, %al
17011701 ; AVX512BW-NEXT: shrb %al
17021702 ; AVX512BW-NEXT: addb %sil, %al
17101710 ; AVX512BW-NEXT: vpextrb $1, %xmm2, %edx
17111711 ; AVX512BW-NEXT: imull $37, %edx, %esi
17121712 ; AVX512BW-NEXT: shrl $8, %esi
1713 ; AVX512BW-NEXT: movb %dl, %al
1713 ; AVX512BW-NEXT: movl %edx, %eax
17141714 ; AVX512BW-NEXT: subb %sil, %al
17151715 ; AVX512BW-NEXT: shrb %al
17161716 ; AVX512BW-NEXT: addb %sil, %al
17211721 ; AVX512BW-NEXT: vpextrb $0, %xmm2, %esi
17221722 ; AVX512BW-NEXT: imull $37, %esi, %edi
17231723 ; AVX512BW-NEXT: shrl $8, %edi
1724 ; AVX512BW-NEXT: movb %sil, %al
1724 ; AVX512BW-NEXT: movl %esi, %eax
17251725 ; AVX512BW-NEXT: subb %dil, %al
17261726 ; AVX512BW-NEXT: shrb %al
17271727 ; AVX512BW-NEXT: addb %dil, %al
17341734 ; AVX512BW-NEXT: vpextrb $2, %xmm2, %edx
17351735 ; AVX512BW-NEXT: imull $37, %edx, %esi
17361736 ; AVX512BW-NEXT: shrl $8, %esi
1737 ; AVX512BW-NEXT: movb %dl, %al
1737 ; AVX512BW-NEXT: movl %edx, %eax
17381738 ; AVX512BW-NEXT: subb %sil, %al
17391739 ; AVX512BW-NEXT: shrb %al
17401740 ; AVX512BW-NEXT: addb %sil, %al
17461746 ; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx
17471747 ; AVX512BW-NEXT: imull $37, %edx, %esi
17481748 ; AVX512BW-NEXT: shrl $8, %esi
1749 ; AVX512BW-NEXT: movb %dl, %al
1749 ; AVX512BW-NEXT: movl %edx, %eax
17501750 ; AVX512BW-NEXT: subb %sil, %al
17511751 ; AVX512BW-NEXT: shrb %al
17521752 ; AVX512BW-NEXT: addb %sil, %al
17581758 ; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx
17591759 ; AVX512BW-NEXT: imull $37, %edx, %esi
17601760 ; AVX512BW-NEXT: shrl $8, %esi
1761 ; AVX512BW-NEXT: movb %dl, %al
1761 ; AVX512BW-NEXT: movl %edx, %eax
17621762 ; AVX512BW-NEXT: subb %sil, %al
17631763 ; AVX512BW-NEXT: shrb %al
17641764 ; AVX512BW-NEXT: addb %sil, %al
17701770 ; AVX512BW-NEXT: vpextrb $5, %xmm2, %edx
17711771 ; AVX512BW-NEXT: imull $37, %edx, %esi
17721772 ; AVX512BW-NEXT: shrl $8, %esi
1773 ; AVX512BW-NEXT: movb %dl, %al
1773 ; AVX512BW-NEXT: movl %edx, %eax
17741774 ; AVX512BW-NEXT: subb %sil, %al
17751775 ; AVX512BW-NEXT: shrb %al
17761776 ; AVX512BW-NEXT: addb %sil, %al
17821782 ; AVX512BW-NEXT: vpextrb $6, %xmm2, %edx
17831783 ; AVX512BW-NEXT: imull $37, %edx, %esi
17841784 ; AVX512BW-NEXT: shrl $8, %esi
1785 ; AVX512BW-NEXT: movb %dl, %al
1785 ; AVX512BW-NEXT: movl %edx, %eax
17861786 ; AVX512BW-NEXT: subb %sil, %al
17871787 ; AVX512BW-NEXT: shrb %al
17881788 ; AVX512BW-NEXT: addb %sil, %al
17941794 ; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx
17951795 ; AVX512BW-NEXT: imull $37, %edx, %esi
17961796 ; AVX512BW-NEXT: shrl $8, %esi
1797 ; AVX512BW-NEXT: movb %dl, %al
1797 ; AVX512BW-NEXT: movl %edx, %eax
17981798 ; AVX512BW-NEXT: subb %sil, %al
17991799 ; AVX512BW-NEXT: shrb %al
18001800 ; AVX512BW-NEXT: addb %sil, %al
18061806 ; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx
18071807 ; AVX512BW-NEXT: imull $37, %edx, %esi
18081808 ; AVX512BW-NEXT: shrl $8, %esi
1809 ; AVX512BW-NEXT: movb %dl, %al
1809 ; AVX512BW-NEXT: movl %edx, %eax
18101810 ; AVX512BW-NEXT: subb %sil, %al
18111811 ; AVX512BW-NEXT: shrb %al
18121812 ; AVX512BW-NEXT: addb %sil, %al
18181818 ; AVX512BW-NEXT: vpextrb $9, %xmm2, %edx
18191819 ; AVX512BW-NEXT: imull $37, %edx, %esi
18201820 ; AVX512BW-NEXT: shrl $8, %esi
1821 ; AVX512BW-NEXT: movb %dl, %al
1821 ; AVX512BW-NEXT: movl %edx, %eax
18221822 ; AVX512BW-NEXT: subb %sil, %al
18231823 ; AVX512BW-NEXT: shrb %al
18241824 ; AVX512BW-NEXT: addb %sil, %al
18301830 ; AVX512BW-NEXT: vpextrb $10, %xmm2, %edx
18311831 ; AVX512BW-NEXT: imull $37, %edx, %esi
18321832 ; AVX512BW-NEXT: shrl $8, %esi
1833 ; AVX512BW-NEXT: movb %dl, %al
1833 ; AVX512BW-NEXT: movl %edx, %eax
18341834 ; AVX512BW-NEXT: subb %sil, %al
18351835 ; AVX512BW-NEXT: shrb %al
18361836 ; AVX512BW-NEXT: addb %sil, %al
18421842 ; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx
18431843 ; AVX512BW-NEXT: imull $37, %edx, %esi
18441844 ; AVX512BW-NEXT: shrl $8, %esi
1845 ; AVX512BW-NEXT: movb %dl, %al
1845 ; AVX512BW-NEXT: movl %edx, %eax
18461846 ; AVX512BW-NEXT: subb %sil, %al
18471847 ; AVX512BW-NEXT: shrb %al
18481848 ; AVX512BW-NEXT: addb %sil, %al
18541854 ; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx
18551855 ; AVX512BW-NEXT: imull $37, %edx, %esi
18561856 ; AVX512BW-NEXT: shrl $8, %esi
1857 ; AVX512BW-NEXT: movb %dl, %al
1857 ; AVX512BW-NEXT: movl %edx, %eax
18581858 ; AVX512BW-NEXT: subb %sil, %al
18591859 ; AVX512BW-NEXT: shrb %al
18601860 ; AVX512BW-NEXT: addb %sil, %al
18661866 ; AVX512BW-NEXT: vpextrb $13, %xmm2, %edx
18671867 ; AVX512BW-NEXT: imull $37, %edx, %esi
18681868 ; AVX512BW-NEXT: shrl $8, %esi
1869 ; AVX512BW-NEXT: movb %dl, %al
1869 ; AVX512BW-NEXT: movl %edx, %eax
18701870 ; AVX512BW-NEXT: subb %sil, %al
18711871 ; AVX512BW-NEXT: shrb %al
18721872 ; AVX512BW-NEXT: addb %sil, %al
18781878 ; AVX512BW-NEXT: vpextrb $14, %xmm2, %edx
18791879 ; AVX512BW-NEXT: imull $37, %edx, %esi
18801880 ; AVX512BW-NEXT: shrl $8, %esi
1881 ; AVX512BW-NEXT: movb %dl, %al
1881 ; AVX512BW-NEXT: movl %edx, %eax
18821882 ; AVX512BW-NEXT: subb %sil, %al
18831883 ; AVX512BW-NEXT: shrb %al
18841884 ; AVX512BW-NEXT: addb %sil, %al
18901890 ; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx
18911891 ; AVX512BW-NEXT: imull $37, %edx, %esi
18921892 ; AVX512BW-NEXT: shrl $8, %esi
1893 ; AVX512BW-NEXT: movb %dl, %al
1893 ; AVX512BW-NEXT: movl %edx, %eax
18941894 ; AVX512BW-NEXT: subb %sil, %al
18951895 ; AVX512BW-NEXT: shrb %al
18961896 ; AVX512BW-NEXT: addb %sil, %al
19021902 ; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx
19031903 ; AVX512BW-NEXT: imull $37, %edx, %esi
19041904 ; AVX512BW-NEXT: shrl $8, %esi
1905 ; AVX512BW-NEXT: movb %dl, %al
1905 ; AVX512BW-NEXT: movl %edx, %eax
19061906 ; AVX512BW-NEXT: subb %sil, %al
19071907 ; AVX512BW-NEXT: shrb %al
19081908 ; AVX512BW-NEXT: addb %sil, %al
19131913 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi
19141914 ; AVX512BW-NEXT: imull $37, %esi, %edi
19151915 ; AVX512BW-NEXT: shrl $8, %edi
1916 ; AVX512BW-NEXT: movb %sil, %al
1916 ; AVX512BW-NEXT: movl %esi, %eax
19171917 ; AVX512BW-NEXT: subb %dil, %al
19181918 ; AVX512BW-NEXT: shrb %al
19191919 ; AVX512BW-NEXT: addb %dil, %al
19261926 ; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx
19271927 ; AVX512BW-NEXT: imull $37, %edx, %esi
19281928 ; AVX512BW-NEXT: shrl $8, %esi
1929 ; AVX512BW-NEXT: movb %dl, %al
1929 ; AVX512BW-NEXT: movl %edx, %eax
19301930 ; AVX512BW-NEXT: subb %sil, %al
19311931 ; AVX512BW-NEXT: shrb %al
19321932 ; AVX512BW-NEXT: addb %sil, %al
19381938 ; AVX512BW-NEXT: vpextrb $3, %xmm0, %edx
19391939 ; AVX512BW-NEXT: imull $37, %edx, %esi
19401940 ; AVX512BW-NEXT: shrl $8, %esi
1941 ; AVX512BW-NEXT: movb %dl, %al
1941 ; AVX512BW-NEXT: movl %edx, %eax
19421942 ; AVX512BW-NEXT: subb %sil, %al
19431943 ; AVX512BW-NEXT: shrb %al
19441944 ; AVX512BW-NEXT: addb %sil, %al
19501950 ; AVX512BW-NEXT: vpextrb $4, %xmm0, %edx
19511951 ; AVX512BW-NEXT: imull $37, %edx, %esi
19521952 ; AVX512BW-NEXT: shrl $8, %esi
1953 ; AVX512BW-NEXT: movb %dl, %al
1953 ; AVX512BW-NEXT: movl %edx, %eax
19541954 ; AVX512BW-NEXT: subb %sil, %al
19551955 ; AVX512BW-NEXT: shrb %al
19561956 ; AVX512BW-NEXT: addb %sil, %al
19621962 ; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx
19631963 ; AVX512BW-NEXT: imull $37, %edx, %esi
19641964 ; AVX512BW-NEXT: shrl $8, %esi
1965 ; AVX512BW-NEXT: movb %dl, %al
1965 ; AVX512BW-NEXT: movl %edx, %eax
19661966 ; AVX512BW-NEXT: subb %sil, %al
19671967 ; AVX512BW-NEXT: shrb %al
19681968 ; AVX512BW-NEXT: addb %sil, %al
19741974 ; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx
19751975 ; AVX512BW-NEXT: imull $37, %edx, %esi
19761976 ; AVX512BW-NEXT: shrl $8, %esi
1977 ; AVX512BW-NEXT: movb %dl, %al
1977 ; AVX512BW-NEXT: movl %edx, %eax
19781978 ; AVX512BW-NEXT: subb %sil, %al
19791979 ; AVX512BW-NEXT: shrb %al
19801980 ; AVX512BW-NEXT: addb %sil, %al
19861986 ; AVX512BW-NEXT: vpextrb $7, %xmm0, %edx
19871987 ; AVX512BW-NEXT: imull $37, %edx, %esi
19881988 ; AVX512BW-NEXT: shrl $8, %esi
1989 ; AVX512BW-NEXT: movb %dl, %al
1989 ; AVX512BW-NEXT: movl %edx, %eax
19901990 ; AVX512BW-NEXT: subb %sil, %al
19911991 ; AVX512BW-NEXT: shrb %al
19921992 ; AVX512BW-NEXT: addb %sil, %al
19981998 ; AVX512BW-NEXT: vpextrb $8, %xmm0, %edx
19991999 ; AVX512BW-NEXT: imull $37, %edx, %esi
20002000 ; AVX512BW-NEXT: shrl $8, %esi
2001 ; AVX512BW-NEXT: movb %dl, %al
2001 ; AVX512BW-NEXT: movl %edx, %eax
20022002 ; AVX512BW-NEXT: subb %sil, %al
20032003 ; AVX512BW-NEXT: shrb %al
20042004 ; AVX512BW-NEXT: addb %sil, %al
20102010 ; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx
20112011 ; AVX512BW-NEXT: imull $37, %edx, %esi
20122012 ; AVX512BW-NEXT: shrl $8, %esi
2013 ; AVX512BW-NEXT: movb %dl, %al
2013 ; AVX512BW-NEXT: movl %edx, %eax
20142014 ; AVX512BW-NEXT: subb %sil, %al
20152015 ; AVX512BW-NEXT: shrb %al
20162016 ; AVX512BW-NEXT: addb %sil, %al
20222022 ; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx
20232023 ; AVX512BW-NEXT: imull $37, %edx, %esi
20242024 ; AVX512BW-NEXT: shrl $8, %esi
2025 ; AVX512BW-NEXT: movb %dl, %al
2025 ; AVX512BW-NEXT: movl %edx, %eax
20262026 ; AVX512BW-NEXT: subb %sil, %al
20272027 ; AVX512BW-NEXT: shrb %al
20282028 ; AVX512BW-NEXT: addb %sil, %al
20342034 ; AVX512BW-NEXT: vpextrb $11, %xmm0, %edx
20352035 ; AVX512BW-NEXT: imull $37, %edx, %esi
20362036 ; AVX512BW-NEXT: shrl $8, %esi
2037 ; AVX512BW-NEXT: movb %dl, %al
2037 ; AVX512BW-NEXT: movl %edx, %eax
20382038 ; AVX512BW-NEXT: subb %sil, %al
20392039 ; AVX512BW-NEXT: shrb %al
20402040 ; AVX512BW-NEXT: addb %sil, %al
20462046 ; AVX512BW-NEXT: vpextrb $12, %xmm0, %edx
20472047 ; AVX512BW-NEXT: imull $37, %edx, %esi
20482048 ; AVX512BW-NEXT: shrl $8, %esi
2049 ; AVX512BW-NEXT: movb %dl, %al
2049 ; AVX512BW-NEXT: movl %edx, %eax
20502050 ; AVX512BW-NEXT: subb %sil, %al
20512051 ; AVX512BW-NEXT: shrb %al
20522052 ; AVX512BW-NEXT: addb %sil, %al
20582058 ; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx
20592059 ; AVX512BW-NEXT: imull $37, %edx, %esi
20602060 ; AVX512BW-NEXT: shrl $8, %esi
2061 ; AVX512BW-NEXT: movb %dl, %al
2061 ; AVX512BW-NEXT: movl %edx, %eax
20622062 ; AVX512BW-NEXT: subb %sil, %al
20632063 ; AVX512BW-NEXT: shrb %al
20642064 ; AVX512BW-NEXT: addb %sil, %al
20702070 ; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx
20712071 ; AVX512BW-NEXT: imull $37, %edx, %esi
20722072 ; AVX512BW-NEXT: shrl $8, %esi
2073 ; AVX512BW-NEXT: movb %dl, %al
2073 ; AVX512BW-NEXT: movl %edx, %eax
20742074 ; AVX512BW-NEXT: subb %sil, %al
20752075 ; AVX512BW-NEXT: shrb %al
20762076 ; AVX512BW-NEXT: addb %sil, %al
20822082 ; AVX512BW-NEXT: vpextrb $15, %xmm0, %edx
20832083 ; AVX512BW-NEXT: imull $37, %edx, %esi
20842084 ; AVX512BW-NEXT: shrl $8, %esi
2085 ; AVX512BW-NEXT: movb %dl, %al
2085 ; AVX512BW-NEXT: movl %edx, %eax
20862086 ; AVX512BW-NEXT: subb %sil, %al
20872087 ; AVX512BW-NEXT: shrb %al
20882088 ; AVX512BW-NEXT: addb %sil, %al
294294 define zeroext i1 @smulo.i8(i8 %v1, i8 %v2, i8* %res) {
295295 entry:
296296 ; CHECK-LABEL: smulo.i8
297 ; CHECK: movb %dil, %al
297 ; CHECK: movl %edi, %eax
298298 ; CHECK-NEXT: imulb %sil
299299 ; CHECK-NEXT: seto %cl
300300 %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
344344 define zeroext i1 @umulo.i8(i8 %v1, i8 %v2, i8* %res) {
345345 entry:
346346 ; CHECK-LABEL: umulo.i8
347 ; CHECK: movb %dil, %al
347 ; CHECK: movl %edi, %eax
348348 ; CHECK-NEXT: mulb %sil
349349 ; CHECK-NEXT: seto %cl
350350 %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)