llvm.org GIT mirror llvm / b3da570
[X86] Enable 8-bit OR with disjoint bits to convert to LEA. We already support 8-bit adds in convertToThreeAddress, but we can also support 8-bit OR if the bits are disjoint. We already do this for 16/32/64-bit ORs. Differential Revision: https://reviews.llvm.org/D58863 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355423 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 7 months ago
16 changed file(s) with 146 addition(s) and 103 deletion(s). Raw diff Collapse all Expand all
13801380 let isConvertibleToThreeAddress = 1,
13811381 Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
13821382 let isCommutable = 1 in {
1383 def ADD8rr_DB : I<0, Pseudo, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
1384 "", // orb/addb REG, REG
1385 [(set GR8:$dst, (or_is_add GR8:$src1, GR8:$src2))]>;
13831386 def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
13841387 "", // orw/addw REG, REG
13851388 [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
13941397 // NOTE: These are order specific, we want the ri8 forms to be listed
13951398 // first so that they are slightly preferred to the ri forms.
13961399
1400 def ADD8ri_DB : I<0, Pseudo,
1401 (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
1402 "", // orb/addb REG, imm8
1403 [(set GR8:$dst, (or_is_add GR8:$src1, imm:$src2))]>;
13971404 def ADD16ri8_DB : I<0, Pseudo,
13981405 (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
13991406 "", // orw/addw REG, imm8
6464 { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE },
6565 { X86::ADD8ri, X86::ADD8mi, 0 },
6666 { X86::ADD8ri8, X86::ADD8mi8, 0 },
67 { X86::ADD8ri_DB, X86::ADD8mi, TB_NO_REVERSE },
6768 { X86::ADD8rr, X86::ADD8mr, 0 },
69 { X86::ADD8rr_DB, X86::ADD8mr, TB_NO_REVERSE },
6870 { X86::AND16ri, X86::AND16mi, 0 },
6971 { X86::AND16ri8, X86::AND16mi8, 0 },
7072 { X86::AND16rr, X86::AND16mr, 0 },
12171219 { X86::ADD64rr, X86::ADD64rm, 0 },
12181220 { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE },
12191221 { X86::ADD8rr, X86::ADD8rm, 0 },
1222 { X86::ADD8rr_DB, X86::ADD8rm, TB_NO_REVERSE },
12201223 { X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 },
12211224 { X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 },
12221225 { X86::ADDSDrr, X86::ADDSDrm, 0 },
709709
710710 MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
711711 unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
712 LiveVariables *LV) const {
712 LiveVariables *LV, bool Is8BitOp) const {
713713 // We handle 8-bit adds and various 16-bit opcodes in the switch below.
714 bool Is16BitOp = !(MIOpc == X86::ADD8rr || MIOpc == X86::ADD8ri);
715714 MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
716 assert((!Is16BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
715 assert((Is8BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
717716 *RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
718717 "Unexpected type for LEA transform");
719718
743742 unsigned Src = MI.getOperand(1).getReg();
744743 bool IsDead = MI.getOperand(0).isDead();
745744 bool IsKill = MI.getOperand(1).isKill();
746 unsigned SubReg = Is16BitOp ? X86::sub_16bit : X86::sub_8bit;
745 unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit;
747746 assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
748747 BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
749748 MachineInstr *InsMI =
768767 addRegOffset(MIB, InRegLEA, true, -1);
769768 break;
770769 case X86::ADD8ri:
770 case X86::ADD8ri_DB:
771771 case X86::ADD16ri:
772772 case X86::ADD16ri8:
773773 case X86::ADD16ri_DB:
775775 addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
776776 break;
777777 case X86::ADD8rr:
778 case X86::ADD8rr_DB:
778779 case X86::ADD16rr:
779780 case X86::ADD16rr_DB: {
780781 unsigned Src2 = MI.getOperand(2).getReg();
861862 MachineInstr *NewMI = nullptr;
862863 bool Is64Bit = Subtarget.is64Bit();
863864
865 bool Is8BitOp = false;
864866 unsigned MIOpc = MI.getOpcode();
865867 switch (MIOpc) {
866868 default: return nullptr;
918920 unsigned ShAmt = getTruncatedShiftCount(MI, 2);
919921 if (!isTruncatedShiftCountForLEA(ShAmt))
920922 return nullptr;
921 return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
923 return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
922924 }
923925 case X86::INC64r:
924926 case X86::INC32r: {
943945 break;
944946 }
945947 case X86::INC16r:
946 return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
948 return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
947949 case X86::DEC64r:
948950 case X86::DEC32r: {
949951 assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
968970 break;
969971 }
970972 case X86::DEC16r:
971 return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
973 return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
972974 case X86::ADD64rr:
973975 case X86::ADD64rr_DB:
974976 case X86::ADD32rr:
10071009 break;
10081010 }
10091011 case X86::ADD8rr:
1012 case X86::ADD8rr_DB:
1013 Is8BitOp = true;
1014 LLVM_FALLTHROUGH;
10101015 case X86::ADD16rr:
10111016 case X86::ADD16rr_DB:
1012 return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
1017 return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
10131018 case X86::ADD64ri32:
10141019 case X86::ADD64ri8:
10151020 case X86::ADD64ri32_DB:
10431048 break;
10441049 }
10451050 case X86::ADD8ri:
1051 case X86::ADD8ri_DB:
1052 Is8BitOp = true;
1053 LLVM_FALLTHROUGH;
10461054 case X86::ADD16ri:
10471055 case X86::ADD16ri8:
10481056 case X86::ADD16ri_DB:
10491057 case X86::ADD16ri8_DB:
1050 return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
1058 return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
10511059 case X86::VMOVDQU8Z128rmk:
10521060 case X86::VMOVDQU8Z256rmk:
10531061 case X86::VMOVDQU8Zrmk:
591591 MachineInstr *convertToThreeAddressWithLEA(unsigned MIOpc,
592592 MachineFunction::iterator &MFI,
593593 MachineInstr &MI,
594 LiveVariables *LV) const;
594 LiveVariables *LV,
595 bool Is8BitOp) const;
595596
596597 /// Handles memory folding for special case instructions, for instance those
597598 /// requiring custom manipulation of the address.
585585 // These are pseudo-ops for OR to help with the OR->ADD transformation. We do
586586 // this with an ugly goto in case the resultant OR uses EAX and needs the
587587 // short form.
588 case X86::ADD8rr_DB: OutMI.setOpcode(X86::OR8rr); goto ReSimplify;
588589 case X86::ADD16rr_DB: OutMI.setOpcode(X86::OR16rr); goto ReSimplify;
589590 case X86::ADD32rr_DB: OutMI.setOpcode(X86::OR32rr); goto ReSimplify;
590591 case X86::ADD64rr_DB: OutMI.setOpcode(X86::OR64rr); goto ReSimplify;
592 case X86::ADD8ri_DB: OutMI.setOpcode(X86::OR8ri); goto ReSimplify;
591593 case X86::ADD16ri_DB: OutMI.setOpcode(X86::OR16ri); goto ReSimplify;
592594 case X86::ADD32ri_DB: OutMI.setOpcode(X86::OR32ri); goto ReSimplify;
593595 case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify;
139139 case X86::ADD64rr:
140140 case X86::ADD64rr_DB:
141141 case X86::ADD8ri:
142 case X86::ADD8ri_DB:
142143 case X86::ADD8rm:
143144 case X86::ADD8rr:
145 case X86::ADD8rr_DB:
144146 case X86::SUB16ri:
145147 case X86::SUB16ri8:
146148 case X86::SUB16rm:
339339 ;
340340 ; X64-LABEL: test_bitreverse_i8:
341341 ; X64: # %bb.0:
342 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
343 ; X64-NEXT: rolb $4, %dil
342344 ; X64-NEXT: movl %edi, %eax
343 ; X64-NEXT: rolb $4, %al
344 ; X64-NEXT: movl %eax, %ecx
345 ; X64-NEXT: andb $51, %cl
346 ; X64-NEXT: shlb $2, %cl
347 ; X64-NEXT: andb $-52, %al
348 ; X64-NEXT: shrb $2, %al
349 ; X64-NEXT: orb %cl, %al
350 ; X64-NEXT: movl %eax, %ecx
351 ; X64-NEXT: andb $85, %cl
352 ; X64-NEXT: addb %cl, %cl
353 ; X64-NEXT: andb $-86, %al
354 ; X64-NEXT: shrb %al
355 ; X64-NEXT: orb %cl, %al
345 ; X64-NEXT: andb $51, %al
346 ; X64-NEXT: shlb $2, %al
347 ; X64-NEXT: andb $-52, %dil
348 ; X64-NEXT: shrb $2, %dil
349 ; X64-NEXT: orb %al, %dil
350 ; X64-NEXT: movl %edi, %eax
351 ; X64-NEXT: andb $85, %al
352 ; X64-NEXT: addb %al, %al
353 ; X64-NEXT: andb $-86, %dil
354 ; X64-NEXT: shrb %dil
355 ; X64-NEXT: leal (%rdi,%rax), %eax
356356 ; X64-NEXT: # kill: def $al killed $al killed $eax
357357 ; X64-NEXT: retq
358358 %b = call i8 @llvm.bitreverse.i8(i8 %a)
383383 ;
384384 ; X64-LABEL: test_bitreverse_i4:
385385 ; X64: # %bb.0:
386 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
387 ; X64-NEXT: rolb $4, %dil
386388 ; X64-NEXT: movl %edi, %eax
387 ; X64-NEXT: rolb $4, %al
388 ; X64-NEXT: movl %eax, %ecx
389 ; X64-NEXT: andb $51, %cl
390 ; X64-NEXT: shlb $2, %cl
391 ; X64-NEXT: andb $-52, %al
392 ; X64-NEXT: shrb $2, %al
393 ; X64-NEXT: orb %cl, %al
394 ; X64-NEXT: movl %eax, %ecx
395 ; X64-NEXT: andb $80, %cl
396 ; X64-NEXT: addb %cl, %cl
397 ; X64-NEXT: andb $-96, %al
398 ; X64-NEXT: shrb %al
399 ; X64-NEXT: orb %cl, %al
389 ; X64-NEXT: andb $51, %al
390 ; X64-NEXT: shlb $2, %al
391 ; X64-NEXT: andb $-52, %dil
392 ; X64-NEXT: shrb $2, %dil
393 ; X64-NEXT: orb %al, %dil
394 ; X64-NEXT: movl %edi, %eax
395 ; X64-NEXT: andb $80, %al
396 ; X64-NEXT: addb %al, %al
397 ; X64-NEXT: andb $-96, %dil
398 ; X64-NEXT: shrb %dil
399 ; X64-NEXT: leal (%rdi,%rax), %eax
400400 ; X64-NEXT: shrb $4, %al
401401 ; X64-NEXT: # kill: def $al killed $al killed $eax
402402 ; X64-NEXT: retq
4646 define i8 @sub_zext_cmp_mask_narrower_result(i32 %x) {
4747 ; X64-LABEL: sub_zext_cmp_mask_narrower_result:
4848 ; X64: # %bb.0:
49 ; X64-NEXT: movl %edi, %eax
50 ; X64-NEXT: andb $1, %al
51 ; X64-NEXT: orb $46, %al
49 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
50 ; X64-NEXT: andb $1, %dil
51 ; X64-NEXT: leal 46(%rdi), %eax
5252 ; X64-NEXT: # kill: def $al killed $al killed $eax
5353 ; X64-NEXT: retq
5454 ;
380380 ;
381381 ; X64-LABEL: const_shift_i8:
382382 ; X64: # %bb.0:
383 ; X64-NEXT: movl %edi, %eax
383 ; X64-NEXT: # kill: def $esi killed $esi def $rsi
384 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
384385 ; X64-NEXT: shrb %sil
385 ; X64-NEXT: shlb $7, %al
386 ; X64-NEXT: orb %sil, %al
386 ; X64-NEXT: shlb $7, %dil
387 ; X64-NEXT: leal (%rdi,%rsi), %eax
387388 ; X64-NEXT: # kill: def $al killed $al killed $eax
388389 ; X64-NEXT: retq
389390 %tmp = tail call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 7)
10871087 }
10881088
10891089 define i32 @trunc_select_miscompile(i32 %a, i1 zeroext %cc) {
1090 ; CHECK-LABEL: trunc_select_miscompile:
1091 ; CHECK: ## %bb.0:
1092 ; CHECK-NEXT: movl %esi, %ecx
1093 ; CHECK-NEXT: movl %edi, %eax
1094 ; CHECK-NEXT: orb $2, %cl
1095 ; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx
1096 ; CHECK-NEXT: shll %cl, %eax
1097 ; CHECK-NEXT: retq
1090 ; GENERIC-LABEL: trunc_select_miscompile:
1091 ; GENERIC: ## %bb.0:
1092 ; GENERIC-NEXT: ## kill: def $esi killed $esi def $rsi
1093 ; GENERIC-NEXT: movl %edi, %eax
1094 ; GENERIC-NEXT: leal 2(%rsi), %ecx
1095 ; GENERIC-NEXT: ## kill: def $cl killed $cl killed $ecx
1096 ; GENERIC-NEXT: shll %cl, %eax
1097 ; GENERIC-NEXT: retq
1098 ;
1099 ; ATOM-LABEL: trunc_select_miscompile:
1100 ; ATOM: ## %bb.0:
1101 ; ATOM-NEXT: ## kill: def $esi killed $esi def $rsi
1102 ; ATOM-NEXT: leal 2(%rsi), %ecx
1103 ; ATOM-NEXT: movl %edi, %eax
1104 ; ATOM-NEXT: ## kill: def $cl killed $cl killed $ecx
1105 ; ATOM-NEXT: shll %cl, %eax
1106 ; ATOM-NEXT: nop
1107 ; ATOM-NEXT: nop
1108 ; ATOM-NEXT: retq
10981109 ;
10991110 ; ATHLON-LABEL: trunc_select_miscompile:
11001111 ; ATHLON: ## %bb.0:
327327 define i8 @select_pow2_diff(i1 zeroext %cond) {
328328 ; CHECK-LABEL: select_pow2_diff:
329329 ; CHECK: # %bb.0:
330 ; CHECK-NEXT: movl %edi, %eax
331 ; CHECK-NEXT: shlb $4, %al
332 ; CHECK-NEXT: orb $3, %al
330 ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
331 ; CHECK-NEXT: shlb $4, %dil
332 ; CHECK-NEXT: leal 3(%rdi), %eax
333333 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
334334 ; CHECK-NEXT: retq
335335 %sel = select i1 %cond, i8 19, i8 3
99 define i8 @out8_constmask(i8 %x, i8 %y) {
1010 ; CHECK-NOBMI-LABEL: out8_constmask:
1111 ; CHECK-NOBMI: # %bb.0:
12 ; CHECK-NOBMI-NEXT: movl %esi, %eax
12 ; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi
13 ; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
1314 ; CHECK-NOBMI-NEXT: andb $60, %dil
14 ; CHECK-NOBMI-NEXT: andb $-61, %al
15 ; CHECK-NOBMI-NEXT: orb %dil, %al
15 ; CHECK-NOBMI-NEXT: andb $-61, %sil
16 ; CHECK-NOBMI-NEXT: leal (%rsi,%rdi), %eax
1617 ; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax
1718 ; CHECK-NOBMI-NEXT: retq
1819 ;
1920 ; CHECK-BMI-LABEL: out8_constmask:
2021 ; CHECK-BMI: # %bb.0:
21 ; CHECK-BMI-NEXT: movl %esi, %eax
22 ; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi
23 ; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi
2224 ; CHECK-BMI-NEXT: andb $60, %dil
23 ; CHECK-BMI-NEXT: andb $-61, %al
24 ; CHECK-BMI-NEXT: orb %dil, %al
25 ; CHECK-BMI-NEXT: andb $-61, %sil
26 ; CHECK-BMI-NEXT: leal (%rsi,%rdi), %eax
2527 ; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax
2628 ; CHECK-BMI-NEXT: retq
2729 %mx = and i8 %x, 60
99 define i8 @out8_constmask(i8 %x, i8 %y) {
1010 ; CHECK-NOBMI-LABEL: out8_constmask:
1111 ; CHECK-NOBMI: # %bb.0:
12 ; CHECK-NOBMI-NEXT: movl %esi, %eax
12 ; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi
13 ; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
1314 ; CHECK-NOBMI-NEXT: andb $85, %dil
14 ; CHECK-NOBMI-NEXT: andb $-86, %al
15 ; CHECK-NOBMI-NEXT: orb %dil, %al
15 ; CHECK-NOBMI-NEXT: andb $-86, %sil
16 ; CHECK-NOBMI-NEXT: leal (%rsi,%rdi), %eax
1617 ; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax
1718 ; CHECK-NOBMI-NEXT: retq
1819 ;
1920 ; CHECK-BMI-LABEL: out8_constmask:
2021 ; CHECK-BMI: # %bb.0:
21 ; CHECK-BMI-NEXT: movl %esi, %eax
22 ; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi
23 ; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi
2224 ; CHECK-BMI-NEXT: andb $85, %dil
23 ; CHECK-BMI-NEXT: andb $-86, %al
24 ; CHECK-BMI-NEXT: orb %dil, %al
25 ; CHECK-BMI-NEXT: andb $-86, %sil
26 ; CHECK-BMI-NEXT: leal (%rsi,%rdi), %eax
2527 ; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax
2628 ; CHECK-BMI-NEXT: retq
2729 %mx = and i8 %x, 85
99 define i8 @out8_constmask(i8 %x, i8 %y) {
1010 ; CHECK-NOBMI-LABEL: out8_constmask:
1111 ; CHECK-NOBMI: # %bb.0:
12 ; CHECK-NOBMI-NEXT: movl %esi, %eax
12 ; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi
13 ; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
1314 ; CHECK-NOBMI-NEXT: andb $15, %dil
14 ; CHECK-NOBMI-NEXT: andb $-16, %al
15 ; CHECK-NOBMI-NEXT: orb %dil, %al
15 ; CHECK-NOBMI-NEXT: andb $-16, %sil
16 ; CHECK-NOBMI-NEXT: leal (%rsi,%rdi), %eax
1617 ; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax
1718 ; CHECK-NOBMI-NEXT: retq
1819 ;
1920 ; CHECK-BMI-LABEL: out8_constmask:
2021 ; CHECK-BMI: # %bb.0:
21 ; CHECK-BMI-NEXT: movl %esi, %eax
22 ; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi
23 ; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi
2224 ; CHECK-BMI-NEXT: andb $15, %dil
23 ; CHECK-BMI-NEXT: andb $-16, %al
24 ; CHECK-BMI-NEXT: orb %dil, %al
25 ; CHECK-BMI-NEXT: andb $-16, %sil
26 ; CHECK-BMI-NEXT: leal (%rsi,%rdi), %eax
2527 ; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax
2628 ; CHECK-BMI-NEXT: retq
2729 %mx = and i8 %x, 15
99 define i8 @out8_constmask(i8 %x, i8 %y) {
1010 ; CHECK-NOBMI-LABEL: out8_constmask:
1111 ; CHECK-NOBMI: # %bb.0:
12 ; CHECK-NOBMI-NEXT: movl %esi, %eax
12 ; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi
13 ; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
1314 ; CHECK-NOBMI-NEXT: andb $15, %dil
14 ; CHECK-NOBMI-NEXT: andb $-16, %al
15 ; CHECK-NOBMI-NEXT: orb %dil, %al
15 ; CHECK-NOBMI-NEXT: andb $-16, %sil
16 ; CHECK-NOBMI-NEXT: leal (%rsi,%rdi), %eax
1617 ; CHECK-NOBMI-NEXT: # kill: def $al killed $al killed $eax
1718 ; CHECK-NOBMI-NEXT: retq
1819 ;
1920 ; CHECK-BMI-LABEL: out8_constmask:
2021 ; CHECK-BMI: # %bb.0:
21 ; CHECK-BMI-NEXT: movl %esi, %eax
22 ; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi
23 ; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi
2224 ; CHECK-BMI-NEXT: andb $15, %dil
23 ; CHECK-BMI-NEXT: andb $-16, %al
24 ; CHECK-BMI-NEXT: orb %dil, %al
25 ; CHECK-BMI-NEXT: andb $-16, %sil
26 ; CHECK-BMI-NEXT: leal (%rsi,%rdi), %eax
2527 ; CHECK-BMI-NEXT: # kill: def $al killed $al killed $eax
2628 ; CHECK-BMI-NEXT: retq
2729 %mx = and i8 %x, 15
1313 define i8 @test_bitreverse_i8(i8 %a) nounwind {
1414 ; SSE-LABEL: test_bitreverse_i8:
1515 ; SSE: # %bb.0:
16 ; SSE-NEXT: # kill: def $edi killed $edi def $rdi
17 ; SSE-NEXT: rolb $4, %dil
1618 ; SSE-NEXT: movl %edi, %eax
17 ; SSE-NEXT: rolb $4, %al
18 ; SSE-NEXT: movl %eax, %ecx
19 ; SSE-NEXT: andb $51, %cl
20 ; SSE-NEXT: shlb $2, %cl
21 ; SSE-NEXT: andb $-52, %al
22 ; SSE-NEXT: shrb $2, %al
23 ; SSE-NEXT: orb %cl, %al
24 ; SSE-NEXT: movl %eax, %ecx
25 ; SSE-NEXT: andb $85, %cl
26 ; SSE-NEXT: addb %cl, %cl
27 ; SSE-NEXT: andb $-86, %al
28 ; SSE-NEXT: shrb %al
29 ; SSE-NEXT: orb %cl, %al
19 ; SSE-NEXT: andb $51, %al
20 ; SSE-NEXT: shlb $2, %al
21 ; SSE-NEXT: andb $-52, %dil
22 ; SSE-NEXT: shrb $2, %dil
23 ; SSE-NEXT: orb %al, %dil
24 ; SSE-NEXT: movl %edi, %eax
25 ; SSE-NEXT: andb $85, %al
26 ; SSE-NEXT: addb %al, %al
27 ; SSE-NEXT: andb $-86, %dil
28 ; SSE-NEXT: shrb %dil
29 ; SSE-NEXT: leal (%rdi,%rax), %eax
3030 ; SSE-NEXT: # kill: def $al killed $al killed $eax
3131 ; SSE-NEXT: retq
3232 ;
3333 ; AVX-LABEL: test_bitreverse_i8:
3434 ; AVX: # %bb.0:
35 ; AVX-NEXT: # kill: def $edi killed $edi def $rdi
36 ; AVX-NEXT: rolb $4, %dil
3537 ; AVX-NEXT: movl %edi, %eax
36 ; AVX-NEXT: rolb $4, %al
37 ; AVX-NEXT: movl %eax, %ecx
38 ; AVX-NEXT: andb $51, %cl
39 ; AVX-NEXT: shlb $2, %cl
40 ; AVX-NEXT: andb $-52, %al
41 ; AVX-NEXT: shrb $2, %al
42 ; AVX-NEXT: orb %cl, %al
43 ; AVX-NEXT: movl %eax, %ecx
44 ; AVX-NEXT: andb $85, %cl
45 ; AVX-NEXT: addb %cl, %cl
46 ; AVX-NEXT: andb $-86, %al
47 ; AVX-NEXT: shrb %al
48 ; AVX-NEXT: orb %cl, %al
38 ; AVX-NEXT: andb $51, %al
39 ; AVX-NEXT: shlb $2, %al
40 ; AVX-NEXT: andb $-52, %dil
41 ; AVX-NEXT: shrb $2, %dil
42 ; AVX-NEXT: orb %al, %dil
43 ; AVX-NEXT: movl %edi, %eax
44 ; AVX-NEXT: andb $85, %al
45 ; AVX-NEXT: addb %al, %al
46 ; AVX-NEXT: andb $-86, %dil
47 ; AVX-NEXT: shrb %dil
48 ; AVX-NEXT: leal (%rdi,%rax), %eax
4949 ; AVX-NEXT: # kill: def $al killed $al killed $eax
5050 ; AVX-NEXT: retq
5151 ;