llvm.org GIT mirror llvm / 624ddd8
Merge r332389 to pick up the fix for PR37431, a regression w/ the new EFLAGS lowering. Required switching $regname to %regname in the MIR test and regenerating the CHECKs for the other test. git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_60@332940 91177308-0d34-0410-b5e6-96231b3b80d8 Chandler Carruth 1 year, 4 months ago
3 changed file(s) with 271 addition(s) and 3 deletion(s). Raw diff Collapse all Expand all
126126 MachineInstr &JmpI, CondRegArray &CondRegs);
127127 void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
128128 MachineInstr &CopyDefI);
129 void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
130 MachineBasicBlock::iterator TestPos,
131 DebugLoc TestLoc, MachineInstr &SetBI,
132 MachineOperand &FlagUse, CondRegArray &CondRegs);
129133 void rewriteSetCC(MachineBasicBlock &TestMBB,
130134 MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
131135 MachineInstr &SetCCI, MachineOperand &FlagUse,
510514 } else if (MI.getOpcode() == TargetOpcode::COPY) {
511515 rewriteCopy(MI, *FlagUse, CopyDefI);
512516 } else {
513 // We assume that arithmetic instructions that use flags also def
514 // them.
517 // We assume all other instructions that use flags also def them.
515518 assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
516519 "Expected a def of EFLAGS for this instruction!");
517520
523526 // logic.
524527 FlagsKilled = true;
525528
526 rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
529 switch (MI.getOpcode()) {
530 case X86::SETB_C8r:
531 case X86::SETB_C16r:
532 case X86::SETB_C32r:
533 case X86::SETB_C64r:
534 // Use custom lowering for arithmetic that is merely extending the
535 // carry flag. We model this as the SETB_C* pseudo instructions.
536 rewriteSetCarryExtended(TestMBB, TestPos, TestLoc, MI, *FlagUse,
537 CondRegs);
538 break;
539
540 default:
541 // Generically handle remaining uses as arithmetic instructions.
542 rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse,
543 CondRegs);
544 break;
545 }
527546 break;
528547 }
529548
755774 MI.eraseFromParent();
756775 }
757776
// Rewrite a SETB_C* pseudo (`SetBI`) so that it takes the carry condition from
// a register rather than from EFLAGS. The COND_B condition register is looked
// up in `CondRegs` (and created via promoteCondToReg at `TestPos` in `TestMBB`
// if it is missing), converted to the register class defined by `SetBI`, and
// subtracted from zero with a SUB of matching width. Every use of the
// register defined by `SetBI` is then redirected to the SUB result and
// `SetBI` is erased.
//
// `TestMBB`, `TestPos` and `TestLoc` are only forwarded to promoteCondToReg.
// `FlagUse` is not referenced anywhere in this body.
777 void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
778 MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
779 DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
780 CondRegArray &CondRegs) {
781 // This routine is only used to handle pseudos for setting a register to zero
782 // or all ones based on CF. This is essentially the sign extended from 1-bit
783 // form of SETB and modeled with the SETB_C* pseudos. They require special
784 // handling as they aren't normal SETcc instructions and are lowered to an
785 // EFLAGS clobbering operation (SBB typically). One simplifying aspect is that
786 // they are only provided in reg-defining forms. A complicating factor is that
787 // they can define many different register widths.
788 assert(SetBI.getOperand(0).isReg() &&
789 "Cannot have a non-register defined operand to this variant of SETB!");
790
791 // Little helper to do the common final step of replacing the register def'ed
792 // by this SETB instruction with a new register and removing the SETB
793 // instruction.
794 auto RewriteToReg = [&](unsigned Reg) {
795 MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
796 SetBI.eraseFromParent();
797 };
798
799 // Grab the register class used for this particular instruction.
800 auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());
801
    // All replacement instructions are inserted immediately before the SETB_C*
    // pseudo, in its own block and with its debug location.
802 MachineBasicBlock &MBB = *SetBI.getParent();
803 auto SetPos = SetBI.getIterator();
804 auto SetLoc = SetBI.getDebugLoc();
805
    // Helper that returns a register of class SetBRC holding the value of
    // `Reg`, emitting whatever conversion is needed before the pseudo. `Reg`
    // is returned unchanged when it already has exactly that class.
    // NOTE(review): the explicit zero extension below always uses MOVZX32rr8,
    // so it presumably relies on every source narrower than 4 bytes being an
    // 8-bit register -- confirm against what promoteCondToReg produces.
806 auto AdjustReg = [&](unsigned Reg) {
807 auto &OrigRC = *MRI->getRegClass(Reg);
808 if (&OrigRC == &SetBRC)
809 return Reg;
810
811 unsigned NewReg;
812
    // Sizes are in bytes from here on (register size in bits divided by 8).
813 int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
814 int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
815 assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
816 assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
    // Sub-register index table indexed by size in bytes: entries exist for 1,
    // 2 and 4 bytes; slots 0 and 3 are NoSubRegister placeholders.
817 int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
818 X86::NoSubRegister, X86::sub_32bit};
819
820 // If the original size is smaller than the target *and* is smaller than 4
821 // bytes, we need to explicitly zero extend it. We always extend to 4-bytes
822 // to maximize the chance of being able to CSE that operation and to avoid
823 // partial dependency stalls extending to 2-bytes.
824 if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
825 NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
826 BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
827 .addReg(Reg);
    // A 32-bit destination is already satisfied by the extended register.
828 if (&SetBRC == &X86::GR32RegClass)
829 return NewReg;
    // Otherwise continue below, treating the extended value as a 4-byte source.
830 Reg = NewReg;
831 OrigRegSize = 4;
832 }
833
    // Three cases remain: widen with SUBREG_TO_REG, narrow with
    // EXTRACT_SUBREG, or plain COPY when the sizes match but the classes
    // differ.
834 NewReg = MRI->createVirtualRegister(&SetBRC);
835 if (OrigRegSize < TargetRegSize) {
836 BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
837 NewReg)
838 .addImm(0)
839 .addReg(Reg)
840 .addImm(SubRegIdx[OrigRegSize]);
841 } else if (OrigRegSize > TargetRegSize) {
842 BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::EXTRACT_SUBREG),
843 NewReg)
844 .addReg(Reg)
845 .addImm(SubRegIdx[TargetRegSize]);
846 } else {
847 BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
848 .addReg(Reg);
849 }
850 return NewReg;
851 };
852
    // CondReg is a reference into CondRegs, so a register created here is
    // stored back into the array as well.
853 unsigned &CondReg = CondRegs[X86::COND_B];
854 if (!CondReg)
855 CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);
856
857 // Adjust the condition to have the desired register width by zero-extending
858 // as needed.
859 // FIXME: We should use a better API to avoid the local reference and using a
860 // different variable here.
861 unsigned ExtCondReg = AdjustReg(CondReg);
862
863 // Now we need to turn this into a bitmask. We do this by subtracting it from
864 // zero.
    // The zero is always materialized as a 32-bit register and then converted
    // to the destination class with the same helper used for the condition.
865 unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
866 BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
867 ZeroReg = AdjustReg(ZeroReg);
868
    // Pick the SUB opcode whose width matches the pseudo being replaced.
869 unsigned Sub;
870 switch (SetBI.getOpcode()) {
871 case X86::SETB_C8r:
872 Sub = X86::SUB8rr;
873 break;
874
875 case X86::SETB_C16r:
876 Sub = X86::SUB16rr;
877 break;
878
879 case X86::SETB_C32r:
880 Sub = X86::SUB32rr;
881 break;
882
883 case X86::SETB_C64r:
884 Sub = X86::SUB64rr;
885 break;
886
887 default:
888 llvm_unreachable("Invalid SETB_C* opcode!");
889 }
    // Emit ResultReg = ZeroReg - ExtCondReg, then redirect the pseudo's users
    // to ResultReg and erase the pseudo.
890 unsigned ResultReg = MRI->createVirtualRegister(&SetBRC);
891 BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
892 .addReg(ZeroReg)
893 .addReg(ExtCondReg);
894 return RewriteToReg(ResultReg);
895 }
896
758897 void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
759898 MachineBasicBlock::iterator TestPos,
760899 DebugLoc TestLoc,
303303 %tmp12 = trunc i32 %tmp11 to i16
304304 br label %bb1
305305 }
306
307 ; Use an instruction pattern that selects the post-RA pseudo which lowers
308 ; SETB into an SBB pattern, to make sure that this kind of use of a copied
309 ; EFLAGS continues to work.
310 define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3) {
311 ; X32-LABEL: PR37431:
312 ; X32: # %bb.0: # %entry
313 ; X32-NEXT: pushl %esi
314 ; X32-NEXT: .cfi_def_cfa_offset 8
315 ; X32-NEXT: .cfi_offset %esi, -8
316 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
317 ; X32-NEXT: movl (%eax), %eax
318 ; X32-NEXT: movl %eax, %ecx
319 ; X32-NEXT: sarl $31, %ecx
320 ; X32-NEXT: cmpl %eax, %eax
321 ; X32-NEXT: sbbl %ecx, %eax
322 ; X32-NEXT: setb %al
323 ; X32-NEXT: sbbb %cl, %cl
324 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
325 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
326 ; X32-NEXT: movb %cl, (%edx)
327 ; X32-NEXT: movzbl %al, %eax
328 ; X32-NEXT: xorl %ecx, %ecx
329 ; X32-NEXT: subl %eax, %ecx
330 ; X32-NEXT: xorl %eax, %eax
331 ; X32-NEXT: xorl %edx, %edx
332 ; X32-NEXT: idivl %ecx
333 ; X32-NEXT: movb %dl, (%esi)
334 ; X32-NEXT: popl %esi
335 ; X32-NEXT: retl
336 ;
337 ; X64-LABEL: PR37431:
338 ; X64: # %bb.0: # %entry
339 ; X64-NEXT: movq %rdx, %rcx
340 ; X64-NEXT: movslq (%rdi), %rax
341 ; X64-NEXT: cmpq %rax, %rax
342 ; X64-NEXT: sbbb %dl, %dl
343 ; X64-NEXT: cmpq %rax, %rax
344 ; X64-NEXT: movb %dl, (%rsi)
345 ; X64-NEXT: sbbl %esi, %esi
346 ; X64-NEXT: xorl %eax, %eax
347 ; X64-NEXT: xorl %edx, %edx
348 ; X64-NEXT: idivl %esi
349 ; X64-NEXT: movb %dl, (%rcx)
350 ; X64-NEXT: retq
351 entry:
352 %tmp = load i32, i32* %arg1
353 %tmp1 = sext i32 %tmp to i64
    ; Unsigned 64-bit compare against undef; its i1 result feeds everything below.
354 %tmp2 = icmp ugt i64 %tmp1, undef
355 %tmp3 = zext i1 %tmp2 to i8
    ; 0 - zext(i1) turns the compare result into a byte that is 0 or -1.
356 %tmp4 = sub i8 0, %tmp3
357 store i8 %tmp4, i8* %arg2
    ; The same byte is then sign extended and used as the divisor of an srem.
358 %tmp5 = sext i8 %tmp4 to i32
359 %tmp6 = srem i32 0, %tmp5
360 %tmp7 = trunc i32 %tmp6 to i8
361 store i8 %tmp7, i8* %arg3
362 ret void
363 }
6161 }
6262
6363 define void @test_rcr(i64 %a, i64 %b) {
64 entry:
65 call void @foo()
66 ret void
67 }
68
69 define void @test_setb_c(i64 %a, i64 %b) {
6470 entry:
6571 call void @foo()
6672 ret void
481487 RET 0
482488
483489 ...
490 ---
491 name: test_setb_c
492 # CHECK-LABEL: name: test_setb_c
493 liveins:
494 - { reg: '%rdi', virtual-reg: '%0' }
495 - { reg: '%rsi', virtual-reg: '%1' }
496 body: |
497 bb.0:
498 liveins: %rdi, %rsi
499
500 %0:gr64 = COPY %rdi
501 %1:gr64 = COPY %rsi
502 %2:gr64 = ADD64rr %0, %1, implicit-def %eflags
503 %3:gr64 = COPY %eflags
504 ; CHECK-NOT: COPY{{( killed)?}} %eflags
505 ; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags
506 ; CHECK-NOT: COPY{{( killed)?}} %eflags
507
508 ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
509 CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
510 ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
511
512 %eflags = COPY %3
513 %4:gr8 = SETB_C8r implicit-def %eflags, implicit %eflags
514 MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %4
515 ; CHECK-NOT: %eflags =
516 ; CHECK: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
517 ; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr8 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_8bit
518 ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr8 = SUB8rr %[[ZERO_SUBREG]], %[[CF_REG]]
519 ; CHECK-NEXT: MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]
520
521 %eflags = COPY %3
522 %5:gr16 = SETB_C16r implicit-def %eflags, implicit %eflags
523 MOV16mr %rsp, 1, %noreg, -16, %noreg, killed %5
524 ; CHECK-NOT: %eflags =
525 ; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
526 ; CHECK-NEXT: %[[CF_TRUNC:[^:]*]]:gr16 = EXTRACT_SUBREG %[[CF_EXT]], %subreg.sub_16bit
527 ; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
528 ; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr16 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_16bit
529 ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr16 = SUB16rr %[[ZERO_SUBREG]], %[[CF_TRUNC]]
530 ; CHECK-NEXT: MOV16mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]
531
532 %eflags = COPY %3
533 %6:gr32 = SETB_C32r implicit-def %eflags, implicit %eflags
534 MOV32mr %rsp, 1, %noreg, -16, %noreg, killed %6
535 ; CHECK-NOT: %eflags =
536 ; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
537 ; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
538 ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr32 = SUB32rr %[[ZERO]], %[[CF_EXT]]
539 ; CHECK-NEXT: MOV32mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]
540
541 %eflags = COPY %3
542 %7:gr64 = SETB_C64r implicit-def %eflags, implicit %eflags
543 MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %7
544 ; CHECK-NOT: %eflags =
545 ; CHECK: %[[CF_EXT1:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
546 ; CHECK-NEXT: %[[CF_EXT2:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[CF_EXT1]], %subreg.sub_32bit
547 ; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
548 ; CHECK-NEXT: %[[ZERO_EXT:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[ZERO]], %subreg.sub_32bit
549 ; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr64 = SUB64rr %[[ZERO_EXT]], %[[CF_EXT2]]
550 ; CHECK-NEXT: MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]
551
552 RET 0
553
554 ...