llvm.org GIT mirror llvm / 1c3af77
Thumb2 and ARM add/subtract with carry fixes. Fixes Thumb2 ADCS and SBCS lowering: <rdar://problem/9275821>. t2ADCS/t2SBCS are now pseudo instructions, consistent with ARM, so the assembly printer correctly prints the 's' suffix. Fixes Thumb2 adde -> SBC matching to check for live/dead carry flags. Fixes the internal ARM machine opcode mnemonic for ADCS/SBCS. Fixes ARM SBC lowering to check for live carry (potential bug). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@130048 91177308-0d34-0410-b5e6-96231b3b80d8 Andrew Trick 9 years ago
8 changed file(s) with 147 addition(s) and 123 deletion(s). Raw diff Collapse all Expand all
50395039 llvm_unreachable("Expecting a BB with two successors!");
50405040 }
50415041
5042 // FIXME: This opcode table should obviously be expressed in the target
5043 // description. We probably just need a "machine opcode" value in the pseudo
5044 // instruction. But the ideal solution may be to simply remove the "S" version
5045 // of the opcode altogether.
5046 struct AddSubFlagsOpcodePair { // One row of AddSubFlagsOpcodeMap.
5047 unsigned PseudoOpc; // Opcode RemapAddSubWithFlags compares against the incoming MI.
5048 unsigned MachineOpc; // Opcode RemapAddSubWithFlags uses for the replacement instruction.
5049 };
5050
5051 static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { // Rows are {PseudoOpc, MachineOpc}; scanned linearly by RemapAddSubWithFlags.
5052 {ARM::ADCSri, ARM::ADCri}, // ARM-mode entries start here.
5053 {ARM::ADCSrr, ARM::ADCrr},
5054 {ARM::ADCSrs, ARM::ADCrs},
5055 {ARM::SBCSri, ARM::SBCri},
5056 {ARM::SBCSrr, ARM::SBCrr},
5057 {ARM::SBCSrs, ARM::SBCrs},
5058 {ARM::RSBSri, ARM::RSBri},
5059 {ARM::RSBSrr, ARM::RSBrr},
5060 {ARM::RSBSrs, ARM::RSBrs},
5061 {ARM::RSCSri, ARM::RSCri},
5062 {ARM::RSCSrs, ARM::RSCrs},
5063 {ARM::t2ADCSri, ARM::t2ADCri}, // Thumb2 (t2-prefixed) entries start here.
5064 {ARM::t2ADCSrr, ARM::t2ADCrr},
5065 {ARM::t2ADCSrs, ARM::t2ADCrs},
5066 {ARM::t2SBCSri, ARM::t2SBCri},
5067 {ARM::t2SBCSrr, ARM::t2SBCrr},
5068 {ARM::t2SBCSrs, ARM::t2SBCrs},
5069 {ARM::t2RSBSri, ARM::t2RSBri},
5070 {ARM::t2RSBSrs, ARM::t2RSBrs},
5071 };
5072
5073 // Convert an Add or Subtract with Carry and Flags to a generic opcode with
5074 // CPSR operand. e.g. ADCS (...) -> ADC (... CPSR).
5075 // Returns true if MI was replaced and erased; returns false, leaving MI untouched, if its opcode is not in AddSubFlagsOpcodeMap.
5076 // FIXME: Somewhere we should assert that CPSR is in the correct
5077 // position to be recognized by the target description as the 'S' bit.
5078 bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI,
5079 MachineBasicBlock *BB) const {
5080 unsigned OldOpc = MI->getOpcode();
5081 unsigned NewOpc = 0; // 0 doubles as the "no table entry matched" sentinel tested below.
5082
5083 // This is only called for instructions that need remapping, so iterating over
5084 // the tiny opcode table is not costly.
5085 static const int NPairs =
5086 sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair);
5087 for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0],
5088 *End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) {
5089 if (OldOpc == Pair->PseudoOpc) {
5090 NewOpc = Pair->MachineOpc;
5091 break; // First match wins.
5092 }
5093 }
5094 if (!NewOpc)
5095 return false; // Not one of the pseudo opcodes in the table; caller decides what to do.
5096
5097 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
5098 DebugLoc dl = MI->getDebugLoc();
5099 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); // New instruction is created in BB at MI's position, reusing MI's debug location.
5100 for (unsigned i = 0; i < MI->getNumOperands(); ++i)
5101 MIB.addOperand(MI->getOperand(i)); // Carry over every operand of the pseudo, in order.
5102 AddDefaultPred(MIB); // Presumably appends the default predicate operands; helper is defined elsewhere -- confirm.
5103 MIB.addReg(ARM::CPSR, RegState::Define); // S bit
5104 MI->eraseFromParent(); // The pseudo is no longer needed once its replacement exists.
5105 return true;
5106 }
5107
50425108 MachineBasicBlock *
50435109 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
50445110 MachineBasicBlock *BB) const {
50465112 DebugLoc dl = MI->getDebugLoc();
50475113 bool isThumb2 = Subtarget->isThumb2();
50485114 switch (MI->getOpcode()) {
5049 default:
5115 default: {
5116 if (RemapAddSubWithFlags(MI, BB))
5117 return BB;
5118
50505119 MI->dump();
50515120 llvm_unreachable("Unexpected instr type to insert");
5052
5121 }
50535122 case ARM::ATOMIC_LOAD_ADD_I8:
50545123 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
50555124 case ARM::ATOMIC_LOAD_ADD_I16:
50995168 case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1);
51005169 case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
51015170 case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
5102
5103 case ARM::ADCSSri:
5104 case ARM::ADCSSrr:
5105 case ARM::ADCSSrs:
5106 case ARM::SBCSSri:
5107 case ARM::SBCSSrr:
5108 case ARM::SBCSSrs:
5109 case ARM::RSBSri:
5110 case ARM::RSBSrr:
5111 case ARM::RSBSrs:
5112 case ARM::RSCSri:
5113 case ARM::RSCSrs: {
5114 unsigned OldOpc = MI->getOpcode();
5115 unsigned Opc = 0;
5116 switch (OldOpc) {
5117 case ARM::ADCSSrr:
5118 Opc = ARM::ADCrr;
5119 break;
5120 case ARM::ADCSSri:
5121 Opc = ARM::ADCri;
5122 break;
5123 case ARM::ADCSSrs:
5124 Opc = ARM::ADCrs;
5125 break;
5126 case ARM::SBCSSrr:
5127 Opc = ARM::SBCrr;
5128 break;
5129 case ARM::SBCSSri:
5130 Opc = ARM::SBCri;
5131 break;
5132 case ARM::SBCSSrs:
5133 Opc = ARM::SBCrs;
5134 break;
5135 case ARM::RSBSri:
5136 Opc = ARM::RSBri;
5137 break;
5138 case ARM::RSBSrr:
5139 Opc = ARM::RSBrr;
5140 break;
5141 case ARM::RSBSrs:
5142 Opc = ARM::RSBrs;
5143 break;
5144 case ARM::RSCSri:
5145 Opc = ARM::RSCri;
5146 break;
5147 case ARM::RSCSrs:
5148 Opc = ARM::RSCrs;
5149 break;
5150 default:
5151 llvm_unreachable("Unknown opcode?");
5152 }
5153
5154 MachineInstrBuilder MIB =
5155 BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(Opc));
5156 for (unsigned i = 0; i < MI->getNumOperands(); ++i)
5157 MIB.addOperand(MI->getOperand(i));
5158 AddDefaultPred(MIB);
5159 MIB.addReg(ARM::CPSR, RegState::Define); // S bit
5160 MI->eraseFromParent();
5161 return BB;
5162 }
5163
51645171
51655172 case ARM::tMOVCCr_pseudo: {
51665173 // To "insert" a SELECT_CC instruction, we actually have to insert the
54735480
54745481 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
54755482 return SDValue();
5476
5483
54775484 APInt SplatBits, SplatUndef;
54785485 unsigned SplatBitSize;
54795486 bool HasAnyUndefs;
55095516
55105517 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
55115518 return SDValue();
5512
5519
55135520 APInt SplatBits, SplatUndef;
55145521 unsigned SplatBitSize;
55155522 bool HasAnyUndefs;
484484 unsigned Size,
485485 unsigned BinOpcode) const;
486486
487 bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
487488 };
488489
489490 enum NEONModImmType {
939939 // NOTE: CPSR def omitted because it will be handled by the custom inserter.
940940 let usesCustomInserter = 1 in {
941941 multiclass AI1_adde_sube_s_irs {
942 def Sri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
943 Size4Bytes, IIC_iALUi,
942 def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
943 Size4Bytes, IIC_iALUi,
944944 [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>;
945 def Srr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
946 Size4Bytes, IIC_iALUr,
945 def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
946 Size4Bytes, IIC_iALUr,
947947 [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
948948 let isCommutable = Commutable;
949949 }
950 def Srs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
951 Size4Bytes, IIC_iALUsr,
950 def rs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
951 Size4Bytes, IIC_iALUsr,
952952 [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>;
953953 }
954954 }
23292329 // The with-carry-in form matches bitwise not instead of the negation.
23302330 // Effectively, the inverse interpretation of the carry flag already accounts
23312331 // for part of the negation.
2332 def : ARMPat<(adde GPR:$src, so_imm_not:$imm),
2332 def : ARMPat<(adde_dead_carry GPR:$src, so_imm_not:$imm),
23332333 (SBCri GPR:$src, so_imm_not:$imm)>;
2334 def : ARMPat<(adde_live_carry GPR:$src, so_imm_not:$imm),
2335 (SBCSri GPR:$src, so_imm_not:$imm)>;
23342336
23352337 // Note: These are implemented in C++ code, because they have to generate
23362338 // ADD/SUBrs instructions, which use a complex pattern that a xform function
680680 let Inst{24-21} = opcod;
681681 }
682682 }
683 }
683684
684685 // Carry setting variants
685 let isCodeGenOnly = 1, Defs = [CPSR] in {
686 multiclass T2I_adde_sube_s_irs opcod, string opc, PatFrag opnode,
687 bit Commutable = 0> {
686 // NOTE: CPSR def omitted because it will be handled by the custom inserter.
687 let usesCustomInserter = 1 in {
688 multiclass T2I_adde_sube_s_irs {
688689 // shifted imm
689 def ri : T2sTwoRegImm<
690 (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
691 opc, "\t$Rd, $Rn, $imm",
692 [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
693 Requires<[IsThumb2]> {
694 let Inst{31-27} = 0b11110;
695 let Inst{25} = 0;
696 let Inst{24-21} = opcod;
697 let Inst{20} = 1; // The S bit.
698 let Inst{15} = 0;
699 }
690 def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
691 Size4Bytes, IIC_iALUi,
692 [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>;
700693 // register
701 def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
702 opc, ".w\t$Rd, $Rn, $Rm",
703 [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
704 Requires<[IsThumb2]> {
694 def rr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
695 Size4Bytes, IIC_iALUr,
696 [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
705697 let isCommutable = Commutable;
706 let Inst{31-27} = 0b11101;
707 let Inst{26-25} = 0b01;
708 let Inst{24-21} = opcod;
709 let Inst{20} = 1; // The S bit.
710 let Inst{14-12} = 0b000; // imm3
711 let Inst{7-6} = 0b00; // imm2
712 let Inst{5-4} = 0b00; // type
713698 }
714699 // shifted register
715 def rs : T2sTwoRegShiftedReg<
716 (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
717 IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
718 [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
719 Requires<[IsThumb2]> {
720 let Inst{31-27} = 0b11101;
721 let Inst{26-25} = 0b01;
722 let Inst{24-21} = opcod;
723 let Inst{20} = 1; // The S bit.
724 }
725 }
700 def rs : t2PseudoInst<
701 (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
702 Size4Bytes, IIC_iALUsi,
703 [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>;
726704 }
727705 }
728706
18021780 BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
18031781 defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc",
18041782 BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
1805 defm t2ADCS : T2I_adde_sube_s_irs<0b1010, "adc",
1806 BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
1807 defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc",
1808 BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
1783 defm t2ADCS : T2I_adde_sube_s_irs, 1>;
1784 defm t2SBCS : T2I_adde_sube_s_irs>;
18091785
18101786 // RSB
18111787 defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
18361812 // Effectively, the inverse interpretation of the carry flag already accounts
18371813 // for part of the negation.
18381814 let AddedComplexity = 1 in
1839 def : T2Pat<(adde rGPR:$src, imm0_255_not:$imm),
1815 def : T2Pat<(adde_dead_carry rGPR:$src, imm0_255_not:$imm),
1816 (t2SBCri rGPR:$src, imm0_255_not:$imm)>;
1817 def : T2Pat<(adde_dead_carry rGPR:$src, t2_so_imm_not:$imm),
1818 (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>;
1819 let AddedComplexity = 1 in
1820 def : T2Pat<(adde_live_carry rGPR:$src, imm0_255_not:$imm),
18401821 (t2SBCSri rGPR:$src, imm0_255_not:$imm)>;
1841 def : T2Pat<(adde rGPR:$src, t2_so_imm_not:$imm),
1822 def : T2Pat<(adde_live_carry rGPR:$src, t2_so_imm_not:$imm),
18421823 (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>;
18431824
18441825 // Select Bytes -- for disassembly only
1818 %tmp2 = sub i64 %tmp1, %b
1919 ret i64 %tmp2
2020 }
21
22 ; add with live carry
23 define i64 @f3(i32 %al, i32 %bl) {
24 ; CHECK: f3:
25 ; CHECK: adds r
26 ; CHECK: adcs r
27 ; CHECK: adc r
28 entry:
29 ; unsigned wide add
30 %aw = zext i32 %al to i64
31 %bw = zext i32 %bl to i64
32 %cw = add i64 %aw, %bw ; both inputs are zero-extended i32, so bit 32 of the sum is the carry out
33 ; ch == carry bit
34 %ch = lshr i64 %cw, 32
35 %dw = add i64 %ch, %bw ; second 64-bit add, consuming the carry bit extracted above
36 ret i64 %dw
37 }
5151 ret i64 %tmp
5252 }
5353
54 ; Example from numerics code that manually computes wider-than-64 values.
55 ; Splits %carry into 32-bit halves, adds %digit to the low half, then adds the carry out of that sum to the high half.
56 ; CHECK: _livecarry:
57 ; CHECK: adds
58 ; CHECK: adcs
59 ; CHECK: adc
60 define i64 @livecarry(i64 %carry, i32 %digit) nounwind {
61 %ch = lshr i64 %carry, 32 ; high 32 bits of %carry
62 %cl = and i64 %carry, 4294967295 ; low 32 bits of %carry (mask is 0xFFFFFFFF)
63 %truncdigit = zext i32 %digit to i64
64 %prod = add i64 %cl, %truncdigit
65 %ph = lshr i64 %prod, 32 ; carry out of the low-half add
66 %carryresult = add i64 %ch, %ph ; high half plus that carry
67 ret i64 %carryresult
68 }
33 define i64 @f1(i64 %a) {
44 ; CHECK: f1
55 ; CHECK: subs r0, #171
6 ; CHECK: adc r1, r1, #-1
6 ; CHECK: sbc r1, r1, #0
77 %tmp = sub i64 %a, 171
88 ret i64 %tmp
99 }
1212 define i64 @f2(i64 %a) {
1313 ; CHECK: f2
1414 ; CHECK: subs.w r0, r0, #1179666
15 ; CHECK: adc r1, r1, #-1
15 ; CHECK: sbc r1, r1, #0
1616 %tmp = sub i64 %a, 1179666
1717 ret i64 %tmp
1818 }
2121 define i64 @f3(i64 %a) {
2222 ; CHECK: f3
2323 ; CHECK: subs.w r0, r0, #872428544
24 ; CHECK: adc r1, r1, #-1
24 ; CHECK: sbc r1, r1, #0
2525 %tmp = sub i64 %a, 872428544
2626 ret i64 %tmp
2727 }
3030 define i64 @f4(i64 %a) {
3131 ; CHECK: f4
3232 ; CHECK: subs.w r0, r0, #1448498774
33 ; CHECK: adc r1, r1, #-1
33 ; CHECK: sbc r1, r1, #0
3434 %tmp = sub i64 %a, 1448498774
3535 ret i64 %tmp
3636 }
3939 define i64 @f5(i64 %a) {
4040 ; CHECK: f5
4141 ; CHECK: subs.w r0, r0, #66846720
42 ; CHECK: adc r1, r1, #-1
42 ; CHECK: sbc r1, r1, #0
4343 %tmp = sub i64 %a, 66846720
4444 ret i64 %tmp
4545 }
None ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
0 ; RUN: llc < %s -march=thumb -mattr=+thumb2 -mattr=+32bit | FileCheck %s
11
22 define i64 @f1(i64 %a, i64 %b) {
33 ; CHECK: f1:
4 ; CHECK: subs r0, r0, r2
5 ; CHECK: sbcs r1, r3
4 ; CHECK: subs.w r0, r0, r2
5 ; To test dead_carry, +32bit prevents sbc converting to 16-bit sbcs
6 ; CHECK: sbc.w r1, r1, r3
67 %tmp = sub i64 %a, %b
78 ret i64 %tmp
89 }