llvm.org GIT mirror llvm / 0487bd8
ARM: use target-specific SUBS node when combining cmp with cmov.

This has two positive effects. First, using a custom node prevents recombination leading to an infinite loop, since the output DAG is notionally a little more complex than the input one. Second, using a flag-setting instruction allows the subtraction to be folded with the related comparison more easily.

https://reviews.llvm.org/D53190

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@348122 91177308-0d34-0410-b5e6-96231b3b80d8

Tim Northover 1 year, 9 months ago
8 changed file(s) with 62 addition(s) and 30 deletion(s). Raw diff Collapse all Expand all
12811281 case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
12821282
12831283 case ARMISD::CMOV: return "ARMISD::CMOV";
1284 case ARMISD::SUBS: return "ARMISD::SUBS";
12841285
12851286 case ARMISD::SSAT: return "ARMISD::SSAT";
12861287 case ARMISD::USAT: return "ARMISD::USAT";
1270612707 DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
1270712708 Res = DAG.getNode(ISD::ADDCARRY, dl, VTs, Sub, Neg, Carry);
1270812709 }
12709 } else if (CC == ARMCC::NE && LHS != RHS &&
12710 } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
1271012711 (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
1271112712 // This seems pointless but will allow us to combine it further below.
12712 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y)
12713 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12713 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
12714 SDValue Sub =
12715 DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
12716 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
12717 Sub.getValue(1), SDValue());
1271412718 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
12715 N->getOperand(3), Cmp);
12719 N->getOperand(3), CPSRGlue.getValue(1));
12720 FalseVal = Sub;
1271612721 }
1271712722 } else if (isNullConstant(TrueVal)) {
12718 if (CC == ARMCC::EQ && LHS != RHS &&
12723 if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
1271912724 (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
1272012725 // This seems pointless but will allow us to combine it further below
1272112726 // Note that we change == for != as this is the dual for the case above.
12722 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUB x, y), z, !=, (CMPZ x, y)
12723 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12727 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
12728 SDValue Sub =
12729 DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
12730 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
12731 Sub.getValue(1), SDValue());
1272412732 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
1272512733 DAG.getConstant(ARMCC::NE, dl, MVT::i32),
12726 N->getOperand(3), Cmp);
12734 N->getOperand(3), CPSRGlue.getValue(1));
12735 FalseVal = Sub;
1272712736 }
1272812737 }
1272912738
1273012739 // On Thumb1, the DAG above may be further combined if z is a power of 2
1273112740 // (z == 2 ^ K).
12732 // CMOV (SUB x, y), z, !=, (CMPZ x, y) ->
12741 // CMOV (SUBS x, y), z, !=, (SUBS x, y):1 ->
1273312742 // merge t3, t4
1273412743 // where t1 = (SUBCARRY (SUB x, y), z, 0)
1273512744 // t2 = (SUBCARRY (SUB x, y), t1:0, t1:1)
1273612745 // t3 = if K != 0 then (SHL t2:0, K) else t2:0
1273712746 // t4 = (SUB 1, t2:1) [ we want a carry, not a borrow ]
1273812747 const APInt *TrueConst;
1273912748 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
12740 (FalseVal.getOpcode() == ISD::SUB) && (FalseVal.getOperand(0) == LHS) &&
12741 (FalseVal.getOperand(1) == RHS) &&
12749 (FalseVal.getOpcode() == ARMISD::SUBS) &&
12750 (FalseVal.getOperand(0) == LHS) && (FalseVal.getOperand(1) == RHS) &&
1274212751 (TrueConst = isPowerOf2Constant(TrueVal))) {
1274312752 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
1274412753 unsigned ShiftAmount = TrueConst->logBase2();
8484 FMSTAT, // ARM fmstat instruction.
8585
8686 CMOV, // ARM conditional move instructions.
87 SUBS, // Flag-setting subtraction.
8788
8889 SSAT, // Signed saturation
8990 USAT, // Unsigned saturation
143143 [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
144144 def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
145145 [SDNPInGlue]>;
146 def ARMsubs : SDNode<"ARMISD::SUBS", SDTIntBinOp, [SDNPOutGlue]>;
146147
147148 def ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>;
148149
36403641 defm ADDS : AsI1_bin_s_irs;
36413642 defm SUBS : AsI1_bin_s_irs;
36423643
3644 def : ARMPat<(ARMsubs GPR:$Rn, mod_imm:$imm), (SUBSri $Rn, mod_imm:$imm)>;
3645 def : ARMPat<(ARMsubs GPR:$Rn, GPR:$Rm), (SUBSrr $Rn, $Rm)>;
3646 def : ARMPat<(ARMsubs GPR:$Rn, so_reg_imm:$shift),
3647 (SUBSrsi $Rn, so_reg_imm:$shift)>;
3648 def : ARMPat<(ARMsubs GPR:$Rn, so_reg_reg:$shift),
3649 (SUBSrsr $Rn, so_reg_reg:$shift)>;
3650
3651
36433652 let isAdd = 1 in
36443653 defm ADC : AI1_adde_sube_irs<0b0101, "adc", ARMadde, 1>;
36453654 defm SBC : AI1_adde_sube_irs<0b0110, "sbc", ARMsube>;
13501350 Requires<[IsThumb1Only]>,
13511351 Sched<[WriteALU]>;
13521352 }
1353
1354
1355 def : T1Pat<(ARMsubs tGPR:$Rn, tGPR:$Rm), (tSUBSrr $Rn, $Rm)>;
1356 def : T1Pat<(ARMsubs tGPR:$Rn, imm0_7:$imm3), (tSUBSi3 $Rn, imm0_7:$imm3)>;
1357 def : T1Pat<(ARMsubs tGPR:$Rn, imm0_255:$imm8), (tSUBSi8 $Rn, imm0_255:$imm8)>;
1358
13531359
13541360 // Sign-extend byte
13551361 def tSXTB : // A8.6.222
20922092 // node's second value. We can then eliminate the implicit def of CPSR.
20932093 defm t2ADDS : T2I_bin_s_irs ;
20942094 defm t2SUBS : T2I_bin_s_irs ;
2095
2096 def : T2Pat<(ARMsubs GPRnopc:$Rn, t2_so_imm:$imm),
2097 (t2SUBSri $Rn, t2_so_imm:$imm)>;
2098 def : T2Pat<(ARMsubs GPRnopc:$Rn, rGPR:$Rm), (t2SUBSrr $Rn, $Rm)>;
2099 def : T2Pat<(ARMsubs GPRnopc:$Rn, t2_so_reg:$ShiftedRm),
2100 (t2SUBSrs $Rn, t2_so_reg:$ShiftedRm)>;
20952101
20962102 let hasPostISelHook = 1 in {
20972103 defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", ARMadde, 1>;
103103
104104 ; CHECK-COMMON-LABEL: or_icmp_ugt:
105105 ; CHECK-COMMON: ldrb
106 ; CHECK-COMMON: sub.w
107 ; CHECK-COMMON-NOT: uxt
108 ; CHECK-COMMON: cmp.w
106 ; CHECK-COMMON: subs.w
109107 ; CHECK-COMMON-NOT: uxt
110108 ; CHECK-COMMON: cmp
111109 define i1 @or_icmp_ugt(i32 %arg, i8* %ptr) {
3838
3939 define i1 @f7(i32 %a, i32 %b) {
4040 ; CHECK-LABEL: f7:
41 ; CHECK: sub r2, r0, r1, lsr #6
42 ; CHECK: cmp r0, r1, lsr #6
43 ; CHECK: movwne r2, #1
44 ; CHECK: mov r0, r2
45 ; CHECK-T2: sub.w r2, r0, r1, lsr #6
46 ; CHECK-T2: cmp.w r0, r1, lsr #6
41 ; CHECK: subs r0, r0, r1, lsr #6
42 ; CHECK: movwne r0, #1
43 ; CHECK-T2: subs.w r0, r0, r1, lsr #6
4744 ; CHECK-T2: it ne
48 ; CHECK-T2: movne r2, #1
49 ; CHECK-T2: mov r0, r2
45 ; CHECK-T2: movne r0, #1
5046 %tmp = lshr i32 %b, 6
5147 %tmp1 = icmp ne i32 %a, %tmp
5248 ret i1 %tmp1
6763
6864 define i1 @f9(i32 %a) {
6965 ; CHECK-LABEL: f9:
70 ; CHECK: sub r1, r0, r0, ror #8
71 ; CHECK: cmp r0, r0, ror #8
72 ; CHECK: movwne r1, #1
73 ; CHECK: mov r0, r1
74 ; CHECK-T2: sub.w r1, r0, r0, ror #8
75 ; CHECK-T2: cmp.w r0, r0, ror #8
66 ; CHECK: subs r0, r0, r0, ror #8
67 ; CHECK: movwne r0, #1
68 ; CHECK-T2: subs.w r0, r0, r0, ror #8
7669 ; CHECK-T2: it ne
77 ; CHECK-T2: movne r1, #1
78 ; CHECK-T2: mov r0, r1
70 ; CHECK-T2: movne r0, #1
7971 %l8 = shl i32 %a, 24
8072 %r8 = lshr i32 %a, 8
8173 %tmp = or i32 %l8, %r8
141141 ret float %2
142142 }
143143
144 ; CHECK-LABEL: test_overflow_recombine:
145 define i1 @test_overflow_recombine(i32 %in) {
146 ; CHECK: smull [[LO:r[0-9]+]], [[HI:r[0-9]+]]
147 ; CHECK: subs [[ZERO:r[0-9]+]], [[HI]], [[LO]], asr #31
148 ; CHECK: movne [[ZERO]], #1
149 %prod = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 0, i32 %in)
150 %overflow = extractvalue { i32, i1 } %prod, 1
151 ret i1 %overflow
152 }
153
154 declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32)