llvm.org GIT mirror llvm / 7931efa
[ARM] Optimize {s,u}{add,sub}.with.overflow. The AArch64 backend contains code to optimize {s,u}{add,sub}.with.overflow during SelectionDAG. This commit ports that code to the ARM backend. Differential revision: https://reviews.llvm.org/D35635 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@321224 91177308-0d34-0410-b5e6-96231b3b80d8 Joel Galenson 2 years ago
3 changed file(s) with 207 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
10401040 if (!Subtarget->isThumb1Only())
10411041 setOperationAction(ISD::SETCCE, MVT::i32, Custom);
10421042
1043 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
1043 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
10441044 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
10451045 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
10461046 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
38933893 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
38943894 }
38953895
3896 // This function returns three things: the arithmetic computation itself
3897 // (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
3898 // comparison and the condition code define the case in which the arithmetic
3899 // computation *does not* overflow.
38963900 std::pair<SDValue, SDValue>
38973901 ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
38983902 SDValue &ARMcc) const {
39183922 break;
39193923 case ISD::UADDO:
39203924 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3921 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3925 // We use ADDC here to correspond to its use in LowerUnsignedALUO.
3926 // We do not use it in the USUBO case as Value may not be used.
3927 Value = DAG.getNode(ARMISD::ADDC, dl,
3928 DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
3929 .getValue(0);
39223930 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
39233931 break;
39243932 case ISD::SSUBO:
45174525 return SDValue();
45184526 }
45194527
4528 SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
4529 SDValue Chain = Op.getOperand(0);
4530 SDValue Cond = Op.getOperand(1);
4531 SDValue Dest = Op.getOperand(2);
4532 SDLoc dl(Op);
4533
4534 // Optimize {s|u}{add|sub}.with.overflow feeding into a branch instruction.
4535 unsigned Opc = Cond.getOpcode();
4536 if (Cond.getResNo() == 1 && (Opc == ISD::SADDO || Opc == ISD::UADDO ||
4537 Opc == ISD::SSUBO || Opc == ISD::USUBO)) {
4538 // Only lower legal XALUO ops.
4539 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
4540 return SDValue();
4541
4542 // The actual operation with overflow check.
4543 SDValue Value, OverflowCmp;
4544 SDValue ARMcc;
4545 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
4546
4547 // Reverse the condition code.
4548 ARMCC::CondCodes CondCode =
4549 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
4550 CondCode = ARMCC::getOppositeCondition(CondCode);
4551 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
4552 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4553
4554 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
4555 OverflowCmp);
4556 }
4557
4558 return SDValue();
4559 }
4560
45204561 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
45214562 SDValue Chain = Op.getOperand(0);
45224563 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
45354576 RHS = DAG.getConstant(0, dl, LHS.getValueType());
45364577 CC = ISD::SETNE;
45374578 }
4579 }
4580
4581 // Optimize {s|u}{add|sub}.with.overflow feeding into a branch instruction.
4582 unsigned Opc = LHS.getOpcode();
4583 if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
4584 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
4585 Opc == ISD::USUBO) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
4586 // Only lower legal XALUO ops.
4587 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
4588 return SDValue();
4589
4590 // The actual operation with overflow check.
4591 SDValue Value, OverflowCmp;
4592 SDValue ARMcc;
4593 std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
4594
4595 if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
4596 // Reverse the condition code.
4597 ARMCC::CondCodes CondCode =
4598 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
4599 CondCode = ARMCC::getOppositeCondition(CondCode);
4600 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
4601 }
4602 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4603
4604 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
4605 OverflowCmp);
45384606 }
45394607
45404608 if (LHS.getValueType() == MVT::i32) {
77927860 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
77937861 case ISD::SELECT: return LowerSELECT(Op, DAG);
77947862 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
7863 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
77957864 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
77967865 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
77977866 case ISD::VASTART: return LowerVASTART(Op, DAG);
643643 SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
644644 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
645645 SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
646 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
646647 SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
647648 SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
648649 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
0 ; RUN: llc < %s -mtriple=arm-eabi -mcpu=generic | FileCheck %s
1
2 define i32 @sadd(i32 %a, i32 %b) local_unnamed_addr #0 {
3 ; CHECK-LABEL: sadd:
4 ; CHECK: mov r[[R0:[0-9]+]], r0
5 ; CHECK-NEXT: add r[[R1:[0-9]+]], r[[R0]], r1
6 ; CHECK-NEXT: cmp r[[R1]], r[[R0]]
7 ; CHECK-NEXT: movvc pc, lr
8 entry:
9 %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
10 %1 = extractvalue { i32, i1 } %0, 1
11 br i1 %1, label %trap, label %cont
12
13 trap:
14 tail call void @llvm.trap() #2
15 unreachable
16
17 cont:
18 %2 = extractvalue { i32, i1 } %0, 0
19 ret i32 %2
20
21 }
22
23 define i32 @uadd(i32 %a, i32 %b) local_unnamed_addr #0 {
24 ; CHECK-LABEL: uadd:
25 ; CHECK: mov r[[R0:[0-9]+]], r0
26 ; CHECK-NEXT: adds r[[R1:[0-9]+]], r[[R0]], r1
27 ; CHECK-NEXT: cmp r[[R1]], r[[R0]]
28 ; CHECK-NEXT: movhs pc, lr
29 entry:
30 %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
31 %1 = extractvalue { i32, i1 } %0, 1
32 br i1 %1, label %trap, label %cont
33
34 trap:
35 tail call void @llvm.trap() #2
36 unreachable
37
38 cont:
39 %2 = extractvalue { i32, i1 } %0, 0
40 ret i32 %2
41
42 }
43
44 define i32 @ssub(i32 %a, i32 %b) local_unnamed_addr #0 {
45 ; CHECK-LABEL: ssub:
46 ; CHECK: cmp r0, r1
47 ; CHECK-NEXT: subvc r0, r0, r1
48 ; CHECK-NEXT: movvc pc, lr
49 entry:
50 %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
51 %1 = extractvalue { i32, i1 } %0, 1
52 br i1 %1, label %trap, label %cont
53
54 trap:
55 tail call void @llvm.trap() #2
56 unreachable
57
58 cont:
59 %2 = extractvalue { i32, i1 } %0, 0
60 ret i32 %2
61
62 }
63
64 define i32 @usub(i32 %a, i32 %b) local_unnamed_addr #0 {
65 ; CHECK-LABEL: usub:
66 ; CHECK: mov r[[R0:[0-9]+]], r0
67 ; CHECK-NEXT: subs r[[R1:[0-9]+]], r[[R0]], r1
68 ; CHECK-NEXT: cmp r[[R0]], r1
69 ; CHECK-NEXT: movhs pc, lr
70 entry:
71 %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
72 %1 = extractvalue { i32, i1 } %0, 1
73 br i1 %1, label %trap, label %cont
74
75 trap:
76 tail call void @llvm.trap() #2
77 unreachable
78
79 cont:
80 %2 = extractvalue { i32, i1 } %0, 0
81 ret i32 %2
82
83 }
84
85 define void @sum(i32* %a, i32* %b, i32 %n) local_unnamed_addr #0 {
86 ; CHECK-LABEL: sum:
87 ; CHECK: ldr [[R0:r[0-9]+]],
88 ; CHECK-NEXT: ldr [[R1:r[0-9]+|lr]],
89 ; CHECK-NEXT: add [[R2:r[0-9]+]], [[R1]], [[R0]]
90 ; CHECK-NEXT: cmp [[R2]], [[R1]]
91 ; CHECK-NEXT: strvc [[R2]],
92 ; CHECK-NEXT: addvc
93 ; CHECK-NEXT: cmpvc
94 ; CHECK-NEXT: bvs
95 entry:
96 %cmp7 = icmp eq i32 %n, 0
97 br i1 %cmp7, label %for.cond.cleanup, label %for.body
98
99 for.cond.cleanup:
100 ret void
101
102 for.body:
103 %i.08 = phi i32 [ %7, %cont2 ], [ 0, %entry ]
104 %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.08
105 %0 = load i32, i32* %arrayidx, align 4
106 %arrayidx1 = getelementptr inbounds i32, i32* %a, i32 %i.08
107 %1 = load i32, i32* %arrayidx1, align 4
108 %2 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %1, i32 %0)
109 %3 = extractvalue { i32, i1 } %2, 1
110 br i1 %3, label %trap, label %cont
111
112 trap:
113 tail call void @llvm.trap() #2
114 unreachable
115
116 cont:
117 %4 = extractvalue { i32, i1 } %2, 0
118 store i32 %4, i32* %arrayidx1, align 4
119 %5 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.08, i32 1)
120 %6 = extractvalue { i32, i1 } %5, 1
121 br i1 %6, label %trap, label %cont2
122
123 cont2:
124 %7 = extractvalue { i32, i1 } %5, 0
125 %cmp = icmp eq i32 %7, %n
126 br i1 %cmp, label %for.cond.cleanup, label %for.body
127
128 }
129
130 declare void @llvm.trap() #2
131 declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1
132 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
133 declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) #1
134 declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1