llvm.org GIT mirror llvm / f815701
[ARM] Use ADDCARRY / SUBCARRY This is a preparatory step for D34515. This change: - makes nodes ISD::ADDCARRY and ISD::SUBCARRY legal for i32 - lowering is done by first converting the boolean value into the carry flag using (_, C) ← (ARMISD::ADDC R, -1) and converted back to an integer value using (R, _) ← (ARMISD::ADDE 0, 0, C). An ARMISD::ADDE between the two operations does the actual addition. - for subtraction, given that ISD::SUBCARRY second result is actually a borrow, we need to invert the value of the second operand and result before and after using ARMISD::SUBE. We need to invert the carry result of ARMISD::SUBE to preserve the semantics. - given that the generic combiner may lower ISD::ADDCARRY and ISD::SUBCARRY into ISD::UADDO and ISD::USUBO we need to update their lowering as well, otherwise i64 operations now would require branches. This implies updating the corresponding test for unsigned. - add new combiner to remove the redundant conversions from/to carry flags to/from boolean values (ARMISD::ADDC (ARMISD::ADDE 0, 0, C), -1) → C - fixes PR34045 - fixes PR34564 - fixes PR35103 Differential Revision: https://reviews.llvm.org/D35192 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@320355 91177308-0d34-0410-b5e6-96231b3b80d8 Roger Ferrer Ibanez 2 years ago
7 changed file(s) with 365 addition(s) and 36 deletion(s). Raw diff Collapse all Expand all
21542154 }
21552155
21562156 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2157 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)))
2157 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2158 N1.getResNo() == 0)
21582159 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
21592160 N0, N1.getOperand(0), N1.getOperand(2));
21602161
798798 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
799799 setOperationAction(ISD::USUBO, MVT::i32, Custom);
800800
801 setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
802 setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
803
801804 // i64 operation support.
802805 setOperationAction(ISD::MUL, MVT::i64, Expand);
803806 setOperationAction(ISD::MULHU, MVT::i32, Expand);
39423945 }
39433946
39443947 SDValue
3945 ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
3948 ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
39463949 // Let legalize expand this if it isn't a legal type yet.
39473950 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
39483951 return SDValue();
39613964 ARMcc, CCR, OverflowCmp);
39623965
39633966 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3967 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3968 }
3969
3970 static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
3971 SelectionDAG &DAG) {
3972 SDLoc DL(BoolCarry);
3973 EVT CarryVT = BoolCarry.getValueType();
3974
3975 APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
3976 // This converts the boolean value carry into the carry flag by doing
3977 // ARMISD::ADDC Carry, ~0
3978 return DAG.getNode(ARMISD::ADDC, DL, DAG.getVTList(CarryVT, MVT::i32),
3979 BoolCarry, DAG.getConstant(NegOne, DL, CarryVT));
3980 }
3981
3982 static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
3983 SelectionDAG &DAG) {
3984 SDLoc DL(Flags);
3985
3986 // Now convert the carry flag into a boolean carry. We do this
3987 // using ARMISD:ADDE 0, 0, Carry
3988 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
3989 DAG.getConstant(0, DL, MVT::i32),
3990 DAG.getConstant(0, DL, MVT::i32), Flags);
3991 }
3992
3993 SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
3994 SelectionDAG &DAG) const {
3995 // Let legalize expand this if it isn't a legal type yet.
3996 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3997 return SDValue();
3998
3999 SDValue LHS = Op.getOperand(0);
4000 SDValue RHS = Op.getOperand(1);
4001 SDLoc dl(Op);
4002
4003 EVT VT = Op.getValueType();
4004 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4005 SDValue Value;
4006 SDValue Overflow;
4007 switch (Op.getOpcode()) {
4008 default:
4009 llvm_unreachable("Unknown overflow instruction!");
4010 case ISD::UADDO:
4011 Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
4012 // Convert the carry flag into a boolean value.
4013 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4014 break;
4015 case ISD::USUBO: {
4016 Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
4017 // Convert the carry flag into a boolean value.
4018 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4019 // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
4020 // value. So compute 1 - C.
4021 Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
4022 DAG.getConstant(1, dl, MVT::i32), Overflow);
4023 break;
4024 }
4025 }
4026
39644027 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
39654028 }
39664029
73657428 Op.getOperand(1));
73667429 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
73677430 Op.getOperand(1), Op.getOperand(2));
7431 }
7432
7433 static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
7434 SDNode *N = Op.getNode();
7435 EVT VT = N->getValueType(0);
7436 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
7437
7438 SDValue Carry = Op.getOperand(2);
7439 EVT CarryVT = Carry.getValueType();
7440
7441 SDLoc DL(Op);
7442
7443 APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
7444
7445 SDValue Result;
7446 if (Op.getOpcode() == ISD::ADDCARRY) {
7447 // This converts the boolean value carry into the carry flag.
7448 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
7449
7450 // Do the addition proper using the carry flag we wanted.
7451 Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
7452 Op.getOperand(1), Carry.getValue(1));
7453
7454 // Now convert the carry flag into a boolean value.
7455 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
7456 } else {
7457 // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we
7458 // have to invert the carry first.
7459 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
7460 DAG.getConstant(1, DL, MVT::i32), Carry);
7461 // This converts the boolean value carry into the carry flag.
7462 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
7463
7464 // Do the subtraction proper using the carry flag we wanted.
7465 Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
7466 Op.getOperand(1), Carry.getValue(1));
7467
7468 // Now convert the carry flag into a boolean value.
7469 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
7470 // But the carry returned by ARMISD::SUBE is not a borrow as expected
7471 // by ISD::SUBCARRY, so compute 1 - C.
7472 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
7473 DAG.getConstant(1, DL, MVT::i32), Carry);
7474 }
7475
7476 // Return both values.
7477 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
73687478 }
73697479
73707480 SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
77227832 case ISD::ADDE:
77237833 case ISD::SUBC:
77247834 case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
7835 case ISD::ADDCARRY:
7836 case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
77257837 case ISD::SADDO:
7838 case ISD::SSUBO:
7839 return LowerSignedALUO(Op, DAG);
77267840 case ISD::UADDO:
7727 case ISD::SSUBO:
77287841 case ISD::USUBO:
7729 return LowerXALUO(Op, DAG);
7842 return LowerUnsignedALUO(Op, DAG);
77307843 case ISD::ATOMIC_LOAD:
77317844 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
77327845 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
96749787 // a S/UMLAL instruction.
96759788 // UMUL_LOHI
96769789 // / :lo \ :hi
9677 // / \ [no multiline comment]
9678 // loAdd -> ADDE |
9679 // \ :glue /
9680 // \ /
9681 // ADDC <- hiAdd
9790 // V \ [no multiline comment]
9791 // loAdd -> ADDC |
9792 // \ :carry /
9793 // V V
9794 // ADDE <- hiAdd
96829795 //
96839796 assert(AddeNode->getOpcode() == ARMISD::ADDE && "Expect an ADDE");
96849797
96869799 AddeNode->getOperand(2).getValueType() == MVT::i32 &&
96879800 "ADDE node has the wrong inputs");
96889801
9689 // Check that we have a glued ADDC node.
9802 // Check that we are chained to the right ADDC node.
96909803 SDNode* AddcNode = AddeNode->getOperand(2).getNode();
96919804 if (AddcNode->getOpcode() != ARMISD::ADDC)
96929805 return SDValue();
97379850 SDValue* LoMul = nullptr;
97389851 SDValue* LowAdd = nullptr;
97399852
9740 // Ensure that ADDE is from high result of ISD::SMUL_LOHI.
9853 // Ensure that ADDE is from high result of ISD::xMUL_LOHI.
97419854 if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1)))
97429855 return SDValue();
97439856
97609873 }
97619874
97629875 if (!LoMul)
9876 return SDValue();
9877
9878 // If HiAdd is the same node as ADDC or is a predecessor of ADDC the
9879 // replacement below will create a cycle.
9880 if (AddcNode == HiAdd->getNode() ||
9881 AddcNode->isPredecessorOf(HiAdd->getNode()))
97639882 return SDValue();
97649883
97659884 // Create the merged node.
98639982 return SDValue();
98649983 }
98659984
9866 static SDValue PerformAddcSubcCombine(SDNode *N, SelectionDAG &DAG,
9985 static SDValue PerformAddcSubcCombine(SDNode *N,
9986 TargetLowering::DAGCombinerInfo &DCI,
98679987 const ARMSubtarget *Subtarget) {
9988 SelectionDAG &DAG(DCI.DAG);
9989
9990 if (N->getOpcode() == ARMISD::ADDC) {
9991 // (ADDC (ADDE 0, 0, C), -1) -> C
9992 SDValue LHS = N->getOperand(0);
9993 SDValue RHS = N->getOperand(1);
9994 if (LHS->getOpcode() == ARMISD::ADDE &&
9995 isNullConstant(LHS->getOperand(0)) &&
9996 isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
9997 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
9998 }
9999 }
10000
986810001 if (Subtarget->isThumb1Only()) {
986910002 SDValue RHS = N->getOperand(1);
987010003 if (ConstantSDNode *C = dyn_cast(RHS)) {
1188212015 return SDValue();
1188312016 }
1188412017
12018 static const APInt *isPowerOf2Constant(SDValue V) {
12019 ConstantSDNode *C = dyn_cast(V);
12020 if (!C)
12021 return nullptr;
12022 const APInt *CV = &C->getAPIntValue();
12023 return CV->isPowerOf2() ? CV : nullptr;
12024 }
12025
1188512026 SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
1188612027 // If we have a CMOV, OR and AND combination such as:
1188712028 // if (x & CN)
1191012051 SDValue And = CmpZ->getOperand(0);
1191112052 if (And->getOpcode() != ISD::AND)
1191212053 return SDValue();
11913 ConstantSDNode *AndC = dyn_cast(And->getOperand(1));
11914 if (!AndC || !AndC->getAPIntValue().isPowerOf2())
12054 const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
12055 if (!AndC)
1191512056 return SDValue();
1191612057 SDValue X = And->getOperand(0);
1191712058
1195112092 SDValue V = Y;
1195212093 SDLoc dl(X);
1195312094 EVT VT = X.getValueType();
11954 unsigned BitInX = AndC->getAPIntValue().logBase2();
12095 unsigned BitInX = AndC->logBase2();
1195512096
1195612097 if (BitInX != 0) {
1195712098 // We must shift X first.
1211212253 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
1211312254 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
1211412255 case ARMISD::ADDC:
12115 case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI.DAG, Subtarget);
12256 case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
1211612257 case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI.DAG, Subtarget);
1211712258 case ARMISD::BFI: return PerformBFICombine(N, DCI);
1211812259 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
1282712968 case ARMISD::ADDE:
1282812969 case ARMISD::SUBC:
1282912970 case ARMISD::SUBE:
12830 // These nodes' second result is a boolean
12831 if (Op.getResNo() == 0)
12832 break;
12833 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
12971 // Special cases when we convert a carry to a boolean.
12972 if (Op.getResNo() == 0) {
12973 SDValue LHS = Op.getOperand(0);
12974 SDValue RHS = Op.getOperand(1);
12975 // (ADDE 0, 0, C) will give us a single bit.
12976 if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
12977 isNullConstant(RHS)) {
12978 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
12979 return;
12980 }
12981 }
1283412982 break;
1283512983 case ARMISD::CMOV: {
1283612984 // Bits are known zero/one if known on the LHS and RHS.
637637 SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;
638638 SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
639639 SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
640 SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
640 SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const;
641 SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
641642 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
642643 SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
643644 SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
None ; RUN: llc < %s -mtriple=arm-linux -mcpu=generic | FileCheck %s
0 ; RUN: llc < %s -mtriple=arm-linux -mcpu=generic -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
1 ; RUN: llc < %s -mtriple=thumbv6m-eabi -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=THUMBV6
2 ; RUN: llc < %s -mtriple=thumbv7-eabi -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=THUMBV7
13
24 define i32 @uadd_overflow(i32 %a, i32 %b) #0 {
35 %sadd = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
68 ret i32 %2
79
810 ; CHECK-LABEL: uadd_overflow:
9 ; CHECK: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
10 ; CHECK: mov r[[R1]], #1
11 ; CHECK: cmp r[[R2]], r[[R0]]
12 ; CHECK: movhs r[[R1]], #0
11
12 ; ARM: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
13 ; ARM: mov r[[R2:[0-9]+]], #0
14 ; ARM: adc r[[R0]], r[[R2]], #0
15
16 ; THUMBV6: movs r[[R2:[0-9]+]], #0
17 ; THUMBV6: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
18 ; THUMBV6: adcs r[[R2]], r[[R2]]
19 ; THUMBV6: mov r[[R0]], r[[R2]]
20
21 ; THUMBV7: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
22 ; THUMBV7: mov.w r[[R2:[0-9]+]], #0
23 ; THUMBV7: adc r[[R0]], r[[R2]], #0
1324 }
1425
1526
2031 ret i32 %2
2132
2233 ; CHECK-LABEL: sadd_overflow:
23 ; CHECK: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
24 ; CHECK: mov r[[R1]], #1
25 ; CHECK: cmp r[[R2]], r[[R0]]
26 ; CHECK: movvc r[[R1]], #0
34
35 ; ARM: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
36 ; ARM: mov r[[R1]], #1
37 ; ARM: cmp r[[R2]], r[[R0]]
38 ; ARM: movvc r[[R1]], #0
39
40 ; THUMBV6: mov r[[R2:[0-9]+]], r[[R0:[0-9]+]]
41 ; THUMBV6: adds r[[R3:[0-9]+]], r[[R2]], r[[R1:[0-9]+]]
42 ; THUMBV6: movs r[[R0]], #0
43 ; THUMBV6: movs r[[R1]], #1
44 ; THUMBV6: cmp r[[R3]], r[[R2]]
45 ; THUMBV6: bvc .L[[LABEL:.*]]
46 ; THUMBV6: mov r[[R0]], r[[R1]]
47 ; THUMBV6: .L[[LABEL]]:
48
49 ; THUMBV7: movs r[[R1]], #1
50 ; THUMBV7: cmp r[[R2]], r[[R0]]
51 ; THUMBV7: it vc
52 ; THUMBV7: movvc r[[R1]], #0
53 ; THUMBV7: mov r[[R0]], r[[R1]]
2754 }
2855
2956 define i32 @usub_overflow(i32 %a, i32 %b) #0 {
3360 ret i32 %2
3461
3562 ; CHECK-LABEL: usub_overflow:
36 ; CHECK: mov r[[R2]], #1
37 ; CHECK: cmp r[[R0]], r[[R1]]
38 ; CHECK: movhs r[[R2]], #0
63
64 ; ARM: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
65 ; ARM: mov r[[R2:[0-9]+]], #0
66 ; ARM: adc r[[R0]], r[[R2]], #0
67 ; ARM: rsb r[[R0]], r[[R0]], #1
68
69 ; THUMBV6: movs r[[R2:[0-9]+]], #0
70 ; THUMBV6: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
71 ; THUMBV6: adcs r[[R2]], r[[R2]]
72 ; THUMBV6: movs r[[R0]], #1
73 ; THUMBV6: subs r[[R0]], r[[R0]], r[[R2]]
74
75 ; THUMBV7: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
76 ; THUMBV7: mov.w r[[R2:[0-9]+]], #0
77 ; THUMBV7: adc r[[R0]], r[[R2]], #0
78 ; THUMBV7: rsb.w r[[R0]], r[[R0]], #1
79
80 ; We should know that the overflow is just 1 bit,
81 ; no need to clear any other bit
82 ; CHECK-NOT: and
3983 }
4084
4185 define i32 @ssub_overflow(i32 %a, i32 %b) #0 {
4589 ret i32 %2
4690
4791 ; CHECK-LABEL: ssub_overflow:
48 ; CHECK: mov r[[R2]], #1
49 ; CHECK: cmp r[[R0]], r[[R1]]
50 ; CHECK: movvc r[[R2]], #0
92
93 ; ARM: mov r[[R2]], #1
94 ; ARM: cmp r[[R0]], r[[R1]]
95 ; ARM: movvc r[[R2]], #0
96
97 ; THUMBV6: movs r[[R0]], #0
98 ; THUMBV6: movs r[[R3:[0-9]+]], #1
99 ; THUMBV6: cmp r[[R2]], r[[R1:[0-9]+]]
100 ; THUMBV6: bvc .L[[LABEL:.*]]
101 ; THUMBV6: mov r[[R0]], r[[R3]]
102 ; THUMBV6: .L[[LABEL]]:
103
104 ; THUMBV7: movs r[[R2:[0-9]+]], #1
105 ; THUMBV7: cmp r[[R0:[0-9]+]], r[[R1:[0-9]+]]
106 ; THUMBV7: it vc
107 ; THUMBV7: movvc r[[R2]], #0
108 ; THUMBV7: mov r[[R0]], r[[R2]]
51109 }
52110
53111 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
0 ; RUN: llc < %s -mtriple thumbv7 | FileCheck %s
1
2 define hidden void @foo(i32* %ptr, i1 zeroext %long_blocks) {
3 entry:
4 ; This test is actually checking that no cycle is introduced but at least we
5 ; want to see one umull.
6 ; CHECK: umull
7 %0 = load i32, i32* %ptr, align 4
8 %conv.i.i13.i = zext i32 %0 to i64
9 %mul.i.i14.i = mul nuw nsw i64 %conv.i.i13.i, 18782
10 %1 = load i32, i32* undef, align 4
11 %conv4.i.i16.i = zext i32 %1 to i64
12 %add5.i.i17.i = add nuw nsw i64 %mul.i.i14.i, %conv4.i.i16.i
13 %shr.i.i18.i = lshr i64 %add5.i.i17.i, 32
14 %add10.i.i20.i = add nuw nsw i64 %shr.i.i18.i, %add5.i.i17.i
15 %conv11.i.i21.i = trunc i64 %add10.i.i20.i to i32
16 %x.0.neg.i.i26.i = sub i32 -2, %conv11.i.i21.i
17 %sub.i.i27.i = add i32 %x.0.neg.i.i26.i, 0
18 store i32 %sub.i.i27.i, i32* %ptr, align 4
19 br label %while.body.i
20
21 while.body.i: ; preds = %while.body.i, %entry
22 br label %while.body.i
23 }
24
0 ; RUN: llc < %s -mtriple thumbv7 | FileCheck %s
1
2 ; ModuleID = 'bugpoint-reduced-simplified.bc'
3 define hidden void @bn_mul_comba8(i32* nocapture %r, i32* nocapture readonly %a, i32* nocapture readonly %b) local_unnamed_addr {
4 entry:
5 ; This test is actually checking that no cycle is introduced but at least we
6 ; want to see a couple of umull and one umlal in the output
7 ; CHECK: umull
8 ; CHECK: umull
9 ; CHECK: umlal
10 %0 = load i32, i32* %a, align 4
11 %conv = zext i32 %0 to i64
12 %1 = load i32, i32* %b, align 4
13 %conv2 = zext i32 %1 to i64
14 %mul = mul nuw i64 %conv2, %conv
15 %shr = lshr i64 %mul, 32
16 %2 = load i32, i32* %a, align 4
17 %conv13 = zext i32 %2 to i64
18 %3 = load i32, i32* undef, align 4
19 %conv15 = zext i32 %3 to i64
20 %mul16 = mul nuw i64 %conv15, %conv13
21 %add18 = add i64 %mul16, %shr
22 %shr20 = lshr i64 %add18, 32
23 %conv21 = trunc i64 %shr20 to i32
24 %4 = load i32, i32* undef, align 4
25 %conv34 = zext i32 %4 to i64
26 %5 = load i32, i32* %b, align 4
27 %conv36 = zext i32 %5 to i64
28 %mul37 = mul nuw i64 %conv36, %conv34
29 %conv38 = and i64 %add18, 4294967295
30 %add39 = add i64 %mul37, %conv38
31 %shr41 = lshr i64 %add39, 32
32 %conv42 = trunc i64 %shr41 to i32
33 %add43 = add i32 %conv42, %conv21
34 %cmp44 = icmp ult i32 %add43, %conv42
35 %c1.1 = zext i1 %cmp44 to i32
36 %add65 = add i32 0, %c1.1
37 %add86 = add i32 %add65, 0
38 %add107 = add i32 %add86, 0
39 %conv124 = zext i32 %add107 to i64
40 %add125 = add i64 0, %conv124
41 %conv145 = and i64 %add125, 4294967295
42 %add146 = add i64 %conv145, 0
43 %conv166 = and i64 %add146, 4294967295
44 %add167 = add i64 %conv166, 0
45 %conv187 = and i64 %add167, 4294967295
46 %add188 = add i64 %conv187, 0
47 %conv189 = trunc i64 %add188 to i32
48 %arrayidx200 = getelementptr inbounds i32, i32* %r, i32 3
49 store i32 %conv189, i32* %arrayidx200, align 4
50 ret void
51 }
52
0 ; RUN: llc -O2 -mtriple arm < %s | FileCheck %s
1
2 ; Function Attrs: norecurse nounwind readnone
3 define i32 @foo(i32 %vreg0, i32 %vreg1, i32 %vreg2, i32 %vreg3, i32 %vreg4) local_unnamed_addr {
4 entry:
5 %conv = zext i32 %vreg2 to i64
6 %conv1 = zext i32 %vreg0 to i64
7 %add2 = add nuw nsw i64 %conv, %conv1
8 %shr = lshr i64 %add2, 32
9 %conv4 = trunc i64 %shr to i32
10 %conv5 = and i64 %add2, 4294967295
11 %add8 = add nuw nsw i64 %conv5, %conv1
12 %shr9 = lshr i64 %add8, 32
13 %conv10 = trunc i64 %shr9 to i32
14 %add11 = add nuw nsw i32 %conv10, %conv4
15 %conv12 = zext i32 %vreg3 to i64
16 %conv14 = zext i32 %vreg1 to i64
17 %add15 = add nuw nsw i64 %conv12, %conv14
18 %shr16 = lshr i64 %add15, 32
19 %conv19 = zext i32 %vreg4 to i64
20 %add20 = add nuw nsw i64 %shr16, %conv19
21 %shr22 = lshr i64 %add20, 32
22 %conv23 = trunc i64 %shr22 to i32
23 %add24 = add nuw nsw i32 %add11, %conv23
24 ret i32 %add24
25
26 ; CHECK: push {r11, lr}
27 ; CHECK-NEXT: adds r2, r2, r0
28 ; CHECK-NEXT: mov r12, #0
29 ; CHECK-NEXT: adc lr, r12, #0
30 ; CHECK-NEXT: adds r0, r2, r0
31 ; CHECK-NEXT: ldr r2, [sp, #8]
32 ; CHECK-NEXT: adc r0, r12, #0
33 ; CHECK-NEXT: adds r1, r3, r1
34 ; The interesting bit is the next instruction, which looks
35 ; like it is computing a dead r1 but is actually computing a carry
36 ; for the final adc.
37 ; CHECK-NEXT: adcs r1, r2, #0
38 ; CHECK-NEXT: adc r0, r0, lr
39 ; CHECK-NEXT: pop {r11, lr}
40 ; CHECK-NEXT: mov pc, lr
41
42 }