llvm.org GIT mirror llvm / 759d988
Scaling up values in ARMBaseInstrInfo::isProfitableToIfCvt() before they are scaled by a probability to avoid a precision issue. In ARMBaseInstrInfo::isProfitableToIfCvt(), there is a simple cost model in which the number of cycles is scaled by a probability to estimate the cost. However, when the number of cycles is small (which is usually the case), precision is lost in the computation. To avoid this issue, this patch scales those cycles by 1024 (chosen to make the multiplication a little faster) before they are scaled by the probability. Other variables are also scaled up for the final comparison. Differential Revision: http://reviews.llvm.org/D12742 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248018 91177308-0d34-0410-b5e6-96231b3b80d8 Cong Hou 4 years ago
6 changed file(s) with 30 addition(s) and 24 deletion(s). Raw diff Collapse all Expand all
16691669 }
16701670
16711671 // Attempt to estimate the relative costs of predication versus branching.
1672 unsigned UnpredCost = Probability.scale(NumCycles);
1673 UnpredCost += 1; // The branch itself
1674 UnpredCost += Subtarget.getMispredictionPenalty() / 10;
1675
1676 return (NumCycles + ExtraPredCycles) <= UnpredCost;
1672 // Here we scale up each component of UnpredCost to avoid a precision issue
1673 // when scaling NumCycles by Probability.
1674 const unsigned ScalingUpFactor = 1024;
1675 unsigned UnpredCost = Probability.scale(NumCycles * ScalingUpFactor);
1676 UnpredCost += ScalingUpFactor; // The branch itself
1677 UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
1678
1679 return (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost;
16771680 }
16781681
16791682 bool ARMBaseInstrInfo::
16861689 return false;
16871690
16881691 // Attempt to estimate the relative costs of predication versus branching.
1689 unsigned TUnpredCost = Probability.scale(TCycles);
1690 unsigned FUnpredCost = Probability.getCompl().scale(FCycles);
1692 // Here we scale up each component of UnpredCost to avoid a precision issue
1693 // when scaling TCycles/FCycles by Probability.
1694 const unsigned ScalingUpFactor = 1024;
1695 unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
1696 unsigned FUnpredCost =
1697 Probability.getCompl().scale(FCycles * ScalingUpFactor);
16911698 unsigned UnpredCost = TUnpredCost + FUnpredCost;
1692 UnpredCost += 1; // The branch itself
1693 UnpredCost += Subtarget.getMispredictionPenalty() / 10;
1694
1695 return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
1699 UnpredCost += 1 * ScalingUpFactor; // The branch itself
1700 UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
1701
1702 return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost;
16961703 }
16971704
16981705 bool
0 ; REQUIRES: asserts
1 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -stats 2>&1 | not grep "Number of pipeline stalls"
1 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -disable-ifcvt-diamond -stats 2>&1 | FileCheck %s
22 ; Evaluate the two vld1.8 instructions in separate MBB's,
33 ; instead of stalling on one and conditionally overwriting its result.
4 ;
5 ; Update: After if-conversion the two vld1.8 instructions are in the same MBB
6 ; again. So we disable this if-conversion to eliminate its influence on this
7 ; test.
48
9 ; CHECK-NOT: Number of pipeline stalls
510 define <16 x i8> @multiselect(i32 %avail, i8* %foo, i8* %bar) {
611 entry:
712 %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %foo, i32 1)
0 ; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
11
2 ; Do not if-convert when branches go to the different loops.
32 ; CHECK-LABEL: t:
4 ; CHECK-NOT: subgt
5 ; CHECK-NOT: suble
6 ; Don't use
3 ; CHECK: subgt
4 ; CHECK: suble
75 define i32 @t(i32 %a, i32 %b) {
86 entry:
97 %tmp1434 = icmp eq i32 %a, %b ; [#uses=1]
7676 ; CHECK: blx __Znwm
7777 ; CHECK: {{.*}}@ %entry.do.body.i.i.i_crit_edge
7878 ; CHECK: str r0, [sp, [[OFFSET:#[0-9]+]]]
79 ; CHECK: {{.*}}@ %do.body.i.i.i
7980 ; CHECK: ldr [[R0:r[0-9]+]], [sp, [[OFFSET]]]
80 ; CHECK: {{.*}}@ %do.body.i.i.i
8181 ; CHECK: cbz [[R0]]
8282
8383 %"class.std::__1::basic_string" = type { %"class.std::__1::__compressed_pair" }
5858 br i1 %cmp.1, label %for.body.2, label %for.end
5959
6060 for.body.2: ; preds = %for.body.1
61 ; CHECK: %for.body.2
62 ; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
63 ; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
6461 %arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %add5.1
6562 %4 = load i8, i8* %arrayidx.2, align 1
6663 %conv6.2 = zext i8 %4 to i32
2323
2424 define i32 @t2(i32 %a, i32 %b) nounwind {
2525 entry:
26 ; Do not if-convert when branches go to the different loops.
2726 ; CHECK-LABEL: t2:
28 ; CHECK-NOT: ite gt
29 ; CHECK-NOT: subgt
30 ; CHECK-NOT: suble
27 ; CHECK: ite gt
28 ; CHECK: subgt
29 ; CHECK: suble
3130 %tmp1434 = icmp eq i32 %a, %b ; [#uses=1]
3231 br i1 %tmp1434, label %bb17, label %bb.outer
3332