llvm.org GIT mirror llvm / 34b6df0
[PPC] Correctly adjust branch probability in PPCReduceCRLogicals In PPCReduceCRLogicals after splitting the original MBB into 2, the 2 impacted branches still use the original branch probability. This is unreasonable. Suppose we have the following code, and the probability of each successor is 50%. condc = conda || condb br condc, label %target, label %fallthrough It can be transformed to the following: br conda, label %target, label %newbb newbb: br condb, label %target, label %fallthrough Since each branch has a probability of 50% to each successor, the total probability to %fallthrough is 25% now, and the total probability to %target is 75%. This actually changes the original profiling data. A more reasonable probability can be set to 70% to the false side for each branch instruction, so the total probability to %fallthrough is close to 50%. This patch assumes the two incoming edges of the branch target have the same edge frequency, computes a new probability for each target, and keeps the total probability to the original targets unchanged. Differential Revision: https://reviews.llvm.org/D62430 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362237 91177308-0d34-0410-b5e6-96231b3b80d8 Guozhi Wei 4 months ago
4 changed file(s) with 138 addition(s) and 8 deletion(s). Raw diff Collapse all Expand all
117117 return *this;
118118 }
119119
120 BranchProbability &operator/=(BranchProbability RHS) {
121 assert(N != UnknownN && RHS.N != UnknownN &&
122 "Unknown probability cannot participate in arithmetics.");
123 N = (static_cast<uint64_t>(N) * D + RHS.N / 2) / RHS.N;
124 return *this;
125 }
126
120127 BranchProbability &operator/=(uint32_t RHS) {
121128 assert(N != UnknownN &&
122129 "Unknown probability cannot participate in arithmetics.");
146153 BranchProbability operator*(uint32_t RHS) const {
147154 BranchProbability Prob(*this);
148155 Prob *= RHS;
156 return Prob;
157 }
158
159 BranchProbability operator/(BranchProbability RHS) const {
160 BranchProbability Prob(*this);
161 Prob /= RHS;
149162 return Prob;
150163 }
151164
165165 : *ThisMBB->succ_begin();
166166 MachineBasicBlock *NewBRTarget =
167167 BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget;
168 BranchProbability ProbToNewTarget =
169 !BSI.MBPI ? BranchProbability::getUnknown()
170 : BSI.MBPI->getEdgeProbability(ThisMBB, NewBRTarget);
168
169 // It's impossible to know the precise branch probability after the split.
170 // But it still needs to be reasonable: the total probability to the original
171 // targets should not be changed.
172 // After split NewBRTarget will get two incoming edges. Assume P0 is the
173 // original branch probability to NewBRTarget, P1 and P2 are new branch
174 // probabilities to NewBRTarget after split. If the two edge frequencies are
175 // the same, then
176 // F * P1 = F * P0 / 2 ==> P1 = P0 / 2
177 // F * (1 - P1) * P2 = F * P1 ==> P2 = P1 / (1 - P1)
178 BranchProbability ProbToNewTarget, ProbFallThrough; // Prob for new Br.
179 BranchProbability ProbOrigTarget, ProbOrigFallThrough; // Prob for orig Br.
180 ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown();
181 ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown();
182 if (BSI.MBPI) {
183 if (BSI.BranchToFallThrough) {
184 ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough) / 2;
185 ProbFallThrough = ProbToNewTarget.getCompl();
186 ProbOrigFallThrough = ProbToNewTarget / ProbToNewTarget.getCompl();
187 ProbOrigTarget = ProbOrigFallThrough.getCompl();
188 } else {
189 ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget) / 2;
190 ProbFallThrough = ProbToNewTarget.getCompl();
191 ProbOrigTarget = ProbToNewTarget / ProbToNewTarget.getCompl();
192 ProbOrigFallThrough = ProbOrigTarget.getCompl();
193 }
194 }
171195
172196 // Create a new basic block.
173197 MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore;
179203 // Move everything after SplitBefore into the new block.
180204 NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end());
181205 NewMBB->transferSuccessors(ThisMBB);
182
183 // Add the two successors to ThisMBB. The probabilities come from the
184 // existing blocks if available.
206 if (!ProbOrigTarget.isUnknown()) {
207 auto MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigTarget);
208 NewMBB->setSuccProbability(MBBI, ProbOrigTarget);
209 MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigFallThrough);
210 NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough);
211 }
212
213 // Add the two successors to ThisMBB.
185214 ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget);
186 ThisMBB->addSuccessor(NewMBB, ProbToNewTarget.getCompl());
215 ThisMBB->addSuccessor(NewMBB, ProbFallThrough);
187216
188217 // Add the branches to ThisMBB.
189218 BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
0 ; RUN: llc -O2 -ppc-reduce-cr-logicals -print-machine-bfi -o - %s 2>&1 | FileCheck %s
1 target datalayout = "e-m:e-i64:64-n32:64"
2 target triple = "powerpc64le-grtev4-linux-gnu"
3
4 ; First block frequency info
5 ;CHECK: block-frequency-info: loop_test
6 ;CHECK-NEXT: - BB0[entry]: float = 1.0, int = 12
7 ;CHECK-NEXT: - BB1[for.check]: float = 2.6667, int = 34
8 ;CHECK-NEXT: - BB2[test1]: float = 1.6667, int = 21
9 ;CHECK-NEXT: - BB3[optional1]: float = 0.625, int = 8
10
11 ;CHECK: block-frequency-info: loop_test
12 ;CHECK: block-frequency-info: loop_test
13 ;CHECK: block-frequency-info: loop_test
14
15 ; Last block frequency info
16 ;CHECK: block-frequency-info: loop_test
17 ;CHECK-NEXT: - BB0[entry]: float = 1.0, int = 12
18 ;CHECK-NEXT: - BB1[for.check]: float = 2.6667, int = 34
19 ;CHECK-NEXT: - BB2[for.check]: float = 2.1667, int = 27
20 ;CHECK-NEXT: - BB3[test1]: float = 1.6667, int = 21
21 ;CHECK-NEXT: - BB4[optional1]: float = 0.625, int = 8
22
23
24 define void @loop_test(i32* %tags, i32 %count) {
25 entry:
26 br label %for.check
27 for.check:
28 %count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch]
29 %done.count = icmp ugt i32 %count.loop, 0
30 %tag_ptr = getelementptr inbounds i32, i32* %tags, i32 %count
31 %tag = load i32, i32* %tag_ptr
32 %done.tag = icmp eq i32 %tag, 0
33 %done = and i1 %done.count, %done.tag
34 br i1 %done, label %test1, label %exit, !prof !1
35 test1:
36 %tagbit1 = and i32 %tag, 1
37 %tagbit1eq0 = icmp eq i32 %tagbit1, 0
38 br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
39 optional1:
40 call void @a()
41 call void @a()
42 call void @a()
43 call void @a()
44 br label %test2
45 test2:
46 %tagbit2 = and i32 %tag, 2
47 %tagbit2eq0 = icmp eq i32 %tagbit2, 0
48 br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
49 optional2:
50 call void @b()
51 call void @b()
52 call void @b()
53 call void @b()
54 br label %test3
55 test3:
56 %tagbit3 = and i32 %tag, 4
57 %tagbit3eq0 = icmp eq i32 %tagbit3, 0
58 br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
59 optional3:
60 call void @c()
61 call void @c()
62 call void @c()
63 call void @c()
64 br label %test4
65 test4:
66 %tagbit4 = and i32 %tag, 8
67 %tagbit4eq0 = icmp eq i32 %tagbit4, 0
68 br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1
69 optional4:
70 call void @d()
71 call void @d()
72 call void @d()
73 call void @d()
74 br label %for.latch
75 for.latch:
76 %count.sub = sub i32 %count.loop, 1
77 br label %for.check
78 exit:
79 ret void
80 }
81
82 declare void @a()
83 declare void @b()
84 declare void @c()
85 declare void @d()
86
87 !1 = !{!"branch_weights", i32 5, i32 3}
None ; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs < %s | FileCheck %s
0 ; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs -tail-dup-placement=false < %s | FileCheck %s
11 ; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs \
22 ; RUN: -ppc-gen-isel=false < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s
33 target datalayout = "E-m:e-i64:64-n32:64"