llvm.org GIT mirror llvm / 77226a0
Add a new function attribute 'cold' to functions. Other than recognizing the attribute, the patch does little else. It changes the branch probability analyzer so that edges into blocks postdominated by a cold function are given low weight. Added analysis and code generation tests. Added documentation for the new attribute. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182638 91177308-0d34-0410-b5e6-96231b3b80d8 Diego Novillo 7 years ago
13 changed file(s) with 200 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
811811 This attribute indicates that the inliner should attempt to inline
812812 this function into callers whenever possible, ignoring any active
813813 inlining size threshold for this caller.
814 ``cold``
815 This attribute indicates that this function is rarely called. When
816 computing edge weights, basic blocks post-dominated by a cold
817 function call are also considered to be cold; and, thus, given low
818 weight.
814819 ``nonlazybind``
815820 This attribute suppresses lazy symbol binding for the function. This
816821 may make calls to the function faster, at the cost of extra program
130130 /// \brief Track the set of blocks directly succeeded by a returning block.
131131 SmallPtrSet PostDominatedByUnreachable;
132132
133 /// \brief Track the set of blocks that always lead to a cold call.
134 SmallPtrSet PostDominatedByColdCall;
135
133136 /// \brief Get sum of the block successors' weights.
134137 uint32_t getSumForBlock(const BasicBlock *BB) const;
135138
136139 bool calcUnreachableHeuristics(BasicBlock *BB);
137140 bool calcMetadataWeights(BasicBlock *BB);
141 bool calcColdCallHeuristics(BasicBlock *BB);
138142 bool calcPointerHeuristics(BasicBlock *BB);
139143 bool calcLoopBranchHeuristics(BasicBlock *BB);
140144 bool calcZeroHeuristics(BasicBlock *BB);
6767 ///< 0 means unaligned (different from align(1))
6868 AlwaysInline, ///< inline=always
6969 ByVal, ///< Pass structure by value
70 Cold, ///< Marks function as being in a cold path.
7071 InlineHint, ///< Source said inlining was desirable
7172 InReg, ///< Force argument to be passed in register
7273 MinSize, ///< Function must be optimized for size first
165165 and the path forward agreed upon.
166166 LLVMAddressSafety = 1ULL << 32,
167167 LLVMStackProtectStrongAttribute = 1ULL<<33
168 LLVMCold = 1ULL << 34
168169 */
169170 } LLVMAttribute;
170171
6868 /// easily subsume it.
6969 static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1;
7070
71 /// \brief Weight for a branch taken going into a cold block.
72 ///
73 /// This is the weight for a branch taken toward a block marked
74 /// cold. A block is marked cold if it's postdominated by a
75 /// block containing a call to a cold function. Cold functions
76 /// are those marked with attribute 'cold'.
77 static const uint32_t CC_TAKEN_WEIGHT = 4;
78
79 /// \brief Weight for a branch not-taken into a cold block.
80 ///
81 /// This is the weight for a branch not taken toward a block marked
82 /// cold.
83 static const uint32_t CC_NONTAKEN_WEIGHT = 64;
84
7185 static const uint32_t PH_TAKEN_WEIGHT = 20;
7286 static const uint32_t PH_NONTAKEN_WEIGHT = 12;
7387
188202 assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
189203 for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
190204 setEdgeWeight(BB, i, Weights[i]);
205
206 return true;
207 }
208
209 /// \brief Calculate edge weights for edges leading to cold blocks.
210 ///
211 /// A cold block is one post-dominated by a block with a call to a
212 /// cold function. Those edges are unlikely to be taken, so we give
213 /// them relatively low weight.
214 ///
215 /// Return true if we could compute the weights for cold edges.
216 /// Return false, otherwise.
217 bool BranchProbabilityInfo::calcColdCallHeuristics(BasicBlock *BB) {
218 TerminatorInst *TI = BB->getTerminator();
219 if (TI->getNumSuccessors() == 0)
220 return false;
221
222 // Determine which successors are post-dominated by a cold block.
223 SmallVector ColdEdges;
224 ColdEdges.reserve(TI->getNumSuccessors());
225 SmallVector NormalEdges;
226 NormalEdges.reserve(TI->getNumSuccessors());
227 for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
228 if (PostDominatedByColdCall.count(*I))
229 ColdEdges.push_back(I.getSuccessorIndex());
230 else
231 NormalEdges.push_back(I.getSuccessorIndex());
232
233 // If all successors are in the set of blocks post-dominated by cold calls,
234 // this block is in the set post-dominated by cold calls.
235 if (ColdEdges.size() == TI->getNumSuccessors())
236 PostDominatedByColdCall.insert(BB);
237 else {
238 // Otherwise, if the block itself contains a cold function, add it to the
239 // set of blocks postdominated by a cold call.
240 assert(!PostDominatedByColdCall.count(BB));
241 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
242 if (CallInst *CI = dyn_cast(I))
243 if (CI->hasFnAttr(Attribute::Cold)) {
244 PostDominatedByColdCall.insert(BB);
245 break;
246 }
247 }
248
249 // Skip probabilities if this block has a single successor.
250 if (TI->getNumSuccessors() == 1 || ColdEdges.empty())
251 return false;
252
253 uint32_t ColdWeight =
254 std::max(CC_TAKEN_WEIGHT / (unsigned) ColdEdges.size(), MIN_WEIGHT);
255 for (SmallVector::iterator I = ColdEdges.begin(),
256 E = ColdEdges.end();
257 I != E; ++I)
258 setEdgeWeight(BB, *I, ColdWeight);
259
260 if (NormalEdges.empty())
261 return true;
262 uint32_t NormalWeight = std::max(
263 CC_NONTAKEN_WEIGHT / (unsigned) NormalEdges.size(), NORMAL_WEIGHT);
264 for (SmallVector::iterator I = NormalEdges.begin(),
265 E = NormalEdges.end();
266 I != E; ++I)
267 setEdgeWeight(BB, *I, NormalWeight);
191268
192269 return true;
193270 }
396473 LastF = &F; // Store the last function we ran on for printing.
397474 LI = &getAnalysis();
398475 assert(PostDominatedByUnreachable.empty());
476 assert(PostDominatedByColdCall.empty());
399477
400478 // Walk the basic blocks in post-order so that we can build up state about
401479 // the successors of a block iteratively.
407485 continue;
408486 if (calcMetadataWeights(*I))
409487 continue;
488 if (calcColdCallHeuristics(*I))
489 continue;
410490 if (calcLoopBranchHeuristics(*I))
411491 continue;
412492 if (calcPointerHeuristics(*I))
419499 }
420500
421501 PostDominatedByUnreachable.clear();
502 PostDominatedByColdCall.clear();
422503 return false;
423504 }
424505
563563
564564 KEYWORD(alwaysinline);
565565 KEYWORD(byval);
566 KEYWORD(cold);
566567 KEYWORD(inlinehint);
567568 KEYWORD(inreg);
568569 KEYWORD(minsize);
908908 continue;
909909 }
910910 case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break;
911 case lltok::kw_cold: B.addAttribute(Attribute::Cold); break;
911912 case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break;
912913 case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break;
913914 case lltok::kw_naked: B.addAttribute(Attribute::Naked); break;
12211222
12221223 case lltok::kw_alignstack:
12231224 case lltok::kw_alwaysinline:
1225 case lltok::kw_cold:
12241226 case lltok::kw_inlinehint:
12251227 case lltok::kw_minsize:
12261228 case lltok::kw_naked:
9595 kw_alwaysinline,
9696 kw_sanitize_address,
9797 kw_byval,
98 kw_cold,
9899 kw_inlinehint,
99100 kw_inreg,
100101 kw_minsize,
216216 return "uwtable";
217217 if (hasAttribute(Attribute::ZExt))
218218 return "zeroext";
219 if (hasAttribute(Attribute::Cold))
220 return "cold";
219221
220222 // FIXME: These should be output like this:
221223 //
395397 case Attribute::SanitizeMemory: return 1ULL << 37;
396398 case Attribute::NoBuiltin: return 1ULL << 38;
397399 case Attribute::Returned: return 1ULL << 39;
400 case Attribute::Cold: return 1ULL << 40;
398401 }
399402 llvm_unreachable("Unsupported attribute type");
400403 }
691691 I->getKindAsEnum() == Attribute::SanitizeMemory ||
692692 I->getKindAsEnum() == Attribute::MinSize ||
693693 I->getKindAsEnum() == Attribute::NoDuplicate ||
694 I->getKindAsEnum() == Attribute::NoBuiltin) {
694 I->getKindAsEnum() == Attribute::NoBuiltin ||
695 I->getKindAsEnum() == Attribute::Cold) {
695696 if (!isFunction)
696697 CheckFailed("Attribute '" + I->getKindAsString() +
697698 "' only applies to functions!", V);
114114 }
115115
116116 !2 = metadata !{metadata !"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64}
117
declare void @coldfunc() cold

define i32 @test5(i32 %a, i32 %b, i1 %flag) {
; CHECK: Printing analysis {{.*}} for function 'test5'
entry:
  br i1 %flag, label %then, label %else
; CHECK: edge entry -> then probability is 4 / 68
; CHECK: edge entry -> else probability is 64 / 68

then:
  ; The cold callee makes %then post-dominated by a cold call, so the
  ; entry -> then edge above gets the low CC_TAKEN_WEIGHT (4).
  call void @coldfunc()
  br label %exit
; CHECK: edge then -> exit probability is 16 / 16 = 100%

else:
  br label %exit
; CHECK: edge else -> exit probability is 16 / 16 = 100%

exit:
  %result = phi i32 [ %a, %then ], [ %b, %else ]
  ret i32 %result
}
140
declare i32 @regular_function(i32 %i)

define i32 @test_cold_call_sites(i32* %a) {
; Test that edges to blocks post-dominated by cold call sites
; are marked as not expected to be taken.
; TODO(dnovillo) The calls to regular_function should not be merged, but
; they are currently being merged. Convert this into a code generation test
; after that is fixed.

; CHECK: Printing analysis {{.*}} for function 'test_cold_call_sites'
; CHECK: edge entry -> then probability is 4 / 68 = 5.88235%
; CHECK: edge entry -> else probability is 64 / 68 = 94.1176% [HOT edge]

entry:
  %gep1 = getelementptr i32* %a, i32 1
  %val1 = load i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then, label %else

then:
  ; This function is not declared cold, but this call site is.
  %val4 = call i32 @regular_function(i32 %val1) cold
  br label %exit

else:
  %gep2 = getelementptr i32* %a, i32 2
  %val2 = load i32* %gep2
  %val3 = call i32 @regular_function(i32 %val2)
  br label %exit

exit:
  %ret = phi i32 [ %val4, %then ], [ %val3, %else ]
  ret i32 %ret
}
10881088 store double %rra.0, double* %arrayidx34, align 8
10891089 br label %for.cond
10901090 }
1091
declare void @cold_function() cold

define i32 @test_cold_calls(i32* %a) {
; Test that edges to blocks post-dominated by cold calls are
; marked as not expected to be taken. They should be laid out
; at the bottom.
; CHECK: test_cold_calls:
; CHECK: %entry
; CHECK: %else
; CHECK: %exit
; CHECK: %then

entry:
  %gep1 = getelementptr i32* %a, i32 1
  %val1 = load i32* %gep1
  %cond1 = icmp ugt i32 %val1, 1
  br i1 %cond1, label %then, label %else

then:
  ; Cold path: block placement should sink %then below the others.
  call void @cold_function()
  br label %exit

else:
  %gep2 = getelementptr i32* %a, i32 2
  %val2 = load i32* %gep2
  br label %exit

exit:
  %ret = phi i32 [ %val1, %then ], [ %val2, %else ]
  ret i32 %ret
}
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
; Round-trip test: the 'cold' attribute must survive assembly -> bitcode ->
; disassembly, both on the function and in the attribute group listing.

; CHECK: @fun() #0
define void @fun() #0 {
  ret void
}

; CHECK: attributes #0 = { cold }
attributes #0 = { cold }