llvm.org GIT mirror llvm / ed2657e
[XRay] Minimal tool to convert xray traces to Chrome's Trace Event Format. Minimal tool to convert xray traces to Chrome's Trace Event Format. Summary: Make use of Chrome Trace Event format's Duration events and stack frame dict to produce JSON files that chrome://tracing can visualize from xray function call traces. Trace Event format is more robust and has several features like argument logging, function categorization, multi process traces, etc. that we can add as needed. Duration events cover an important base case. Part of this change is rearranging the code so that the TrieNode data structure can be used from multiple tools and can carry parameterized baggage on the nodes. I put the actual behavior changes in llvm-xray convert exclusively. Exploring the trace of instrumented llc was pretty nifty if overwhelming. I can envision this being very useful for analyzing contention scenarios or tuning parameters like batch sizes in a producer consumer queue. For more targeted traces like this, let's talk about how we want to approach trace pruning. Reviewers: dberris, pelikan Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D39362 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317531 91177308-0d34-0410-b5e6-96231b3b80d8 Keith Wyss 1 year, 11 months ago
4 changed file(s) with 375 addition(s) and 113 deletion(s). Raw diff Collapse all Expand all
0 //===- trie-node.h - XRay Call Stack Data Structure -----------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides a data structure and routines for working with call stacks
10 // of instrumented functions.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_TOOLS_LLVM_XRAY_STACK_TRIE_H
15 #define LLVM_TOOLS_LLVM_XRAY_STACK_TRIE_H
16
17 #include
18 #include
19
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SmallVector.h"
23
24 /// A type to represent a trie of invocations. It is useful to construct a
25 /// graph of these nodes from reading an XRay trace, such that each function
26 /// call can be placed in a larger context.
27 ///
28 /// The template parameter allows users of the template to attach their own
29 /// data elements to each node in the invocation graph.
30 template struct TrieNode {
31 /// The function ID.
32 int32_t FuncId;
33
34 /// The caller of this function.
35 TrieNode *Parent;
36
37 /// The callees from this function.
38 llvm::SmallVector *, 4> Callees;
39
40 /// Additional parameterized data on each node.
41 AssociatedData ExtraData;
42 };
43
44 /// Merges together two TrieNodes with like function ids, aggregating their
45 /// callee lists and durations. The caller must provide storage where new merged
46 /// nodes can be allocated in the form of a linked list.
47 template
48 TrieNode *
49 mergeTrieNodes(const TrieNode &Left, const TrieNode &Right,
50 /*Non-deduced pointer type for nullptr compatibility*/
51 typename std::remove_reference *>::type NewParent,
52 std::forward_list> &NodeStore,
53 Callable &&MergeCallable) {
54 llvm::function_ref MergeFn(
55 std::forward(MergeCallable));
56 assert(Left.FuncId == Right.FuncId);
57 NodeStore.push_front(TrieNode{
58 Left.FuncId, NewParent, {}, MergeFn(Left.ExtraData, Right.ExtraData)});
59 auto I = NodeStore.begin();
60 auto *Node = &*I;
61
62 // Build a map of callees from the left side.
63 llvm::DenseMap *> LeftCalleesByFuncId;
64 for (auto *Callee : Left.Callees) {
65 LeftCalleesByFuncId[Callee->FuncId] = Callee;
66 }
67
68 // Iterate through the right side, either merging with the map values or
69 // directly adding to the Callees vector. The iteration also removes any
70 // merged values from the left side map.
71 // TODO: Unroll into iterative and explicit stack for efficiency.
72 for (auto *Callee : Right.Callees) {
73 auto iter = LeftCalleesByFuncId.find(Callee->FuncId);
74 if (iter != LeftCalleesByFuncId.end()) {
75 Node->Callees.push_back(
76 mergeTrieNodes(*(iter->second), *Callee, Node, NodeStore, MergeFn));
77 LeftCalleesByFuncId.erase(iter);
78 } else {
79 Node->Callees.push_back(Callee);
80 }
81 }
82
83 // Add any callees that weren't found in the right side.
84 for (auto MapPairIter : LeftCalleesByFuncId) {
85 Node->Callees.push_back(MapPairIter.second);
86 }
87
88 return Node;
89 }
90
91 #endif // LLVM_TOOLS_LLVM_XRAY_STACK_TRIE_H
1111 //===----------------------------------------------------------------------===//
1212 #include "xray-converter.h"
1313
14 #include "trie-node.h"
1415 #include "xray-registry.h"
1516 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
1617 #include "llvm/Support/EndianStream.h"
1718 #include "llvm/Support/FileSystem.h"
19 #include "llvm/Support/FormatVariadic.h"
1820 #include "llvm/Support/ScopedPrinter.h"
1921 #include "llvm/Support/YAMLTraits.h"
2022 #include "llvm/Support/raw_ostream.h"
3133 static cl::opt ConvertInput(cl::Positional,
3234 cl::desc(""),
3335 cl::Required, cl::sub(Convert));
34 enum class ConvertFormats { BINARY, YAML };
36 enum class ConvertFormats { BINARY, YAML, CHROME_TRACE_EVENT };
3537 static cl::opt ConvertOutputFormat(
3638 "output-format", cl::desc("output format"),
3739 cl::values(clEnumValN(ConvertFormats::BINARY, "raw", "output in binary"),
38 clEnumValN(ConvertFormats::YAML, "yaml", "output in yaml")),
40 clEnumValN(ConvertFormats::YAML, "yaml", "output in yaml"),
41 clEnumValN(ConvertFormats::CHROME_TRACE_EVENT, "trace_event",
42 "Output in chrome's trace event format. "
43 "May be visualized with the Catapult trace viewer.")),
3944 cl::sub(Convert));
4045 static cl::alias ConvertOutputFormat2("f", cl::aliasopt(ConvertOutputFormat),
4146 cl::desc("Alias for -output-format"),
141146 }
142147 }
143148
149 namespace {
150
151 // A structure that allows building a dictionary of stack ids for the Chrome
152 // trace event format.
153 struct StackIdData {
154 // Each Stack of function calls has a unique ID.
155 unsigned id;
156
157 // Bookkeeping so that IDs can be maintained uniquely across threads.
158 // Traversal keeps sibling pointers to other threads stacks. This is helpful
159 // to determine when a thread encounters a new stack and should assign a new
160 // unique ID.
161 SmallVector *, 4> siblings;
162 };
163
164 using StackTrieNode = TrieNode;
165
166 // A helper function to find the sibling nodes for an encountered function in a
167 // thread of execution. Relies on the invariant that each time a new node is
168 // traversed in a thread, sibling bidirectional pointers are maintained.
169 SmallVector
170 findSiblings(StackTrieNode *parent, int32_t FnId, uint32_t TId,
171 const DenseMap>
172 &StackRootsByThreadId) {
173
174 SmallVector Siblings{};
175
176 if (parent == nullptr) {
177 for (auto map_iter : StackRootsByThreadId) {
178 // Only look for siblings in other threads.
179 if (map_iter.first != TId)
180 for (auto node_iter : map_iter.second) {
181 if (node_iter->FuncId == FnId)
182 Siblings.push_back(node_iter);
183 }
184 }
185 return Siblings;
186 }
187
188 for (auto *ParentSibling : parent->ExtraData.siblings)
189 for (auto node_iter : ParentSibling->Callees)
190 if (node_iter->FuncId == FnId)
191 Siblings.push_back(node_iter);
192
193 return Siblings;
194 }
195
196 // Given a function being invoked in a thread with id TId, finds and returns the
197 // StackTrie representing the function call stack. If no node exists, creates
198 // the node. Assigns unique IDs to stacks newly encountered among all threads
199 // and keeps sibling links up to when creating new nodes.
200 StackTrieNode *findOrCreateStackNode(
201 StackTrieNode *Parent, int32_t FuncId, uint32_t TId,
202 DenseMap> &StackRootsByThreadId,
203 DenseMap &StacksByStackId, unsigned *id_counter,
204 std::forward_list &NodeStore) {
205 SmallVector &ParentCallees =
206 Parent == nullptr ? StackRootsByThreadId[TId] : Parent->Callees;
207 auto match = find_if(ParentCallees, [FuncId](StackTrieNode *ParentCallee) {
208 return FuncId == ParentCallee->FuncId;
209 });
210 if (match != ParentCallees.end())
211 return *match;
212
213 SmallVector siblings =
214 findSiblings(Parent, FuncId, TId, StackRootsByThreadId);
215 if (siblings.empty()) {
216 NodeStore.push_front({FuncId, Parent, {}, {(*id_counter)++, {}}});
217 StackTrieNode *CurrentStack = &NodeStore.front();
218 StacksByStackId[*id_counter - 1] = CurrentStack;
219 ParentCallees.push_back(CurrentStack);
220 return CurrentStack;
221 }
222 unsigned stack_id = siblings[0]->ExtraData.id;
223 NodeStore.push_front({FuncId, Parent, {}, {stack_id, std::move(siblings)}});
224 StackTrieNode *CurrentStack = &NodeStore.front();
225 for (auto *sibling : CurrentStack->ExtraData.siblings)
226 sibling->ExtraData.siblings.push_back(CurrentStack);
227 ParentCallees.push_back(CurrentStack);
228 return CurrentStack;
229 }
230
231 void writeTraceViewerRecord(raw_ostream &OS, int32_t FuncId, uint32_t TId,
232 bool Symbolize,
233 const FuncIdConversionHelper &FuncIdHelper,
234 double EventTimestampUs,
235 const StackTrieNode &StackCursor,
236 StringRef FunctionPhenotype) {
237 OS << " ";
238 OS << llvm::formatv(
239 R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "1", )"
240 R"("ts" : "{3:f3}", "sf" : "{4}" })",
241 (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId)
242 : llvm::to_string(FuncId)),
243 FunctionPhenotype, TId, EventTimestampUs, StackCursor.ExtraData.id);
244 }
245
246 } // namespace
247
248 void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
249 raw_ostream &OS) {
250 const auto &FH = Records.getFileHeader();
251 auto CycleFreq = FH.CycleFrequency;
252
253 unsigned id_counter = 0;
254
255 OS << "{\n \"traceEvents\": [";
256 DenseMap StackCursorByThreadId{};
257 DenseMap> StackRootsByThreadId{};
258 DenseMap StacksByStackId{};
259 std::forward_list NodeStore{};
260 int loop_count = 0;
261 for (const auto &R : Records) {
262 if (loop_count++ == 0)
263 OS << "\n";
264 else
265 OS << ",\n";
266
267 // Chrome trace event format always wants data in micros.
268 // CyclesPerMicro = CycleHertz / 10^6
269 // TSC / CyclesPerMicro == TSC * 10^6 / CycleHertz == MicroTimestamp
270 // Could lose some precision here by converting the TSC to a double to
271 // multiply by the period in micros. 52 bit mantissa is a good start though.
272 // TODO: Make feature request to Chrome Trace viewer to accept ticks and a
273 // frequency or do some more involved calculation to avoid dangers of
274 // conversion.
275 double EventTimestampUs = double(1000000) / CycleFreq * double(R.TSC);
276 StackTrieNode *&StackCursor = StackCursorByThreadId[R.TId];
277 switch (R.Type) {
278 case RecordTypes::ENTER:
279 case RecordTypes::ENTER_ARG:
280 StackCursor = findOrCreateStackNode(StackCursor, R.FuncId, R.TId,
281 StackRootsByThreadId, StacksByStackId,
282 &id_counter, NodeStore);
283 // Each record is represented as a json dictionary with function name,
284 // type of B for begin or E for end, thread id, process id (faked),
285 // timestamp in microseconds, and a stack frame id. The ids are logged
286 // in an id dictionary after the events.
287 writeTraceViewerRecord(OS, R.FuncId, R.TId, Symbolize, FuncIdHelper,
288 EventTimestampUs, *StackCursor, "B");
289 break;
290 case RecordTypes::EXIT:
291 case RecordTypes::TAIL_EXIT:
292 // No entries to record end for.
293 if (StackCursor == nullptr)
294 break;
295 // Should we emit an END record anyway or account this condition?
296 // (And/Or in loop termination below)
297 StackTrieNode *PreviousCursor = nullptr;
298 do {
299 writeTraceViewerRecord(OS, StackCursor->FuncId, R.TId, Symbolize,
300 FuncIdHelper, EventTimestampUs, *StackCursor,
301 "E");
302 PreviousCursor = StackCursor;
303 StackCursor = StackCursor->Parent;
304 } while (PreviousCursor->FuncId != R.FuncId && StackCursor != nullptr);
305 break;
306 }
307 }
308 OS << "\n ],\n"; // Close the Trace Events array.
309 OS << " "
310 << "\"displayTimeUnit\": \"ns\",\n";
311
312 // The stackFrames dictionary substantially reduces size of the output file by
313 // avoiding repeating the entire call stack of function names for each entry.
314 OS << R"( "stackFrames": {)";
315 int stack_frame_count = 0;
316 for (auto map_iter : StacksByStackId) {
317 if (stack_frame_count++ == 0)
318 OS << "\n";
319 else
320 OS << ",\n";
321 OS << " ";
322 OS << llvm::formatv(
323 R"("{0}" : { "name" : "{1}")", map_iter.first,
324 (Symbolize ? FuncIdHelper.SymbolOrNumber(map_iter.second->FuncId)
325 : llvm::to_string(map_iter.second->FuncId)));
326 if (map_iter.second->Parent != nullptr)
327 OS << llvm::formatv(R"(, "parent": "{0}")",
328 map_iter.second->Parent->ExtraData.id);
329 OS << " }";
330 }
331 OS << "\n }\n"; // Close the stack frames map.
332 OS << "}\n"; // Close the JSON entry.
333 }
334
144335 namespace llvm {
145336 namespace xray {
146337
190381 case ConvertFormats::BINARY:
191382 TC.exportAsRAWv1(T, OS);
192383 break;
384 case ConvertFormats::CHROME_TRACE_EVENT:
385 TC.exportAsChromeTraceEventFormat(T, OS);
386 break;
193387 }
194388 return Error::success();
195389 });
1414 #define LLVM_TOOLS_LLVM_XRAY_XRAY_CONVERTER_H
1515
1616 #include "func-id-helper.h"
17 #include "llvm/XRay/Trace.h"
1718 #include "llvm/XRay/XRayRecord.h"
18 #include "llvm/XRay/Trace.h"
1919
2020 namespace llvm {
2121 namespace xray {
3030
3131 void exportAsYAML(const Trace &Records, raw_ostream &OS);
3232 void exportAsRAWv1(const Trace &Records, raw_ostream &OS);
33
34 /// For this conversion, the Function records within each thread are expected
35 /// to be in sorted TSC order. The trace event format encodes stack traces, so
36 /// the linear history is essential for correct output.
37 void exportAsChromeTraceEventFormat(const Trace &Records, raw_ostream &OS);
3338 };
3439
3540 } // namespace xray
1818 #include
1919
2020 #include "func-id-helper.h"
21 #include "trie-node.h"
2122 #include "xray-registry.h"
2223 #include "llvm/ADT/StringExtras.h"
2324 #include "llvm/Support/CommandLine.h"
254255 /// maintain an index of unique functions, and provide a means of iterating
255256 /// through all the instrumented call stacks which we know about.
256257
257 struct TrieNode {
258 int32_t FuncId;
259 TrieNode *Parent;
260 SmallVector Callees;
261 // Separate durations depending on whether the node is the deepest node in the
262 // stack.
263 SmallVector TerminalDurations;
264 SmallVector IntermediateDurations;
258 struct StackDuration {
259 llvm::SmallVector TerminalDurations;
260 llvm::SmallVector IntermediateDurations;
265261 };
266262
267 /// Merges together two TrieNodes with like function ids, aggregating their
268 /// callee lists and durations. The caller must provide storage where new merged
269 /// nodes can be allocated in the form of a linked list.
270 TrieNode *mergeTrieNodes(const TrieNode &Left, const TrieNode &Right,
271 TrieNode *NewParent,
272 std::forward_list &NodeStore) {
273 assert(Left.FuncId == Right.FuncId);
274 NodeStore.push_front(TrieNode{Left.FuncId, NewParent, {}, {}, {}});
275 auto I = NodeStore.begin();
276 auto *Node = &*I;
277
278 // Build a map of callees from the left side.
279 DenseMap LeftCalleesByFuncId;
280 for (auto *Callee : Left.Callees) {
281 LeftCalleesByFuncId[Callee->FuncId] = Callee;
282 }
283
284 // Iterate through the right side, either merging with the map values or
285 // directly adding to the Callees vector. The iteration also removes any
286 // merged values from the left side map.
287 for (auto *Callee : Right.Callees) {
288 auto iter = LeftCalleesByFuncId.find(Callee->FuncId);
289 if (iter != LeftCalleesByFuncId.end()) {
290 Node->Callees.push_back(
291 mergeTrieNodes(*(iter->second), *Callee, Node, NodeStore));
292 LeftCalleesByFuncId.erase(iter);
293 } else {
294 Node->Callees.push_back(Callee);
295 }
296 }
297
298 // Add any callees that weren't found in the right side.
299 for (auto MapPairIter : LeftCalleesByFuncId) {
300 Node->Callees.push_back(MapPairIter.second);
301 }
302
263 StackDuration mergeStackDuration(const StackDuration &Left,
264 const StackDuration &Right) {
265 StackDuration Data{};
266 Data.TerminalDurations.reserve(Left.TerminalDurations.size() +
267 Right.TerminalDurations.size());
268 Data.IntermediateDurations.reserve(Left.IntermediateDurations.size() +
269 Right.IntermediateDurations.size());
303270 // Aggregate the durations.
304 for (auto duration : Left.TerminalDurations) {
305 Node->TerminalDurations.push_back(duration);
306 }
307 for (auto duration : Right.TerminalDurations) {
308 Node->TerminalDurations.push_back(duration);
309 }
310 for (auto duration : Left.IntermediateDurations) {
311 Node->IntermediateDurations.push_back(duration);
312 }
313 for (auto duration : Right.IntermediateDurations) {
314 Node->IntermediateDurations.push_back(duration);
315 }
316
317 return Node;
271 for (auto duration : Left.TerminalDurations)
272 Data.TerminalDurations.push_back(duration);
273 for (auto duration : Right.TerminalDurations)
274 Data.TerminalDurations.push_back(duration);
275
276 for (auto duration : Left.IntermediateDurations)
277 Data.IntermediateDurations.push_back(duration);
278 for (auto duration : Right.IntermediateDurations)
279 Data.IntermediateDurations.push_back(duration);
280 return Data;
318281 }
319282
283 using StackTrieNode = TrieNode;
284
320285 template
321 std::size_t GetValueForStack(const TrieNode *Node);
286 std::size_t GetValueForStack(const StackTrieNode *Node);
322287
323288 // When computing total time spent in a stack, we're adding the timings from
324289 // its callees and the timings from when it was a leaf.
325290 template <>
326291 std::size_t
327 GetValueForStack(const TrieNode *Node) {
328 auto TopSum = std::accumulate(Node->TerminalDurations.begin(),
329 Node->TerminalDurations.end(), 0uLL);
330 return std::accumulate(Node->IntermediateDurations.begin(),
331 Node->IntermediateDurations.end(), TopSum);
292 GetValueForStack(const StackTrieNode *Node) {
293 auto TopSum = std::accumulate(Node->ExtraData.TerminalDurations.begin(),
294 Node->ExtraData.TerminalDurations.end(), 0uLL);
295 return std::accumulate(Node->ExtraData.IntermediateDurations.begin(),
296 Node->ExtraData.IntermediateDurations.end(), TopSum);
332297 }
333298
334299 // Calculates how many times a function was invoked.
335300 // TODO: Hook up option to produce stacks
336301 template <>
337302 std::size_t
338 GetValueForStack(const TrieNode *Node) {
339 return Node->TerminalDurations.size() + Node->IntermediateDurations.size();
303 GetValueForStack(const StackTrieNode *Node) {
304 return Node->ExtraData.TerminalDurations.size() +
305 Node->ExtraData.IntermediateDurations.size();
340306 }
341307
342308 // Make sure there are implementations for each enum value.
343309 template struct DependentFalseType : std::false_type {};
344310
345311 template
346 std::size_t GetValueForStack(const TrieNode *Node) {
312 std::size_t GetValueForStack(const StackTrieNode *Node) {
347313 static_assert(DependentFalseType::value,
348314 "No implementation found for aggregation type provided.");
349315 return 0;
352318 class StackTrie {
353319 // Avoid the magic number of 4 propagated through the code with an alias.
354320 // We use this SmallVector to track the root nodes in a call graph.
355 using RootVector = SmallVector<TrieNode *, 4>;
321 using RootVector = SmallVector<StackTrieNode *, 4>;
356322
357323 // We maintain pointers to the roots of the tries we see.
358324 DenseMap Roots;
359325
360326 // We make sure all the nodes are accounted for in this list.
361 std::forward_list<TrieNode> NodeStore;
327 std::forward_list<StackTrieNode> NodeStore;
362328
363329 // A map of thread ids to pairs call stack trie nodes and their start times.
364 DenseMapTrieNode *, uint64_t>, 8>>
330 DenseMapStackTrieNode *, uint64_t>, 8>>
365331 ThreadStackMap;
366332
367 TrieNode *createTrieNode(uint32_t ThreadId, int32_t FuncId,
368 TrieNode *Parent) {
369 NodeStore.push_front(TrieNode{FuncId, Parent, {}, {}, {}});
333 StackTrieNode *createTrieNode(uint32_t ThreadId, int32_t FuncId,
334 StackTrieNode *Parent) {
335 NodeStore.push_front(StackTrieNode{FuncId, Parent, {}, {{}, {}}});
370336 auto I = NodeStore.begin();
371337 auto *Node = &*I;
372338 if (!Parent)
374340 return Node;
375341 }
376342
377 TrieNode *findRootNode(uint32_t ThreadId, int32_t FuncId) {
343 StackTrieNode *findRootNode(uint32_t ThreadId, int32_t FuncId) {
378344 const auto &RootsByThread = Roots[ThreadId];
379345 auto I = find_if(RootsByThread,
380 [&](TrieNode *N) { return N->FuncId == FuncId; });
346 [&](StackTrieNode *N) { return N->FuncId == FuncId; });
381347 return (I == RootsByThread.end()) ? nullptr : *I;
382348 }
383349
415381
416382 auto &Top = TS.back();
417383 auto I = find_if(Top.first->Callees,
418 [&](TrieNode *N) { return N->FuncId == R.FuncId; });
384 [&](StackTrieNode *N) { return N->FuncId == R.FuncId; });
419385 if (I == Top.first->Callees.end()) {
420386 // We didn't find the callee in the stack trie, so we're going to
421387 // add to the stack then set up the pointers properly.
446412 return AccountRecordStatus::ENTRY_NOT_FOUND;
447413 }
448414
449 auto FunctionEntryMatch =
450 find_if(reverse(TS), [&](const std::pair &E) {
415 auto FunctionEntryMatch = find_if(
416 reverse(TS), [&](const std::pair &E) {
451417 return E.first->FuncId == R.FuncId;
452418 });
453419 auto status = AccountRecordStatus::OK;
460426 }
461427 auto I = FunctionEntryMatch.base();
462428 for (auto &E : make_range(I, TS.end() - 1))
463 E.first->IntermediateDurations.push_back(std::max(E.second, R.TSC) -
464 std::min(E.second, R.TSC));
429 E.first->ExtraData.IntermediateDurations.push_back(
430 std::max(E.second, R.TSC) - std::min(E.second, R.TSC));
465431 auto &Deepest = TS.back();
466432 if (wasLastRecordExit)
467 Deepest.first->IntermediateDurations.push_back(
433 Deepest.first->ExtraData.IntermediateDurations.push_back(
468434 std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC));
469435 else
470 Deepest.first->TerminalDurations.push_back(
436 Deepest.first->ExtraData.TerminalDurations.push_back(
471437 std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC));
472438 TS.erase(I, TS.end());
473439 return status;
478444
479445 bool isEmpty() const { return Roots.empty(); }
480446
481 void printStack(raw_ostream &OS, const TrieNode *Top,
447 void printStack(raw_ostream &OS, const StackTrieNode *Top,
482448 FuncIdConversionHelper &FN) {
483449 // Traverse the pointers up to the parent, noting the sums, then print
484450 // in reverse order (callers at top, callees down bottom).
485 SmallVectorTrieNode *, 8> CurrentStack;
451 SmallVectorStackTrieNode *, 8> CurrentStack;
486452 for (auto *F = Top; F != nullptr; F = F->Parent)
487453 CurrentStack.push_back(F);
488454 int Level = 0;
490456 "count", "sum");
491457 for (auto *F :
492458 reverse(make_range(CurrentStack.begin() + 1, CurrentStack.end()))) {
493 auto Sum = std::accumulate(F->IntermediateDurations.begin(),
494 F->IntermediateDurations.end(), 0LL);
459 auto Sum = std::accumulate(F->ExtraData.IntermediateDurations.begin(),
460 F->ExtraData.IntermediateDurations.end(), 0LL);
495461 auto FuncId = FN.SymbolOrNumber(F->FuncId);
496462 OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++,
497463 FuncId.size() > 60 ? FuncId.substr(0, 57) + "..." : FuncId,
498 F->IntermediateDurations.size(), Sum);
464 F->ExtraData.IntermediateDurations.size(), Sum);
499465 }
500466 auto *Leaf = *CurrentStack.begin();
501 auto LeafSum = std::accumulate(Leaf->TerminalDurations.begin(),
502 Leaf->TerminalDurations.end(), 0LL);
467 auto LeafSum =
468 std::accumulate(Leaf->ExtraData.TerminalDurations.begin(),
469 Leaf->ExtraData.TerminalDurations.end(), 0LL);
503470 auto LeafFuncId = FN.SymbolOrNumber(Leaf->FuncId);
504471 OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++,
505472 LeafFuncId.size() > 60 ? LeafFuncId.substr(0, 57) + "..."
506473 : LeafFuncId,
507 Leaf->TerminalDurations.size(), LeafSum);
474 Leaf->ExtraData.TerminalDurations.size(), LeafSum);
508475 OS << "\n";
509476 }
510477
551518
552519 /// Creates a merged list of Tries for unique stacks that disregards their
553520 /// thread IDs.
554 RootVector mergeAcrossThreads(std::forward_list<TrieNode> &NodeStore) {
521 RootVector mergeAcrossThreads(std::forward_list<StackTrieNode> &NodeStore) {
555522 RootVector MergedByThreadRoots;
556523 for (auto MapIter : Roots) {
557524 const auto &RootNodeVector = MapIter.second;
558525 for (auto *Node : RootNodeVector) {
559526 auto MaybeFoundIter =
560 find_if(MergedByThreadRoots, [Node](TrieNode *elem) {
527 find_if(MergedByThreadRoots, [Node](StackTrieNode *elem) {
561528 return Node->FuncId == elem->FuncId;
562529 });
563530 if (MaybeFoundIter == MergedByThreadRoots.end()) {
564531 MergedByThreadRoots.push_back(Node);
565532 } else {
566 MergedByThreadRoots.push_back(
567 mergeTrieNodes(**MaybeFoundIter, *Node, nullptr, NodeStore));
533 MergedByThreadRoots.push_back(mergeTrieNodes(
534 **MaybeFoundIter, *Node, nullptr, NodeStore, mergeStackDuration));
568535 MergedByThreadRoots.erase(MaybeFoundIter);
569536 }
570537 }
576543 template
577544 void printAllAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN,
578545 StackOutputFormat format) {
579 std::forward_list<TrieNode> AggregatedNodeStore;
546 std::forward_list<StackTrieNode> AggregatedNodeStore;
580547 RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore);
581548 bool reportThreadId = false;
582549 printAll(OS, FN, MergedByThreadRoots,
585552
586553 /// Merges the trie by thread id before printing top stacks.
587554 void printAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN) {
588 std::forward_list<TrieNode> AggregatedNodeStore;
555 std::forward_list<StackTrieNode> AggregatedNodeStore;
589556 RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore);
590557 print(OS, FN, MergedByThreadRoots);
591558 }
594561 template
595562 void printAll(raw_ostream &OS, FuncIdConversionHelper &FN,
596563 RootVector RootValues, uint32_t ThreadId, bool ReportThread) {
597 SmallVectorTrieNode *, 16> S;
564 SmallVectorStackTrieNode *, 16> S;
598565 for (const auto *N : RootValues) {
599566 S.clear();
600567 S.push_back(N);
615582 template
616583 void printSingleStack(raw_ostream &OS, FuncIdConversionHelper &Converter,
617584 bool ReportThread, uint32_t ThreadId,
618 const TrieNode *Node) {
585 const StackTrieNode *Node) {
619586 if (ReportThread)
620587 OS << "thread_" << ThreadId << ";";
621 SmallVectorTrieNode *, 5> lineage{};
588 SmallVectorStackTrieNode *, 5> lineage{};
622589 lineage.push_back(Node);
623590 while (lineage.back()->Parent != nullptr)
624591 lineage.push_back(lineage.back()->Parent);
638605 // - Total number of unique stacks
639606 // - Top 10 stacks by count
640607 // - Top 10 stacks by aggregate duration
641 SmallVector, 11> TopStacksByCount;
642 SmallVector, 11> TopStacksBySum;
643 auto greater_second = [](const std::pair &A,
644 const std::pair &B) {
645 return A.second > B.second;
646 };
608 SmallVector, 11>
609 TopStacksByCount;
610 SmallVector, 11> TopStacksBySum;
611 auto greater_second =
612 [](const std::pair &A,
613 const std::pair &B) {
614 return A.second > B.second;
615 };
647616 uint64_t UniqueStacks = 0;
648617 for (const auto *N : RootValues) {
649 SmallVectorTrieNode *, 16> S;
618 SmallVectorStackTrieNode *, 16> S;
650619 S.emplace_back(N);
651620
652621 while (!S.empty()) {
654623
655624 // We only start printing the stack (by walking up the parent pointers)
656625 // when we get to a leaf function.
657 if (!Top->TerminalDurations.empty()) {
626 if (!Top->ExtraData.TerminalDurations.empty()) {
658627 ++UniqueStacks;
659 auto TopSum = std::accumulate(Top->TerminalDurations.begin(),
660 Top->TerminalDurations.end(), 0uLL);
628 auto TopSum =
629 std::accumulate(Top->ExtraData.TerminalDurations.begin(),
630 Top->ExtraData.TerminalDurations.end(), 0uLL);
661631 {
662632 auto E = std::make_pair(Top, TopSum);
663633 TopStacksBySum.insert(std::lower_bound(TopStacksBySum.begin(),
668638 TopStacksBySum.pop_back();
669639 }
670640 {
671 auto E = std::make_pair(Top, Top->TerminalDurations.size());
641 auto E =
642 std::make_pair(Top, Top->ExtraData.TerminalDurations.size());
672643 TopStacksByCount.insert(std::lower_bound(TopStacksByCount.begin(),
673644 TopStacksByCount.end(), E,
674645 greater_second),