llvm.org GIT mirror llvm / 959cfda
[XRay][tools] Updated stacks tool with flamegraph output. Summary: As the first step to allow analysis and visualization of XRay-collected data, allow using the llvm-xray stacks tool to emit a complete listing of stacks in a format consumable by a flamegraph tool. Possible follow-up formats include Chrome trace viewer format and SQL load files. As a proof of concept (POC), I'm able to generate flamegraphs of an XRay-instrumented llc compiling hello world. Reviewers: dberris, pelikan Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D38650 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315635 91177308-0d34-0410-b5e6-96231b3b80d8 Keith Wyss 2 years ago
1 changed file(s) with 221 addition(s) and 23 deletion(s). Raw diff Collapse all Expand all
6565 cl::desc("Aggregate stack times across threads"),
6666 cl::sub(Stack), cl::init(false));
6767
68 /// A helper struct to work with formatv and XRayRecords. Makes it easier to use
69 /// instrumentation map names or addresses in formatted output.
// Command-line flag -all-stacks for the Stack subcommand; it defaults to false.
// NOTE(review): the value type of this cl::opt is not visible in this
// rendering; cl::init(false) suggests bool -- confirm against the real source.
68 static cl::opt
69 DumpAllStacks("all-stacks",
70 cl::desc("Dump sum of timings for all stacks. "
71 "By default separates stacks per-thread."),
72 cl::sub(Stack), cl::init(false));
// Short spelling -all that aliases DumpAllStacks, also under the Stack
// subcommand.
73 static cl::alias DumpAllStacksShort("all", cl::aliasopt(DumpAllStacks),
74 cl::desc("Alias for -all-stacks"),
75 cl::sub(Stack));
76
77 // TODO(kpw): Add other interesting formats, e.g. Chrome trace viewer format,
78 // possibly with aggregations or just a linear trace of timings.
// Output formats selectable through the -stack-format option: HUMAN is exposed
// as the value human and FLAMETOOL as the value flame.
79 enum StackOutputFormat { HUMAN, FLAMETOOL };
80
// Command-line option -stack-format for the Stack subcommand. It selects a
// StackOutputFormat value and defaults to HUMAN.
// NOTE(review): the option description below says the format only applies with
// all-stacks, while the help for the human value says it is only valid without
// -all-stacks. The argument checks later in this file reject HUMAN combined
// with -all-stacks and reject any non-HUMAN format without it, so the option
// description looks imprecise -- confirm and reword the help text.
81 static cl::opt StacksOutputFormat(
82 "stack-format",
83 cl::desc("The format that output stacks should be "
84 "output in. Only applies with all-stacks."),
85 cl::values(
86 clEnumValN(HUMAN, "human",
87 "Human readable output. Only valid without -all-stacks."),
88 clEnumValN(FLAMETOOL, "flame",
89 "Format consumable by Brendan Gregg's FlameGraph tool. "
90 "Only valid with -all-stacks.")),
91 cl::sub(Stack), cl::init(HUMAN));
92
93 // Types of values for each stack in a CallTrie.
// The value is chosen on the command line through the -aggregation-type
// option, whose accepted spellings are time and count.
94 enum class AggregationType {
95 TOTAL_TIME, // The total time spent in a stack and its callees.
96 INVOCATION_COUNT // The number of times the stack was invoked.
97 };
98
// Command-line option -aggregation-type for the Stack subcommand. It selects
// an AggregationType and defaults to AggregationType::TOTAL_TIME.
// NOTE(review): the help text for the time value reads
// (spent in an all invocations); this looks like a typo for
// (spent in all invocations). The string is runtime output, so it is left
// unchanged here -- fix it in a code change.
99 static cl::opt RequestedAggregation(
100 "aggregation-type",
101 cl::desc("The type of aggregation to do on call stacks."),
102 cl::values(
103 clEnumValN(
104 AggregationType::TOTAL_TIME, "time",
105 "Capture the total time spent in an all invocations of a stack."),
106 clEnumValN(AggregationType::INVOCATION_COUNT, "count",
107 "Capture the number of times a stack was invoked. "
108 "In flamegraph mode, this count also includes invocations "
109 "of all callees.")),
110 cl::sub(Stack), cl::init(AggregationType::TOTAL_TIME));
111
112 /// A helper struct to work with formatv and XRayRecords. Makes it easier to
113 /// use instrumentation map names or addresses in formatted output.
70114 struct format_xray_record : public FormatAdapter {
71115 explicit format_xray_record(XRayRecord record,
72116 const FuncIdConversionHelper &conv)
273317 return Node;
274318 }
275319
// Declaration of the function that yields the numeric value reported for the
// stack ending at Node; the definitions follow below.
// NOTE(review): the template parameter list was lost in this rendering;
// presumably the function is parameterized on AggregationType -- confirm.
320 template
321 std::size_t GetValueForStack(const TrieNode *Node);
322
323 // When computing total time spent in a stack, we're adding the timings from
324 // its callees and the timings from when it was a leaf.
// Sums Node->TerminalDurations starting from 0uLL, then adds every entry of
// Node->IntermediateDurations on top of that partial sum.
// NOTE(review): the accumulator is unsigned long long but the result is
// returned as std::size_t, which would narrow wherever std::size_t is the
// smaller type -- confirm this is acceptable on 32-bit hosts.
// NOTE(review): the specialization argument is not visible in this rendering;
// from the comment above it is presumably AggregationType::TOTAL_TIME.
325 template <>
326 std::size_t
327 GetValueForStack(const TrieNode *Node) {
328 auto TopSum = std::accumulate(Node->TerminalDurations.begin(),
329 Node->TerminalDurations.end(), 0uLL);
330 return std::accumulate(Node->IntermediateDurations.begin(),
331 Node->IntermediateDurations.end(), TopSum);
332 }
333
334 // Calculates how many times a function was invoked.
// The count is the number of entries in Node->TerminalDurations plus the
// number of entries in Node->IntermediateDurations.
// NOTE(review): the specialization argument is not visible in this rendering;
// presumably it is AggregationType::INVOCATION_COUNT -- confirm.
335 // TODO: Hook up option to produce stacks
336 template <>
337 std::size_t
338 GetValueForStack(const TrieNode *Node) {
339 return Node->TerminalDurations.size() + Node->IntermediateDurations.size();
340 }
341
342 // Make sure there are implementations for each enum value.
// DependentFalseType derives from std::false_type, so the static_assert in the
// generic GetValueForStack definition below fails with its message whenever
// that generic definition is used instead of one of the specializations.
// NOTE(review): the template parameter lists are missing in this rendering;
// presumably the helper depends on the template parameter so that the
// assertion only fires on instantiation -- confirm.
343 template struct DependentFalseType : std::false_type {};
344
345 template
346 std::size_t GetValueForStack(const TrieNode *Node) {
347 static_assert(DependentFalseType::value,
348 "No implementation found for aggregation type provided.");
349 return 0;
350 }
351
276352 class StackTrie {
353 // Avoid the magic number of 4 propagated through the code with an alias.
354 // We use this SmallVector to track the root nodes in a call graph.
355 using RootVector = SmallVector;
277356
278357 // We maintain pointers to the roots of the tries we see.
279 DenseMapSmallVector> Roots;
358 DenseMapRootVector> Roots;
280359
281360 // We make sure all the nodes are accounted for in this list.
282361 std::forward_list NodeStore;
438517 }
439518 }
440519
520 /// Prints timing sums for each stack in each thread.
/// Iterates over Roots and calls printAll once per entry, passing the entry
/// key as the thread id and asking for the thread id to be reported.
/// NOTE(review): the format parameter is not used in this body.
521 template
522 void printAllPerThread(raw_ostream &OS, FuncIdConversionHelper &FN,
523 StackOutputFormat format) {
// NOTE(review): auto without a reference copies each map entry, and printAll
// takes its RootVector by value, so the roots are copied again for each thread.
524 for (auto iter : Roots) {
525 uint32_t threadId = iter.first;
526 RootVector &perThreadRoots = iter.second;
527 bool reportThreadId = true;
528 printAll(OS, FN, perThreadRoots, threadId, reportThreadId);
529 }
530 }
531
441532 /// Prints top stacks from looking at all the leaves and ignoring thread IDs.
442533 /// Stacks that consist of the same function IDs but were called in different
443534 /// thread IDs are not considered unique in this printout.
444535 void printIgnoringThreads(raw_ostream &OS, FuncIdConversionHelper &FN) {
445 SmallVector RootValues;
536 RootVector RootValues;
446537
447538 // Function to pull the values out of a map iterator.
448539 using RootsType = decltype(Roots.begin())::value_type;
458549 print(OS, FN, RootValues);
459550 }
460551
552 /// Creates a merged list of Tries for unique stacks that disregards their
553 /// thread IDs.
/// Walks every root node of every entry in Roots. A root whose FuncId is not
/// yet present in the result is appended as is; otherwise the existing entry
/// and the new root are passed to mergeTrieNodes (with a null parent and the
/// given NodeStore), the returned node is appended, and the old entry erased.
/// Returns the resulting vector of roots by value.
/// NOTE(review): the NodeStore parameter has the same name as the NodeStore
/// member declared in StackTrie; assuming this is a StackTrie member function,
/// the name refers to the parameter inside this body.
554 RootVector mergeAcrossThreads(std::forward_list &NodeStore) {
555 RootVector MergedByThreadRoots;
// NOTE(review): auto without a reference copies each map entry per iteration.
556 for (auto MapIter : Roots) {
557 const auto &RootNodeVector = MapIter.second;
558 for (auto *Node : RootNodeVector) {
559 auto MaybeFoundIter =
560 find_if(MergedByThreadRoots, [Node](TrieNode *elem) {
561 return Node->FuncId == elem->FuncId;
562 });
563 if (MaybeFoundIter == MergedByThreadRoots.end()) {
564 MergedByThreadRoots.push_back(Node);
565 } else {
// NOTE(review): push_back runs before erase(MaybeFoundIter). If push_back has
// to grow the vector storage, MaybeFoundIter may no longer be valid when it is
// passed to erase -- confirm against the SmallVector iterator guarantees.
566 MergedByThreadRoots.push_back(
567 mergeTrieNodes(**MaybeFoundIter, *Node, nullptr, NodeStore));
568 MergedByThreadRoots.erase(MaybeFoundIter);
569 }
570 }
571 }
572 return MergedByThreadRoots;
573 }
574
575 /// Print timing sums for all stacks merged by Thread ID.
/// Merges the roots with mergeAcrossThreads and hands the result to printAll
/// with thread reporting turned off.
/// NOTE(review): the format parameter is not used in this body.
576 template
577 void printAllAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN,
578 StackOutputFormat format) {
// Local list passed to mergeAcrossThreads as its NodeStore argument.
// NOTE(review): presumably it holds the nodes created while merging and so
// must stay alive until printing is done -- confirm in mergeTrieNodes.
579 std::forward_list AggregatedNodeStore;
580 RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore);
// The thread id passed below is a placeholder: printSingleStack only prints
// the id when its ReportThread argument is true.
581 bool reportThreadId = false;
582 printAll(OS, FN, MergedByThreadRoots,
583 /*threadId*/ 0, reportThreadId);
584 }
585
461586 /// Merges the trie by thread id before printing top stacks.
462587 void printAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN) {
463588 std::forward_list AggregatedNodeStore;
464 SmallVector RootValues;
465 for (auto MapIter : Roots) {
466 const auto &RootNodeVector = MapIter.second;
467 for (auto *Node : RootNodeVector) {
468 auto MaybeFoundIter = find_if(RootValues, [Node](TrieNode *elem) {
469 return Node->FuncId == elem->FuncId;
470 });
471 if (MaybeFoundIter == RootValues.end()) {
472 RootValues.push_back(Node);
473 } else {
474 RootValues.push_back(mergeTrieNodes(**MaybeFoundIter, *Node, nullptr,
475 AggregatedNodeStore));
476 RootValues.erase(MaybeFoundIter);
477 }
478 }
479 }
480 print(OS, FN, RootValues);
589 RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore);
590 print(OS, FN, MergedByThreadRoots);
591 }
592
593 // TODO: Add a format option when more than one are supported.
// Prints one line per node reachable from the given roots. For each root the
// traversal uses the explicit stack S: pop a node, print it through
// printSingleStack, then push all of its Callees. Every visited node is
// printed, not only leaves. ThreadId and ReportThread are forwarded unchanged.
// NOTE(review): RootValues is taken by value, so callers copy the vector.
594 template
595 void printAll(raw_ostream &OS, FuncIdConversionHelper &FN,
596 RootVector RootValues, uint32_t ThreadId, bool ReportThread) {
597 SmallVector S;
598 for (const auto *N : RootValues) {
// S is already empty whenever the inner loop has finished, so this clear only
// matters as a defensive reset before seeding the next root.
599 S.clear();
600 S.push_back(N);
601 while (!S.empty()) {
602 auto *Top = S.pop_back_val();
603 printSingleStack(OS, FN, ReportThread, ThreadId, Top);
604 for (const auto *C : Top->Callees)
605 S.push_back(C);
606 }
607 }
608 }
609
610 /// Prints values for stacks in a format consumable for the flamegraph.pl
611 /// tool. This is a line based format that lists each level in the stack
612 /// hierarchy in a semicolon delimited form followed by a space and a numeric
613 /// value. If breaking down by thread, the thread ID will be added as the
614 /// root level of the stack.
/// The numeric value is the result of GetValueForStack applied to Node, and
/// each frame is rendered with Converter.SymbolOrNumber on its FuncId.
/// NOTE(review): every frame, including the last one, is followed by a
/// semicolon, so the frame list ends with a separator before the space --
/// confirm the consuming tool tolerates the trailing separator.
615 template
616 void printSingleStack(raw_ostream &OS, FuncIdConversionHelper &Converter,
617 bool ReportThread, uint32_t ThreadId,
618 const TrieNode *Node) {
619 if (ReportThread)
620 OS << "thread_" << ThreadId << ";";
// Collect Node and its ancestors by following Parent pointers until a node
// with a null Parent is reached; lineage then runs from Node up to the root.
621 SmallVector lineage{};
622 lineage.push_back(Node);
623 while (lineage.back()->Parent != nullptr)
624 lineage.push_back(lineage.back()->Parent);
// Emit from the back of lineage so the outermost caller is printed first.
625 while (!lineage.empty()) {
626 OS << Converter.SymbolOrNumber(lineage.back()->FuncId) << ";";
627 lineage.pop_back();
628 }
629 OS << " " << GetValueForStack(Node) << "\n";
481630 }
482631
483632 void print(raw_ostream &OS, FuncIdConversionHelper &FN,
484 SmallVector RootValues) {
633 RootVector RootValues) {
485634 // Go through each of the roots, and traverse the call stack, producing the
486635 // aggregates as you go along. Remember these aggregates and stacks, and
487636 // show summary statistics about:
501650 S.emplace_back(N);
502651
503652 while (!S.empty()) {
504 auto Top = S.pop_back_val();
653 auto *Top = S.pop_back_val();
505654
506655 // We only start printing the stack (by walking up the parent pointers)
507656 // when we get to a leaf function.
586735 "that aggregates threads."),
587736 std::make_error_code(std::errc::invalid_argument));
588737
738 if (!DumpAllStacks && StacksOutputFormat != HUMAN)
739 return make_error(
740 Twine("Can't specify a non-human format without -all-stacks."),
741 std::make_error_code(std::errc::invalid_argument));
742
743 if (DumpAllStacks && StacksOutputFormat == HUMAN)
744 return make_error(
745 Twine("You must specify a non-human format when reporting with "
746 "-all-stacks."),
747 std::make_error_code(std::errc::invalid_argument));
748
589749 symbolize::LLVMSymbolizer::Options Opts(
590750 symbolize::FunctionNameKind::LinkageName, true, true, false, "");
591751 symbolize::LLVMSymbolizer Symbolizer(Opts);
624784 "No instrumented calls were accounted in the input file.",
625785 make_error_code(errc::result_out_of_range));
626786 }
787
788 // Report the stacks in a long form mode for another tool to analyze.
789 if (DumpAllStacks) {
790 if (AggregateThreads) {
791 switch (RequestedAggregation) {
792 case AggregationType::TOTAL_TIME:
793 ST.printAllAggregatingThreads(
794 outs(), FuncIdHelper, StacksOutputFormat);
795 break;
796 case AggregationType::INVOCATION_COUNT:
797 ST.printAllAggregatingThreads(
798 outs(), FuncIdHelper, StacksOutputFormat);
799 break;
800 default:
801 return make_error(
802 "Illegal value for aggregation-type.",
803 make_error_code(errc::result_out_of_range));
804 }
805 } else {
806 switch (RequestedAggregation) {
807 case AggregationType::TOTAL_TIME:
808 ST.printAllPerThread(outs(), FuncIdHelper,
809 StacksOutputFormat);
810 break;
811 case AggregationType::INVOCATION_COUNT:
812 ST.printAllPerThread(
813 outs(), FuncIdHelper, StacksOutputFormat);
814 break;
815 default:
816 return make_error(
817 "Illegal value for aggregation-type.",
818 make_error_code(errc::result_out_of_range));
819 }
820 }
821 return Error::success();
822 }
823
824 // We're only outputting top stacks.
627825 if (AggregateThreads) {
628826 ST.printAggregatingThreads(outs(), FuncIdHelper);
629827 } else if (SeparateThreadStacks) {