llvm.org GIT mirror llvm / 6039b79
Retry^2 "[llvm-profdata] Add option to ingest filepaths from a file" Changes since the initial commit: - Normalize file paths read from the file to prevent Windows path separators from escaping parts of the path. - Since we need to store the normalized file paths in WeightedFile, don't do tricky things to keep the source MemoryBuffer alive. - Don't use list-initialization for a std::string in WeightedFile. Differential Revision: http://reviews.llvm.org/D20980 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271953 91177308-0d34-0410-b5e6-96231b3b80d8 Vedant Kumar 3 years ago
3 changed file(s) with 89 addition(s) and 14 deletion(s). Raw diff Collapse all Expand all
4343 nature of the training runs it may be useful to adjust the weight given to each
4444 input file by using the ``-weighted-input`` option.
4545
46 Profiles passed in via ``-weighted-input``, ``-input-files``, or via positional
47 arguments are processed once for each time they are seen.
48
4649
4750 OPTIONS
4851 ^^^^^^^
6366 ``weight``, where ``weight`` is a decimal integer >= 1.
6467 Input files specified without using this option are assigned a default
6568 weight of 1. Examples are shown below.
69
70 .. option:: -input-files=path, -f=path
71
72 Specify a file which contains a list of files to merge. The entries in this
73 file are newline-separated. Lines starting with '#' are skipped. Entries may
74 be of the form <filename> or <weight>,<filename>.
6675
6776 .. option:: -instr (default)
6877
0 RUN: printf '# comment 1\n' > %t
1 RUN: printf ' # comment 2\n' >> %t
2
3 RUN: printf 'bar\n' >> %t
4 RUN: printf ' baz\n' >> %t
5
6 RUN: printf '2,%t.weighted\n' >> %t
7 RUN: printf ' ' > %t.weighted
8
9 RUN: llvm-profdata merge -input-files %t -dump-input-file-list foo -o /dev/null | FileCheck %s
10 RUN: llvm-profdata merge -f %t -dump-input-file-list foo -o /dev/null | FileCheck %s
11
12 CHECK: 1,foo
13 CHECK-NEXT: 1,bar
14 CHECK-NEXT: 1,baz
15 CHECK-NEXT: 2,{{.*}}.weighted
107107 }
108108
// An input profile path paired with the weight applied to it when merging.
//
// Diff hunk: removed and added lines are interleaved below (the +/- markers
// were lost in extraction). Going by the commit message, the std::string
// member and the parenthesized-initializer constructor are the new versions:
// the struct now owns its (normalized) path instead of viewing a buffer that
// lives elsewhere.
//
// NOTE(review): the default constructor leaves Weight uninitialized.
109109 struct WeightedFile {
110 StringRef Filename;
110 std::string Filename;
111111 uint64_t Weight;
112112
113113 WeightedFile() {}
114114
115 WeightedFile(StringRef F, uint64_t W) : Filename{F}, Weight{W} {}
115 WeightedFile(std::string F, uint64_t W) : Filename(F), Weight(W) {}
116116 };
// NOTE(review): the SmallVector template arguments appear to have been
// stripped by the page extraction -- confirm against the real source.
117117 typedef SmallVector WeightedFileVector;
118118
207207 Writer->write(ProfileMap);
208208 }
209209
// Returns an owning std::string holding Filename after it has been passed
// through sys::path::native.
//
// Per the commit message, this normalization exists so that Windows path
// separators in paths read from an input list cannot escape parts of the path.
// NOTE(review): presumably sys::path::native rewrites separators to the host
// platform's preferred form -- confirm against the LLVM Support docs.
210 static std::string canonicalizeFilePath(StringRef Filename) {
// Scratch buffer that receives the converted path.
211 SmallString<256> CanonicalFilename;
212 sys::path::native(Filename, CanonicalFilename);
// Copy out of the local buffer so the result outlives this function.
213 return StringRef(CanonicalFilename).str();
214 }
215
// Parses one "weight,filename" entry into a WeightedFile.
//
// Diff hunk: removed and added lines are interleaved below (the +/- markers
// were lost in extraction), so this text is not compilable as shown.
//
// The entry is split on ','. The weight part must parse as a base-10 integer
// that is at least 1; otherwise exitWithError is called. The filename part
// must name an existing file; otherwise exitWithErrorCode is called with
// no_such_file_or_directory.
210216 static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
211 StringRef WeightStr, FileName;
212 std::tie(WeightStr, FileName) = WeightedFilename.split(',');
217 StringRef WeightStr, FilenameStr;
218 std::tie(WeightStr, FilenameStr) = WeightedFilename.split(',');
213219
214220 uint64_t Weight;
// Exits if the weight fails to parse or is below 1.
215221 if (WeightStr.getAsInteger(10, Weight) || Weight < 1)
216222 exitWithError("Input weight must be a positive integer.");
217223
218 if (!sys::fs::exists(FileName))
// In the variant using FilenameStr, the path is canonicalized first; the
// existence check, the error report and the returned WeightedFile all use
// the canonicalized string.
224 std::string CanonicalFilename = canonicalizeFilePath(FilenameStr);
225
226 if (!sys::fs::exists(CanonicalFilename))
219227 exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
220 FileName);
221
222 return WeightedFile(FileName, Weight);
228 CanonicalFilename);
229
230 return WeightedFile(StringRef(CanonicalFilename).str(), Weight);
231 }
232
// Appends to WFV one WeightedFile per usable entry listed in the file named
// by InputFilenamesFile.
//
// An empty InputFilenamesFile is a no-op. If the list cannot be opened,
// exitWithErrorCode is called with the reported error. The contents are split
// on '\n' with empty entries dropped; each entry is trimmed of surrounding
// spaces, tabs, vertical tabs, form feeds and carriage returns. Entries that
// start with '#' after trimming are skipped. An entry without a comma gets
// weight 1; an entry with a comma is handed to parseWeightedFile.
233 static void parseInputFilenamesFile(const StringRef &InputFilenamesFile,
234 WeightedFileVector &WFV) {
235 if (InputFilenamesFile == "")
236 return;
237
// NOTE(review): getFileOrSTDIN presumably reads standard input for a
// special name such as "-" -- confirm against the MemoryBuffer docs.
238 auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFilenamesFile);
239 if (!BufOrError)
240 exitWithErrorCode(BufOrError.getError(), InputFilenamesFile);
241
242 auto Buffer = std::move(*BufOrError);
243 StringRef Data = Buffer->getBuffer();
// NOTE(review): the SmallVector template arguments appear to have been
// stripped by the page extraction.
244 SmallVector Entries;
245 Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
246 for (const StringRef &FileWeightEntry : Entries) {
// '\r' is in the trim set, so a trailing carriage return is removed too.
247 StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r");
248 // Skip comments.
249 if (SanitizedEntry.startswith("#"))
250 continue;
251 // If there's no comma, it's an unweighted profile.
252 else if (SanitizedEntry.find(',') == StringRef::npos)
253 WFV.emplace_back(canonicalizeFilePath(SanitizedEntry), 1);
254 else
255 WFV.emplace_back(parseWeightedFile(SanitizedEntry));
256 }
223257 }
224258
225259 static int merge_main(int argc, const char *argv[]) {
227261 cl::desc(""));
228262 cl::list WeightedInputFilenames("weighted-input",
229263 cl::desc(","));
264 cl::opt InputFilenamesFile(
265 "input-files", cl::init(""),
266 cl::desc("Path to file containing newline-separated "
267 "[,] entries"));
268 cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
269 cl::aliasopt(InputFilenamesFile));
270 cl::opt DumpInputFileList(
271 "dump-input-file-list", cl::init(false), cl::Hidden,
272 cl::desc("Dump the list of input files and their weights, then exit"));
230273 cl::opt OutputFilename("output", cl::value_desc("output"),
231274 cl::init("-"), cl::Required,
232275 cl::desc("Output file"));
248291
249292 cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
250293
251 if (InputFilenames.empty() && WeightedInputFilenames.empty())
294 WeightedFileVector WeightedInputs;
295 for (StringRef Filename : InputFilenames)
296 WeightedInputs.emplace_back(canonicalizeFilePath(Filename), 1);
297 for (StringRef WeightedFilename : WeightedInputFilenames)
298 WeightedInputs.emplace_back(parseWeightedFile(WeightedFilename));
299 parseInputFilenamesFile(InputFilenamesFile, WeightedInputs);
300
301 if (WeightedInputs.empty())
252302 exitWithError("No input files specified. See " +
253303 sys::path::filename(argv[0]) + " -help");
254304
255 WeightedFileVector WeightedInputs;
256 for (StringRef Filename : InputFilenames)
257 WeightedInputs.push_back(WeightedFile(Filename, 1));
258 for (StringRef WeightedFilename : WeightedInputFilenames)
259 WeightedInputs.push_back(parseWeightedFile(WeightedFilename));
305 if (DumpInputFileList) {
306 for (auto &WF : WeightedInputs)
307 outs() << WF.Weight << "," << WF.Filename << "\n";
308 return 0;
309 }
260310
261311 if (ProfileKind == instr)
262312 mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat,