llvm.org GIT mirror llvm / 8771e17
Retry "[llvm-profdata] Add option to ingest filepaths from a file" Changes since the initial commit: - Normalize file paths read from the file to prevent Windows path separators from escaping parts of the path. - Since we need to store the normalized file paths in WeightedFile, don't do tricky things to keep the source MemoryBuffer alive. Differential Revision: http://reviews.llvm.org/D20980 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271949 91177308-0d34-0410-b5e6-96231b3b80d8 Vedant Kumar 3 years ago
3 changed file(s) with 84 addition(s) and 14 deletion(s). Raw diff Collapse all Expand all
4343 nature of the training runs it may be useful to adjust the weight given to each
4444 input file by using the ``-weighted-input`` option.
4545
46 Profiles passed in via ``-weighted-input``, ``-input-files``, or via positional
47 arguments are processed once for each time they are seen.
48
4649
4750 OPTIONS
4851 ^^^^^^^
6366 ``weight``, where ``weight`` is a decimal integer >= 1.
6467 Input files specified without using this option are assigned a default
6568 weight of 1. Examples are shown below.
69
70 .. option:: -input-files=path, -f=path
71
72 Specify a file which contains a list of files to merge. The entries in this
73 file are newline-separated. Lines starting with '#' are skipped. Entries may
74 be of the form <filename> or <weight>,<filename>.
6675
6776 .. option:: -instr (default)
6877
0 RUN: printf '# comment 1\n' > %t
1 RUN: printf ' # comment 2\n' >> %t
2
3 RUN: printf 'bar\n' >> %t
4 RUN: printf ' baz\n' >> %t
5
6 RUN: printf '2,%t.weighted\n' >> %t
7 RUN: printf ' ' > %t.weighted
8
9 RUN: llvm-profdata merge -input-files %t -dump-input-file-list foo -o /dev/null | FileCheck %s
10 RUN: llvm-profdata merge -f %t -dump-input-file-list foo -o /dev/null | FileCheck %s
11
12 CHECK: 1,foo
13 CHECK-NEXT: 1,bar
14 CHECK-NEXT: 1,baz
15 CHECK-NEXT: 2,{{.*}}.weighted
107107 }
108108
/// A profile input path together with the weight applied to it when merging.
///
/// The path is held by value (std::string), so a WeightedFile does not depend
/// on the lifetime of whatever buffer the path was originally read from.
struct WeightedFile {
  std::string Filename;
  // Defaults to 1: parseWeightedFile() rejects any weight below 1, so this is
  // the smallest value a valid entry can carry.
  uint64_t Weight = 1;

  WeightedFile() = default;

  /// \param F path of the profile; taken by value and moved into place.
  /// \param W merge weight for this profile.
  WeightedFile(std::string F, uint64_t W)
      : Filename{std::move(F)}, Weight{W} {}
};
117117 typedef SmallVector WeightedFileVector;
118118
208208 }
209209
210210 static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
211 StringRef WeightStr, FileName;
212 std::tie(WeightStr, FileName) = WeightedFilename.split(',');
211 StringRef WeightStr, FilenameStr;
212 std::tie(WeightStr, FilenameStr) = WeightedFilename.split(',');
213213
214214 uint64_t Weight;
215215 if (WeightStr.getAsInteger(10, Weight) || Weight < 1)
216216 exitWithError("Input weight must be a positive integer.");
217217
218 if (!sys::fs::exists(FileName))
218 SmallString<256> CanonicalFilename;
219 sys::path::native(FilenameStr, CanonicalFilename);
220
221 if (!sys::fs::exists(CanonicalFilename))
219222 exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
220 FileName);
221
222 return WeightedFile(FileName, Weight);
223 CanonicalFilename);
224
225 return WeightedFile(StringRef(CanonicalFilename).str(), Weight);
226 }
227
228 static void parseInputFilenamesFile(const StringRef &InputFilenamesFile,
229 WeightedFileVector &WFV) {
230 if (InputFilenamesFile == "")
231 return;
232
233 auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFilenamesFile);
234 if (!BufOrError)
235 exitWithErrorCode(BufOrError.getError(), InputFilenamesFile);
236
237 auto Buffer = std::move(*BufOrError);
238 StringRef Data = Buffer->getBuffer();
239 SmallVector Entries;
240 Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
241 for (const StringRef &FileWeightEntry : Entries) {
242 StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r");
243 // Skip comments.
244 if (SanitizedEntry.startswith("#"))
245 continue;
246 // If there's no comma, it's an unweighted profile.
247 else if (SanitizedEntry.rfind(',') == StringRef::npos)
248 WFV.emplace_back(SanitizedEntry, 1);
249 else
250 WFV.emplace_back(parseWeightedFile(SanitizedEntry));
251 }
223252 }
224253
225254 static int merge_main(int argc, const char *argv[]) {
227256 cl::desc(""));
228257 cl::list WeightedInputFilenames("weighted-input",
229258 cl::desc(","));
259 cl::opt InputFilenamesFile(
260 "input-files", cl::init(""),
261 cl::desc("Path to file containing newline-separated "
262 "[,] entries"));
263 cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
264 cl::aliasopt(InputFilenamesFile));
265 cl::opt DumpInputFileList(
266 "dump-input-file-list", cl::init(false), cl::Hidden,
267 cl::desc("Dump the list of input files and their weights, then exit"));
230268 cl::opt OutputFilename("output", cl::value_desc("output"),
231269 cl::init("-"), cl::Required,
232270 cl::desc("Output file"));
248286
249287 cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
250288
251 if (InputFilenames.empty() && WeightedInputFilenames.empty())
289 WeightedFileVector WeightedInputs;
290 for (StringRef Filename : InputFilenames)
291 WeightedInputs.emplace_back(Filename, 1);
292 for (StringRef WeightedFilename : WeightedInputFilenames)
293 WeightedInputs.emplace_back(parseWeightedFile(WeightedFilename));
294 parseInputFilenamesFile(InputFilenamesFile, WeightedInputs);
295
296 if (WeightedInputs.empty())
252297 exitWithError("No input files specified. See " +
253298 sys::path::filename(argv[0]) + " -help");
254299
255 WeightedFileVector WeightedInputs;
256 for (StringRef Filename : InputFilenames)
257 WeightedInputs.push_back(WeightedFile(Filename, 1));
258 for (StringRef WeightedFilename : WeightedInputFilenames)
259 WeightedInputs.push_back(parseWeightedFile(WeightedFilename));
300 if (DumpInputFileList) {
301 for (auto &WF : WeightedInputs)
302 outs() << WF.Weight << "," << WF.Filename << "\n";
303 return 0;
304 }
260305
261306 if (ProfileKind == instr)
262307 mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat,