llvm.org GIT mirror llvm / 79aaa0b
Refactor FileCheck somewhat to reduce memory allocation and copying. Also make some readability improvements. Both the check file and input file have to be fully buffered to normalize their whitespace. But previously this would be done in a stack SmallString and then copied into a heap-allocated MemoryBuffer. That seems pretty wasteful, especially for something like FileCheck where there are only ever two such entities. This just rearranges the code so that we can keep the canonicalized buffers on the stack of the main function and use reasonably large stack buffers to reduce allocation. A rough estimate seems to show that about 80% of LLVM's .ll and .s files will fit into a 4k buffer, so this should completely avoid heap allocation for the buffer in those cases. My system's malloc is fast enough that the allocations don't directly show up in timings. However, on some very slow test cases, this saves 1% - 2% by avoiding the copy into the heap-allocated buffer. This also splits out the code which checks the input into a helper much like the code to build the checks as that made the code much more readable to me. Nitpicks and suggestions welcome here. It has really exposed a *bunch* of stuff that could be cleaned up though, so I'm probably going to go and spring clean all of this code as I have more changes coming to speed things up. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289378 91177308-0d34-0410-b5e6-96231b3b80d8 Chandler Carruth 2 years ago
1 changed file(s) with 92 addition(s) and 89 deletion(s). Raw diff Collapse all Expand all
653653 StringMap &VariableTable) const;
654654 };
655655
656 /// Canonicalize whitespaces in the input file. Line endings are replaced
657 /// with UNIX-style '\n'.
656 /// Canonicalize whitespace in the file. Line endings are replaced with
657 /// UNIX-style '\n'.
658658 ///
659659 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
660660 /// characters to a single space.
661 static std::unique_ptr
662 CanonicalizeInputFile(std::unique_ptr MB,
663 bool PreserveHorizontal) {
664 SmallString<128> NewFile;
665 NewFile.reserve(MB->getBufferSize());
666
667 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
661 static StringRef CanonicalizeFile(MemoryBuffer &MB, bool PreserveHorizontal,
662 SmallVectorImpl &OutputBuffer) {
663 OutputBuffer.reserve(MB.getBufferSize());
664
665 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
668666 Ptr != End; ++Ptr) {
669667 // Eliminate trailing dosish \r.
670668 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
674672 // If current char is not a horizontal whitespace or if horizontal
675673 // whitespace canonicalization is disabled, dump it to output as is.
676674 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
677 NewFile.push_back(*Ptr);
675 OutputBuffer.push_back(*Ptr);
678676 continue;
679677 }
680678
681679 // Otherwise, add one space and advance over neighboring space.
682 NewFile.push_back(' ');
680 OutputBuffer.push_back(' ');
683681 while (Ptr+1 != End &&
684682 (Ptr[1] == ' ' || Ptr[1] == '\t'))
685683 ++Ptr;
686684 }
687685
688 return std::unique_ptr(
689 MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()));
686 // Add a null byte and then return all but that byte.
687 OutputBuffer.push_back('\0');
688 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
690689 }
691690
692691 static bool IsPartOfWord(char c) {
860859 /// ReadCheckFile - Read the check file, which specifies the sequence of
861860 /// expected strings. The strings are added to the CheckStrings vector.
862861 /// Returns true in case of an error, false otherwise.
863 static bool ReadCheckFile(SourceMgr &SM,
862 static bool ReadCheckFile(SourceMgr &SM, StringRef Buffer,
864863 std::vector &CheckStrings) {
865 ErrorOr> FileOrErr =
866 MemoryBuffer::getFileOrSTDIN(CheckFilename);
867 if (std::error_code EC = FileOrErr.getError()) {
868 errs() << "Could not open check file '" << CheckFilename
869 << "': " << EC.message() << '\n';
870 return true;
871 }
872
873 // If we want to canonicalize whitespace, strip excess whitespace from the
874 // buffer containing the CHECK lines. Remove DOS style line endings.
875 std::unique_ptr F = CanonicalizeInputFile(
876 std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace);
877
878 // Find all instances of CheckPrefix followed by : in the file.
879 StringRef Buffer = F->getBuffer();
880
881 SM.AddNewSourceBuffer(std::move(F), SMLoc());
882
883864 std::vector ImplicitNegativeChecks;
884865 for (const auto &PatternString : ImplicitCheckNot) {
885866 // Create a buffer with fake command line content in order to display the
13071288 errs() << "\n";
13081289 }
13091290
1310 int main(int argc, char **argv) {
1311 sys::PrintStackTraceOnErrorSignal(argv[0]);
1312 PrettyStackTraceProgram X(argc, argv);
1313 cl::ParseCommandLineOptions(argc, argv);
1314
1315 if (!ValidateCheckPrefixes()) {
1316 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1317 "start with a letter and contain only alphanumeric characters, "
1318 "hyphens and underscores\n";
1319 return 2;
1320 }
1321
1322 AddCheckPrefixIfNeeded();
1323
1324 SourceMgr SM;
1325
1326 // Read the expected strings from the check file.
1327 std::vector CheckStrings;
1328 if (ReadCheckFile(SM, CheckStrings))
1329 return 2;
1330
1331 // Open the file to check and add it to SourceMgr.
1332 ErrorOr> FileOrErr =
1333 MemoryBuffer::getFileOrSTDIN(InputFilename);
1334 if (std::error_code EC = FileOrErr.getError()) {
1335 errs() << "Could not open input file '" << InputFilename
1336 << "': " << EC.message() << '\n';
1337 return 2;
1338 }
1339 std::unique_ptr &File = FileOrErr.get();
1340
1341 if (File->getBufferSize() == 0 && !AllowEmptyInput) {
1342 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1343 DumpCommandLine(argc, argv);
1344 return 2;
1345 }
1346
1347 // Remove duplicate spaces in the input file if requested.
1348 // Remove DOS style line endings.
1349 std::unique_ptr F =
1350 CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
1351
1352 // Check that we have all of the expected strings, in order, in the input
1353 // file.
1354 StringRef Buffer = F->getBuffer();
1355
1356 SM.AddNewSourceBuffer(std::move(F), SMLoc());
1291 /// Check the input to FileCheck provided in the \p Buffer against the \p
1292 /// CheckStrings read from the check file.
1293 ///
1294 /// Returns false if the input fails to satisfy the checks.
1295 bool CheckInput(SourceMgr &SM, StringRef Buffer,
1296 ArrayRef CheckStrings) {
1297 bool ChecksFailed = false;
13571298
13581299 /// VariableTable - This holds all the current filecheck variables.
13591300 StringMap VariableTable;
13601301
1361 bool hasError = false;
1362
13631302 unsigned i = 0, j = 0, e = CheckStrings.size();
1364
13651303 while (true) {
13661304 StringRef CheckRegion;
13671305 if (j == e) {
13771315 size_t MatchLabelLen = 0;
13781316 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
13791317 MatchLabelLen, VariableTable);
1380 if (MatchLabelPos == StringRef::npos) {
1381 hasError = true;
1382 break;
1383 }
1318 if (MatchLabelPos == StringRef::npos)
1319 // Immediately bail if CHECK-LABEL fails, nothing else we can do.
1320 return false;
13841321
13851322 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
13861323 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
13971334 VariableTable);
13981335
13991336 if (MatchPos == StringRef::npos) {
1400 hasError = true;
1337 ChecksFailed = true;
14011338 i = j;
14021339 break;
14031340 }
14091346 break;
14101347 }
14111348
1412 return hasError ? 1 : 0;
1413 }
1349 // Success if no checks failed.
1350 return !ChecksFailed;
1351 }
1352
1353 int main(int argc, char **argv) {
1354 sys::PrintStackTraceOnErrorSignal(argv[0]);
1355 PrettyStackTraceProgram X(argc, argv);
1356 cl::ParseCommandLineOptions(argc, argv);
1357
1358 if (!ValidateCheckPrefixes()) {
1359 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1360 "start with a letter and contain only alphanumeric characters, "
1361 "hyphens and underscores\n";
1362 return 2;
1363 }
1364
1365 AddCheckPrefixIfNeeded();
1366
1367 SourceMgr SM;
1368
1369 // Read the expected strings from the check file.
1370 ErrorOr> CheckFileOrErr =
1371 MemoryBuffer::getFileOrSTDIN(CheckFilename);
1372 if (std::error_code EC = CheckFileOrErr.getError()) {
1373 errs() << "Could not open check file '" << CheckFilename
1374 << "': " << EC.message() << '\n';
1375 return 2;
1376 }
1377 MemoryBuffer &CheckFile = *CheckFileOrErr.get();
1378
1379 SmallString<4096> CheckFileBuffer;
1380 StringRef CheckFileText =
1381 CanonicalizeFile(CheckFile, NoCanonicalizeWhiteSpace, CheckFileBuffer);
1382
1383 SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(
1384 CheckFileText, CheckFile.getBufferIdentifier()),
1385 SMLoc());
1386
1387 std::vector CheckStrings;
1388 if (ReadCheckFile(SM, CheckFileText, CheckStrings))
1389 return 2;
1390
1391
1392 // Open the file to check and add it to SourceMgr.
1393 ErrorOr> InputFileOrErr =
1394 MemoryBuffer::getFileOrSTDIN(InputFilename);
1395 if (std::error_code EC = InputFileOrErr.getError()) {
1396 errs() << "Could not open input file '" << InputFilename
1397 << "': " << EC.message() << '\n';
1398 return 2;
1399 }
1400 MemoryBuffer &InputFile = *InputFileOrErr.get();
1401
1402 if (InputFile.getBufferSize() == 0 && !AllowEmptyInput) {
1403 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1404 DumpCommandLine(argc, argv);
1405 return 2;
1406 }
1407
1408 SmallString<4096> InputFileBuffer;
1409 StringRef InputFileText =
1410 CanonicalizeFile(InputFile, NoCanonicalizeWhiteSpace, InputFileBuffer);
1411
1412 SM.AddNewSourceBuffer(
1413 MemoryBuffer::getMemBuffer(InputFileText, InputFile.getBufferIdentifier()), SMLoc());
1414
1415 return CheckInput(SM, InputFileText, CheckStrings) ? EXIT_SUCCESS : 1;
1416 }