llvm.org GIT mirror llvm / 515396c
Add a module Hash in the bitcode and the combined index, implementing a kind of "build-id". This is intended to be used for ThinLTO incremental builds. Differential Revision: http://reviews.llvm.org/D18213 From: Mehdi Amini <mehdi.amini@apple.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265095 91177308-0d34-0410-b5e6-96231b3b80d8 Mehdi Amini 3 years ago
11 changed file(s) with 232 addition(s) and 38 deletion(s). Raw diff Collapse all Expand all
445445 using SimpleBitstreamCursor::canSkipToPos;
446446 using SimpleBitstreamCursor::AtEndOfStream;
447447 using SimpleBitstreamCursor::GetCurrentBitNo;
448 using SimpleBitstreamCursor::getCurrentByteNo;
449 using SimpleBitstreamCursor::getPointerToByte;
448450 using SimpleBitstreamCursor::getBitStreamReader;
449451 using SimpleBitstreamCursor::JumpToBit;
450452 using SimpleBitstreamCursor::fillCurWord;
106106
107107 // SOURCE_FILENAME: [namechar x N]
108108 MODULE_CODE_SOURCE_FILENAME = 16,
109
110 // HASH: [5*i32]
111 MODULE_CODE_HASH = 17,
109112 };
110113
111114 /// PARAMATTR blocks have code for defining a parameter attribute set.
182185 // The module path symbol table only has one code (MST_CODE_ENTRY).
183186 enum ModulePathSymtabCodes {
184187 MST_CODE_ENTRY = 1, // MST_ENTRY: [modid, namechar x N]
188 MST_CODE_HASH = 2, // MST_HASH: [5*i32]
185189 };
186190
187191 // The summary section uses different codes in the per-module
106106 /// for use in ThinLTO optimization).
107107 void WriteBitcodeToFile(const Module *M, raw_ostream &Out,
108108 bool ShouldPreserveUseListOrder = false,
109 bool EmitSummaryIndex = false);
109 bool EmitSummaryIndex = false,
110 bool GenerateHash = false);
110111
111112 /// Write the specified module summary index to the given raw output stream,
112113 /// where it will be written in a new bitcode block. This is used when
2323 #include "llvm/IR/Module.h"
2424 #include "llvm/Support/MemoryBuffer.h"
2525 #include "llvm/Support/raw_ostream.h"
26
27 #include <array>
2628
2729 namespace llvm {
2830
227229 void setBitcodeIndex(uint64_t Offset) { BitcodeIndex = Offset; }
228230 };
229231
232 /// 160 bits SHA1
233 typedef std::array<uint32_t, 5> ModuleHash;
234
230235 /// List of global value info structures for a particular value held
231236 /// in the GlobalValueMap. Requires a vector in the case of multiple
232237 /// COMDAT values of the same name.
244249 typedef GlobalValueInfoMapTy::iterator globalvalueinfo_iterator;
245250
246251 /// String table to hold/own module path strings, which additionally holds the
247 /// module ID assigned to each module during the plugin step. The StringMap
248 /// makes a copy of and owns inserted strings.
249 typedef StringMap ModulePathStringTableTy;
252 /// module ID assigned to each module during the plugin step, as well as a hash
253 /// of the module. The StringMap makes a copy of and owns inserted strings.
254 typedef StringMap<std::pair<uint64_t, ModuleHash>> ModulePathStringTableTy;
250255
251256 /// Class to hold module path string table and global value map,
252257 /// and encapsulate methods for operating on them.
303308 GlobalValueMap[ValueGUID].push_back(std::move(Info));
304309 }
305310
306 /// Table of modules, containing an id.
307 const StringMap &modulePaths() const {
311 /// Table of modules, containing module hash and id.
312 const StringMap<std::pair<uint64_t, ModuleHash>> &modulePaths() const {
308313 return ModulePathStringTable;
309314 }
310315
311 /// Table of modules, containing an id.
312 StringMap &modulePaths() { return ModulePathStringTable; }
316 /// Table of modules, containing hash and id.
317 StringMap<std::pair<uint64_t, ModuleHash>> &modulePaths() {
318 return ModulePathStringTable;
319 }
313320
314321 /// Get the module ID recorded for the given module path.
315322 uint64_t getModuleId(const StringRef ModPath) const {
316 return ModulePathStringTable.lookup(ModPath);
323 return ModulePathStringTable.lookup(ModPath).first;
324 }
325
326 /// Get the module SHA1 hash recorded for the given module path.
327 const ModuleHash &getModuleHash(const StringRef ModPath) const {
328 auto It = ModulePathStringTable.find(ModPath);
329 assert(It != ModulePathStringTable.end() && "Module not registered");
330 return It->second.second;
317331 }
318332
319333 /// Add the given per-module index into this module index/summary,
332346 return NewName.str();
333347 }
334348
335 /// Add a new module path, mapped to the given module Id, and return StringRef
336 /// owned by string table map.
337 StringRef addModulePath(StringRef ModPath, uint64_t ModId) {
338 return ModulePathStringTable.insert(std::make_pair(ModPath, ModId))
339 .first->first();
349 /// Add a new module path with the given \p Hash, mapped to the given \p
350 /// ModID, and return an iterator to the entry in the index.
351 ModulePathStringTableTy::iterator
352 addModulePath(StringRef ModPath, uint64_t ModId,
353 ModuleHash Hash = ModuleHash{{0}}) {
354 return ModulePathStringTable.insert(std::make_pair(
355 ModPath,
356 std::make_pair(ModId, Hash))).first;
340357 }
341358
342359 /// Check if the given Module has any functions available for exporting
56315631 }
56325632 continue;
56335633
5634 case BitstreamEntry::Record:
5635 // Once we find the last record of interest, skip the rest.
5636 if (VSTOffset > 0)
5637 Stream.skipRecord(Entry.ID);
5638 else {
5634 case BitstreamEntry::Record: {
56395635 Record.clear();
56405636 auto BitCode = Stream.readRecord(Entry.ID, Record);
56415637 switch (BitCode) {
56475643 if (convertToString(Record, 0, ValueName))
56485644 return error("Invalid record");
56495645 SourceFileName = ValueName.c_str();
5646 break;
5647 }
5648 /// MODULE_CODE_HASH: [5*i32]
5649 case bitc::MODULE_CODE_HASH: {
5650 if (Record.size() != 5)
5651 return error("Invalid hash length " + Twine(Record.size()).str());
5652 if (!TheIndex)
5653 break;
5654 if (TheIndex->modulePaths().empty())
5655 // Does not have any summary emitted.
5656 break;
5657 if (TheIndex->modulePaths().size() != 1)
5658 return error("Don't expect multiple modules defined?");
5659 auto &Hash = TheIndex->modulePaths().begin()->second.second;
5660 int Pos = 0;
5661 for (auto &Val : Record) {
5662 assert(!(Val >> 32) && "Unexpected high bits set");
5663 Hash[Pos++] = Val;
5664 }
56505665 break;
56515666 }
56525667 /// MODULE_CODE_VSTOFFSET: [offset]
57605775 // module path string table entry with an empty (0) ID to take
57615776 // ownership.
57625777 FS->setModulePath(
5763 TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0));
5778 TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)->first());
57645779 static int RefListStartIndex = 4;
57655780 int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs;
57665781 assert(Record.size() >= RefListStartIndex + NumRefs &&
57985813 std::unique_ptr FS =
57995814 llvm::make_unique(getDecodedLinkage(RawLinkage));
58005815 FS->setModulePath(
5801 TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0));
5816 TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)->first());
58025817 for (unsigned I = 2, E = Record.size(); I != E; ++I) {
58035818 unsigned RefValueId = Record[I];
58045819 uint64_t RefGUID = getGUIDFromValueId(RefValueId);
58865901 SmallVector Record;
58875902
58885903 SmallString<128> ModulePath;
5904 ModulePathStringTableTy::iterator LastSeenModulePath;
58895905 while (1) {
58905906 BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
58915907
59065922 break;
59075923 case bitc::MST_CODE_ENTRY: {
59085924 // MST_ENTRY: [modid, namechar x N]
5925 uint64_t ModuleId = Record[0];
5926
59095927 if (convertToString(Record, 1, ModulePath))
59105928 return error("Invalid record");
5911 uint64_t ModuleId = Record[0];
5912 StringRef ModulePathInMap = TheIndex->addModulePath(ModulePath, ModuleId);
5913 ModuleIdMap[ModuleId] = ModulePathInMap;
5929
5930 LastSeenModulePath = TheIndex->addModulePath(ModulePath, ModuleId);
5931 ModuleIdMap[ModuleId] = LastSeenModulePath->first();
5932
59145933 ModulePath.clear();
5934 break;
5935 }
5936 /// MST_CODE_HASH: [5*i32]
5937 case bitc::MST_CODE_HASH: {
5938 if (Record.size() != 5)
5939 return error("Invalid hash length " + Twine(Record.size()).str());
5940 if (LastSeenModulePath == TheIndex->modulePaths().end())
5941 return error("Invalid hash that does not follow a module path");
5942 int Pos = 0;
5943 for (auto &Val : Record) {
5944 assert(!(Val >> 32) && "Unexpected high bits set");
5945 LastSeenModulePath->second.second[Pos++] = Val;
5946 }
5947 // Reset LastSeenModulePath to avoid overriding the hash unexpectedly.
5948 LastSeenModulePath = TheIndex->modulePaths().end();
59155949 break;
59165950 }
59175951 }
1111 //===----------------------------------------------------------------------===//
1212
1313 #include "ValueEnumerator.h"
14 #include "llvm/ADT/StringExtras.h"
1415 #include "llvm/ADT/STLExtras.h"
1516 #include "llvm/ADT/Triple.h"
1617 #include "llvm/Analysis/BlockFrequencyInfo.h"
3839 #include "llvm/Support/MathExtras.h"
3940 #include "llvm/Support/Program.h"
4041 #include "llvm/Support/raw_ostream.h"
42 #include "llvm/Support/SHA1.h"
4143 #include
4244 #include
4345 using namespace llvm;
28512853 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
28522854 unsigned Abbrev6Bit = Stream.EmitAbbrev(Abbv);
28532855
2854 SmallVector NameVals;
2855 for (const StringMapEntry &MPSE : I.modulePaths()) {
2856 // Module Hash, 160 bits SHA1. Optionally, emitted after each MST_CODE_ENTRY.
2857 Abbv = new BitCodeAbbrev();
2858 Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_HASH));
2859 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
2860 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
2861 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
2862 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
2863 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
2864 unsigned AbbrevHash = Stream.EmitAbbrev(Abbv);
2865
2866 SmallVector Vals;
2867 for (const auto &MPSE : I.modulePaths()) {
28562868 StringEncoding Bits =
28572869 getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size());
28582870 unsigned AbbrevToUse = Abbrev8Bit;
28612873 else if (Bits == SE_Fixed7)
28622874 AbbrevToUse = Abbrev7Bit;
28632875
2864 NameVals.push_back(MPSE.getValue());
2876 Vals.push_back(MPSE.getValue().first);
28652877
28662878 for (const auto P : MPSE.getKey())
2867 NameVals.push_back((unsigned char)P);
2879 Vals.push_back((unsigned char)P);
28682880
28692881 // Emit the finished record.
2870 Stream.EmitRecord(bitc::MST_CODE_ENTRY, NameVals, AbbrevToUse);
2871 NameVals.clear();
2882 Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse);
2883
2884 Vals.clear();
2885 // Emit an optional hash for the module now
2886 auto &Hash = MPSE.getValue().second;
2887 bool AllZero = true; // Detect if the hash is empty, and do not generate it
2888 for (auto Val : Hash) {
2889 if (Val)
2890 AllZero = false;
2891 Vals.push_back(Val);
2892 }
2893 if (!AllZero) {
2894 // Emit the hash record.
2895 Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash);
2896 }
2897
2898 Vals.clear();
28722899 }
28732900 Stream.ExitBlock();
28742901 }
31763203 Stream.ExitBlock();
31773204 }
31783205
3206 static void writeModuleHash(BitstreamWriter &Stream,
3207 SmallVectorImpl &Buffer,
3208 size_t BlockStartPos) {
3209 // Emit the module's hash.
3210 // MODULE_CODE_HASH: [5*i32]
3211 SHA1 Hasher;
3212 Hasher.update(ArrayRef((uint8_t *)&Buffer[BlockStartPos],
3213 Buffer.size() - BlockStartPos));
3214 auto Hash = Hasher.result();
3215 SmallVector Vals;
3216 auto LShift = [&](unsigned char Val, unsigned Amount)
3217 -> uint64_t { return ((uint64_t)Val) << Amount; };
3218 for (int Pos = 0; Pos < 20; Pos += 4) {
3219 uint32_t SubHash = LShift(Hash[Pos + 0], 24);
3220 SubHash |= LShift(Hash[Pos + 1], 16) | LShift(Hash[Pos + 2], 8) |
3221 (unsigned)(unsigned char)Hash[Pos + 3];
3222 Vals.push_back(SubHash);
3223 }
3224
3225 // Emit the finished record.
3226 Stream.EmitRecord(bitc::MODULE_CODE_HASH, Vals);
3227 }
3228
31793229 /// WriteModule - Emit the specified module to the bitstream.
31803230 static void WriteModule(const Module *M, BitstreamWriter &Stream,
31813231 bool ShouldPreserveUseListOrder,
3182 uint64_t BitcodeStartBit, bool EmitSummaryIndex) {
3232 uint64_t BitcodeStartBit, bool EmitSummaryIndex,
3233 bool GenerateHash, SmallVectorImpl &Buffer) {
31833234 Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
3235 size_t BlockStartPos = Buffer.size();
31843236
31853237 SmallVector Vals;
31863238 unsigned CurVersion = 1;
32363288
32373289 WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream,
32383290 VSTOffsetPlaceholder, BitcodeStartBit, &FunctionIndex);
3291
3292 if (GenerateHash) {
3293 writeModuleHash(Stream, Buffer, BlockStartPos);
3294 }
32393295
32403296 Stream.ExitBlock();
32413297 }
33213377 /// stream.
33223378 void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
33233379 bool ShouldPreserveUseListOrder,
3324 bool EmitSummaryIndex) {
3380 bool EmitSummaryIndex, bool GenerateHash) {
33253381 SmallVector Buffer;
33263382 Buffer.reserve(256*1024);
33273383
33473403
33483404 // Emit the module.
33493405 WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit,
3350 EmitSummaryIndex);
3406 EmitSummaryIndex, GenerateHash, Buffer);
33513407 }
33523408
33533409 if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
3636
3737 // Add the module path string ref for this module if we haven't already
3838 // saved a reference to it.
39 if (ModPath.empty())
40 ModPath = addModulePath(Info->summary()->modulePath(), NextModuleId);
41 else
39 if (ModPath.empty()) {
40 auto Path = Info->summary()->modulePath();
41 ModPath = addModulePath(Path, NextModuleId, Other->getModuleHash(Path))
42 ->first();
43 } else
4244 assert(ModPath == Info->summary()->modulePath() &&
4345 "Each module in the combined map should have a unique ID");
4446
6060 /* ShouldLazyLoadMetadata = */ true);
6161 if (!Result) {
6262 Err.print("function-import", errs());
63 return nullptr;
63 report_fatal_error("Abort");
6464 }
6565
6666 return Result;
0 ; Check per module hash.
1 ; RUN: llvm-as -module-hash %s -o - | llvm-bcanalyzer -dump | FileCheck %s --check-prefix=MOD1
2 ; MOD1:
3 ; RUN: llvm-as -module-hash %p/Inputs/module_hash.ll -o - | llvm-bcanalyzer -dump | FileCheck %s --check-prefix=MOD2
4 ; MOD2:
5
6 ; Check that the hash matches in the combined index.
7
8 ; First regenerate the modules with a summary
9 ; RUN: llvm-as -module-hash -module-summary %s -o %t.m1.bc
10 ; RUN: llvm-as -module-hash -module-summary %p/Inputs/module_hash.ll -o %t.m2.bc
11
12 ; Recover the hashes from the modules themselves.
13 ; RUN: llvm-bcanalyzer -dump %t1 | grep ' %t.hash
14 ; RUN: llvm-bcanalyzer -dump %t2 | grep '> %t.hash
15
16 ; Generate the combined index and gather the hashes there.
17 ; RUN: llvm-lto --thinlto-action=thinlink -o - %t.m1.bc %t.m2.bc | llvm-bcanalyzer -dump | grep '> %t.hash
18
19 ; Validate the output now, the hashes in the individual modules and the combined index are in the same file.
20 ; RUN: cat %t.hash | FileCheck %s --check-prefix=COMBINED
21
22 ; First capture the value of the hash for the two modules.
23 ; COMBINED:
24 ; COMBINED:
25
26 ; Validate against the value extracted from the combined index
27 ; COMBINED-DAG:
28 ; COMBINED-DAG:
29
30
31 ; Need a function for the combined index to be populated.
32 define void @foo() {
33 ret void
34 }
4747 cl::desc("Emit module summary index"),
4848 cl::init(false));
4949
50 static cl::opt EmitModuleHash("module-hash", cl::desc("Emit module hash"),
51 cl::init(false));
52
5053 static cl::opt
5154 DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden);
5255
8184
8285 if (Force || !CheckBitcodeOutputToConsole(Out->os(), true))
8386 WriteBitcodeToFile(M, Out->os(), PreserveBitcodeUseListOrder,
84 EmitSummaryIndex);
87 EmitSummaryIndex, EmitModuleHash);
8588
8689 // Declare success.
8790 Out->keep();
2828
2929 #include "llvm/Bitcode/BitstreamReader.h"
3030 #include "llvm/ADT/Optional.h"
31 #include "llvm/ADT/StringExtras.h"
3132 #include "llvm/Bitcode/LLVMBitCodes.h"
3233 #include "llvm/Bitcode/ReaderWriter.h"
3334 #include "llvm/IR/Verifier.h"
3738 #include "llvm/Support/MemoryBuffer.h"
3839 #include "llvm/Support/PrettyStackTrace.h"
3940 #include "llvm/Support/Signals.h"
41 #include "llvm/Support/SHA1.h"
4042 #include "llvm/Support/raw_ostream.h"
4143 #include
44 #include
4245 #include
4346 #include
4447 #include
173176 STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
174177 STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED)
175178 STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME)
179 STRINGIFY_CODE(MODULE_CODE, HASH)
176180 }
177181 case bitc::IDENTIFICATION_BLOCK_ID:
178182 switch (CodeID) {
291295 default:
292296 return nullptr;
293297 STRINGIFY_CODE(MST_CODE, ENTRY)
298 STRINGIFY_CODE(MST_CODE, HASH)
294299 }
295300 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
296301 switch (CodeID) {
480485 if (Stream.EnterSubBlock(BlockID, &NumWords))
481486 return Error("Malformed block record");
482487
488 // Keep it for later, when we see a MODULE_HASH record
489 uint64_t BlockEntryPos = Stream.getCurrentByteNo();
490
483491 const char *BlockName = nullptr;
484492 if (DumpRecords) {
485493 outs() << Indent << "<";
551559 ++BlockStats.NumRecords;
552560
553561 StringRef Blob;
562 unsigned CurrentRecordPos = Stream.getCurrentByteNo();
554563 unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
555564
556565 // Increment the # occurrences of this code.
584593
585594 for (unsigned i = 0, e = Record.size(); i != e; ++i)
586595 outs() << " op" << i << "=" << (int64_t)Record[i];
596
597 // If we found a module hash, let's verify that it matches!
598 if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH) {
599 if (Record.size() != 5)
600 outs() << " (invalid)";
601 else {
602 // Recompute the hash and compare it to the one in the bitcode
603 SHA1 Hasher;
604 StringRef Hash;
605 {
606 int BlockSize = CurrentRecordPos - BlockEntryPos;
607 auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize);
608 Hasher.update(ArrayRef(Ptr, BlockSize));
609 Hash = Hasher.result();
610 }
611 SmallString<20> RecordedHash;
612 RecordedHash.resize(20);
613 int Pos = 0;
614 for (auto &Val : Record) {
615 assert(!(Val >> 32) && "Unexpected high bits set");
616 RecordedHash[Pos++] = (Val >> 24) & 0xFF;
617 RecordedHash[Pos++] = (Val >> 16) & 0xFF;
618 RecordedHash[Pos++] = (Val >> 8) & 0xFF;
619 RecordedHash[Pos++] = (Val >> 0) & 0xFF;
620 }
621 if (Hash == RecordedHash)
622 outs() << " (match)";
623 else
624 outs() << " (!mismatch!)";
625 }
626 }
587627
588628 outs() << "/>";
589629