llvm.org GIT mirror llvm / 0995a92
[PDB] Make our PDBs look more like MS PDBs. When investigating bugs in PDB generation, the first step is often to do the same link with link.exe and then compare PDBs. But comparing PDBs is hard because two completely different byte sequences can both be correct, so it hampers the investigation when you also have to spend time figuring out not just which bytes are different, but also whether the difference is meaningful. This patch fixes a couple of cases related to string table emission, hash table emission, and the order in which we emit strings, so that more of our bytes are the same as the bytes generated by MS PDBs. Differential Revision: https://reviews.llvm.org/D44810 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@328348 91177308-0d34-0410-b5e6-96231b3b80d8 Zachary Turner 1 year, 5 months ago
15 changed file(s) with 261 addition(s) and 86 deletion(s). Raw diff Collapse all Expand all
8181
8282 StringMap::const_iterator end() const { return StringToId.end(); }
8383
84 std::vector sortedIds() const;
85
8486 private:
8587 DenseMap IdToString;
8688 StringMap StringToId;
120120 MutableBinaryByteStream FileInfoBuffer;
121121 std::vector SectionContribs;
122122 ArrayRef SectionMap;
123 llvm::SmallVector, (int)DbgHeaderType::Max> DbgStreams;
123 std::array, (int)DbgHeaderType::Max> DbgStreams;
124124 };
125125 }
126126 }
303303
304304 void grow() {
305305 uint32_t S = size();
306 uint32_t MaxLoad = maxLoad(capacity());
306307 if (S < maxLoad(capacity()))
307308 return;
308309 assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
309310
310 uint32_t NewCapacity =
311 (capacity() <= INT32_MAX) ? capacity() * 2 : UINT32_MAX;
311 uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;
312312
313313 // Growing requires rebuilding the table and re-hashing every item. Make a
314314 // copy with a larger capacity, insert everything into the copy, then swap
5353 Error commit(StringRef Filename);
5454
5555 Expected getNamedStreamIndex(StringRef Name) const;
56 Error addNamedStream(StringRef Name, uint32_t Size);
56 Error addNamedStream(StringRef Name, StringRef Data);
5757
5858 private:
5959 Expected finalizeMsfLayout();
60 Expected allocateNamedStream(StringRef Name, uint32_t Size);
6061
6162 void commitFpm(WritableBinaryStream &MsfBuffer, const msf::MSFLayout &Layout);
6263
7172
7273 PDBStringTableBuilder Strings;
7374 NamedStreamMap NamedStreams;
75 DenseMap NamedStreamData;
7476 };
7577 }
7678 }
8585
8686 uint32_t DebugStringTableSubsection::size() const { return StringToId.size(); }
8787
// Returns the IDs registered in this subsection (the keys of IdToString),
// sorted in ascending order.
// NOTE(review): the vector's element type is not visible in this view.
88 std::vector DebugStringTableSubsection::sortedIds() const {
89 std::vector Result;
// Reserve once so the push_back loop below does not need to reallocate.
90 Result.reserve(IdToString.size());
91 for (const auto &Entry : IdToString)
92 Result.push_back(Entry.first);
// The IDs were gathered by iterating the IdToString DenseMap; sort explicitly
// rather than relying on the map's iteration order.
93 std::sort(Result.begin(), Result.end());
94 return Result;
95 }
96
8897 uint32_t DebugStringTableSubsection::getIdForString(StringRef S) const {
8998 auto Iter = StringToId.find(S);
9099 assert(Iter != StringToId.end());
2626 DbiStreamBuilder::DbiStreamBuilder(msf::MSFBuilder &Msf)
2727 : Msf(Msf), Allocator(Msf.getAllocator()), Age(1), BuildNumber(0),
2828 PdbDllVersion(0), PdbDllRbld(0), Flags(0), MachineType(PDB_Machine::x86),
29 Header(nullptr), DbgStreams((int)DbgHeaderType::Max) {}
29 Header(nullptr) {}
3030
3131 DbiStreamBuilder::~DbiStreamBuilder() {}
3232
6262
6363 Error DbiStreamBuilder::addDbgStream(pdb::DbgHeaderType Type,
6464 ArrayRef Data) {
65 if (DbgStreams[(int)Type].StreamNumber != kInvalidStreamIndex)
66 return make_error(raw_error_code::duplicate_entry,
67 "The specified stream type already exists");
68 auto ExpectedIndex = Msf.addStream(Data.size());
69 if (!ExpectedIndex)
70 return ExpectedIndex.takeError();
71 uint32_t Index = std::move(*ExpectedIndex);
72 DbgStreams[(int)Type].Data = Data;
73 DbgStreams[(int)Type].StreamNumber = Index;
65 DbgStreams[(int)Type].emplace();
66 DbgStreams[(int)Type]->Data = Data;
7467 return Error::success();
7568 }
7669
265258 }
266259
267260 Error DbiStreamBuilder::finalizeMsfLayout() {
261 for (auto &S : DbgStreams) {
262 if (!S.hasValue())
263 continue;
264 auto ExpectedIndex = Msf.addStream(S->Data.size());
265 if (!ExpectedIndex)
266 return ExpectedIndex.takeError();
267 S->StreamNumber = *ExpectedIndex;
268 }
269
268270 for (auto &MI : ModiList) {
269271 if (auto EC = MI->finalizeMsfLayout())
270272 return EC;
374376 if (auto EC = ECNamesBuilder.commit(Writer))
375377 return EC;
376378
377 for (auto &Stream : DbgStreams)
378 if (auto EC = Writer.writeInteger(Stream.StreamNumber))
379 return EC;
380
381379 for (auto &Stream : DbgStreams) {
382 if (Stream.StreamNumber == kInvalidStreamIndex)
380 uint16_t StreamNumber = kInvalidStreamIndex;
381 if (Stream.hasValue())
382 StreamNumber = Stream->StreamNumber;
383 if (auto EC = Writer.writeInteger(StreamNumber))
384 return EC;
385 }
386
387 for (auto &Stream : DbgStreams) {
388 if (!Stream.hasValue())
383389 continue;
390 assert(Stream->StreamNumber != kInvalidStreamIndex);
391
384392 auto WritableStream = WritableMappedBlockStream::createIndexedStream(
385 Layout, MsfBuffer, Stream.StreamNumber, Allocator);
393 Layout, MsfBuffer, Stream->StreamNumber, Allocator);
386394 BinaryStreamWriter DbgStreamWriter(*WritableStream);
387 if (auto EC = DbgStreamWriter.writeArray(Stream.Data))
395 if (auto EC = DbgStreamWriter.writeArray(Stream->Data))
388396 return EC;
389397 }
390398
149149 PSH->finalizeBuckets(PSHZero);
150150 GSH->finalizeBuckets(GSHZero);
151151
152 Expected Idx = Msf.addStream(calculatePublicsHashStreamSize());
152 Expected Idx = Msf.addStream(calculateGlobalsHashStreamSize());
153 if (!Idx)
154 return Idx.takeError();
155 GSH->StreamIndex = *Idx;
156 Idx = Msf.addStream(calculatePublicsHashStreamSize());
153157 if (!Idx)
154158 return Idx.takeError();
155159 PSH->StreamIndex = *Idx;
156 Idx = Msf.addStream(calculateGlobalsHashStreamSize());
157 if (!Idx)
158 return Idx.takeError();
159 GSH->StreamIndex = *Idx;
160160
161161 uint32_t RecordBytes =
162162 GSH->calculateRecordByteSize() + PSH->calculateRecordByteSize();
7272 if (auto EC = Writer.writeEnum(E))
7373 return EC;
7474 }
75 assert(Writer.bytesRemaining() == 0);
7576 return Error::success();
7677 }
4646 }
4747
4848 NamedStreamMap::NamedStreamMap()
49 : HashTraits(*this), OffsetIndexMap(HashTraits) {}
49 : HashTraits(*this), OffsetIndexMap(1, HashTraits) {}
5050
5151 Error NamedStreamMap::load(BinaryStreamReader &Stream) {
5252 uint32_t StringBufferSize;
7979 return *Gsi;
8080 }
8181
82 Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) {
82 Expected PDBFileBuilder::allocateNamedStream(StringRef Name,
83 uint32_t Size) {
8384 auto ExpectedStream = Msf->addStream(Size);
84 if (!ExpectedStream)
85 return ExpectedStream.takeError();
86 NamedStreams.set(Name, *ExpectedStream);
85 if (ExpectedStream)
86 NamedStreams.set(Name, *ExpectedStream);
87 return ExpectedStream;
88 }
89
// Adds a named stream called Name whose contents are Data.
// A stream of Data.size() bytes is allocated through allocateNamedStream();
// if that fails, its error is returned unchanged. On success, Data is recorded
// in NamedStreamData under the newly allocated stream index and
// Error::success() is returned.
// NOTE(review): Data arrives as a StringRef and is stored as passed in;
// presumably the caller must keep the underlying bytes alive until they are
// written out -- confirm against NamedStreamData's value type.
90 Error PDBFileBuilder::addNamedStream(StringRef Name, StringRef Data) {
91 Expected ExpectedIndex = allocateNamedStream(Name, Data.size());
92 if (!ExpectedIndex)
93 return ExpectedIndex.takeError();
// Each allocated stream index is expected to have data recorded at most once.
94 assert(NamedStreamData.count(*ExpectedIndex) == 0);
95 NamedStreamData[*ExpectedIndex] = Data;
8796 return Error::success();
8897 }
8998
100109
101110 uint32_t StringsLen = Strings.calculateSerializedSize();
102111
103 if (auto EC = addNamedStream("/names", StringsLen))
104 return std::move(EC);
105 if (auto EC = addNamedStream("/LinkInfo", 0))
106 return std::move(EC);
107
108 if (Info) {
109 if (auto EC = Info->finalizeMsfLayout())
110 return std::move(EC);
111 }
112 if (Dbi) {
113 if (auto EC = Dbi->finalizeMsfLayout())
114 return std::move(EC);
115 }
116 if (Tpi) {
117 if (auto EC = Tpi->finalizeMsfLayout())
118 return std::move(EC);
119 }
120 if (Ipi) {
121 if (auto EC = Ipi->finalizeMsfLayout())
122 return std::move(EC);
123 }
112 Expected SN = allocateNamedStream("/LinkInfo", 0);
113 if (!SN)
114 return SN.takeError();
115
124116 if (Gsi) {
125117 if (auto EC = Gsi->finalizeMsfLayout())
126118 return std::move(EC);
129121 Dbi->setGlobalsStreamIndex(Gsi->getGlobalsStreamIndex());
130122 Dbi->setSymbolRecordStreamIndex(Gsi->getRecordStreamIdx());
131123 }
124 }
125 if (Tpi) {
126 if (auto EC = Tpi->finalizeMsfLayout())
127 return std::move(EC);
128 }
129 if (Dbi) {
130 if (auto EC = Dbi->finalizeMsfLayout())
131 return std::move(EC);
132 }
133 SN = allocateNamedStream("/names", StringsLen);
134 if (!SN)
135 return SN.takeError();
136
137 if (Ipi) {
138 if (auto EC = Ipi->finalizeMsfLayout())
139 return std::move(EC);
140 }
141
142 // Do this last, since it relies on the named stream map being complete, and
143 // that can be updated by previous steps in the finalization.
144 if (Info) {
145 if (auto EC = Info->finalizeMsfLayout())
146 return std::move(EC);
132147 }
133148
134149 return Msf->build();
218233 if (auto EC = Strings.commit(NSWriter))
219234 return EC;
220235
236 for (const auto &NSE : NamedStreamData) {
237 if (NSE.second.empty())
238 continue;
239
240 auto NS = WritableMappedBlockStream::createIndexedStream(
241 Layout, Buffer, NSE.first, Allocator);
242 BinaryStreamWriter NSW(*NS);
243 if (auto EC = NSW.writeBytes(arrayRefFromStringRef(NSE.second)))
244 return EC;
245 }
246
221247 if (Info) {
222248 if (auto EC = Info->commit(Layout, Buffer))
223249 return EC;
1313 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
1414 #include "llvm/Support/BinaryStreamWriter.h"
1515 #include "llvm/Support/Endian.h"
16
17 #include
1618
1719 using namespace llvm;
1820 using namespace llvm::msf;
3234 return Strings.getStringForId(Id);
3335 }
3436
// Lookup table consulted by computeBucketCount(). Each entry is
// {string-count threshold, bucket count}. computeBucketCount() queries it with
// lower_bound(), so a given string count maps to the bucket count of the first
// entry whose threshold is >= that count. A count larger than the last
// threshold has no entry and trips the assert in computeBucketCount().
// NOTE(review): the map's key/value types are not visible in this view.
37 // This is a precomputed list of Buckets given the specified number of
38 // strings. Matching the reference algorithm exactly is not strictly
39 // necessary for correctness, but it helps when comparing LLD's PDBs with
40 // Microsoft's PDBs so as to eliminate superfluous differences.
41 static std::map StringsToBuckets = {
42 {1, 2},
43 {2, 4},
44 {4, 7},
45 {6, 11},
46 {9, 17},
47 {13, 26},
48 {20, 40},
49 {31, 61},
50 {46, 92},
51 {70, 139},
52 {105, 209},
53 {157, 314},
54 {236, 472},
55 {355, 709},
56 {532, 1064},
57 {799, 1597},
58 {1198, 2396},
59 {1798, 3595},
60 {2697, 5393},
61 {4045, 8090},
62 {6068, 12136},
63 {9103, 18205},
64 {13654, 27308},
65 {20482, 40963},
66 {30723, 61445},
67 {46084, 92168},
68 {69127, 138253},
69 {103690, 207380},
70 {155536, 311071},
71 {233304, 466607},
72 {349956, 699911},
73 {524934, 1049867},
74 {787401, 1574801},
75 {1181101, 2362202},
76 {1771652, 3543304},
77 {2657479, 5314957},
78 {3986218, 7972436},
79 {5979328, 11958655},
80 {8968992, 17937983},
81 {13453488, 26906975},
82 {20180232, 40360463},
83 {30270348, 60540695},
84 {45405522, 90811043},
85 {68108283, 136216565},
86 {102162424, 204324848},
87 {153243637, 306487273},
88 {229865455, 459730910},
89 {344798183, 689596366},
90 {517197275, 1034394550},
91 {775795913, 1551591826}};
92
3593 static uint32_t computeBucketCount(uint32_t NumStrings) {
36 // The /names stream is basically an on-disk open-addressing hash table.
37 // Hash collisions are resolved by linear probing. We cannot make
38 // utilization 100% because it will make the linear probing extremely
39 // slow. But lower utilization wastes disk space. As a reasonable
40 // load factor, we choose 80%. We need +1 because slot 0 is reserved.
41 return (NumStrings + 1) * 1.25;
94 auto Entry = StringsToBuckets.lower_bound(NumStrings);
95 assert(Entry != StringsToBuckets.end());
96 return Entry->second;
4297 }
4398
4499 uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
8989 P.NewLine();
9090 }
9191
92 if (opts::dump::DumpStringTable) {
92 if (opts::dump::DumpNamedStreams) {
93 if (auto EC = dumpNamedStreams())
94 return EC;
95 P.NewLine();
96 }
97
98 if (opts::dump::DumpStringTable || opts::dump::DumpStringTableDetails) {
9399 if (auto EC = dumpStringTable())
94100 return EC;
95101 P.NewLine();
856862 return Error::success();
857863 }
858864
859 if (IS->name_ids().empty()) {
860 P.formatLine("Empty");
861 return Error::success();
862 }
863
864 auto MaxID = std::max_element(IS->name_ids().begin(), IS->name_ids().end());
865 uint32_t Digits = NumDigits(*MaxID);
866
867 P.formatLine("{0} | {1}", fmt_align("ID", AlignStyle::Right, Digits),
868 "String");
869
870 std::vector SortedIDs(IS->name_ids().begin(), IS->name_ids().end());
871 std::sort(SortedIDs.begin(), SortedIDs.end());
872 for (uint32_t I : SortedIDs) {
873 auto ES = IS->getStringForID(I);
874 llvm::SmallString<32> Str;
875 if (!ES) {
876 consumeError(ES.takeError());
877 Str = "Error reading string";
878 } else if (!ES->empty()) {
879 Str.append("'");
880 Str.append(*ES);
881 Str.append("'");
882 }
883
884 if (!Str.empty())
885 P.formatLine("{0} | {1}", fmt_align(I, AlignStyle::Right, Digits), Str);
865 if (opts::dump::DumpStringTable) {
866 if (IS->name_ids().empty())
867 P.formatLine("Empty");
868 else {
869 auto MaxID =
870 std::max_element(IS->name_ids().begin(), IS->name_ids().end());
871 uint32_t Digits = NumDigits(*MaxID);
872
873 P.formatLine("{0} | {1}", fmt_align("ID", AlignStyle::Right, Digits),
874 "String");
875
876 std::vector SortedIDs(IS->name_ids().begin(),
877 IS->name_ids().end());
878 std::sort(SortedIDs.begin(), SortedIDs.end());
879 for (uint32_t I : SortedIDs) {
880 auto ES = IS->getStringForID(I);
881 llvm::SmallString<32> Str;
882 if (!ES) {
883 consumeError(ES.takeError());
884 Str = "Error reading string";
885 } else if (!ES->empty()) {
886 Str.append("'");
887 Str.append(*ES);
888 Str.append("'");
889 }
890
891 if (!Str.empty())
892 P.formatLine("{0} | {1}", fmt_align(I, AlignStyle::Right, Digits),
893 Str);
894 }
895 }
896 }
897
898 if (opts::dump::DumpStringTableDetails) {
899 P.NewLine();
900 {
901 P.printLine("String Table Header:");
902 AutoIndent Indent(P);
903 P.formatLine("Signature: {0}", IS->getSignature());
904 P.formatLine("Hash Version: {0}", IS->getHashVersion());
905 P.formatLine("Name Buffer Size: {0}", IS->getByteSize());
906 P.NewLine();
907 }
908
909 BinaryStreamRef NameBuffer = IS->getStringTable().getBuffer();
910 ArrayRef Contents;
911 cantFail(NameBuffer.readBytes(0, NameBuffer.getLength(), Contents));
912 P.formatBinary("Name Buffer", Contents, 0);
913 P.NewLine();
914 {
915 P.printLine("Hash Table:");
916 AutoIndent Indent(P);
917 P.formatLine("Bucket Count: {0}", IS->name_ids().size());
918 for (const auto &Entry : enumerate(IS->name_ids()))
919 P.formatLine("Bucket[{0}] : {1}", Entry.index(),
920 uint32_t(Entry.value()));
921 P.formatLine("Name Count: {0}", IS->getNameCount());
922 }
886923 }
887924 return Error::success();
888925 }
905942 Str);
906943 }
907944 });
945 return Error::success();
946 }
947
// Prints the "Named Streams" section of the dump: for every entry in the PDB
// info stream's named stream map, the stream name followed by its index and
// its size in bytes. For object-file inputs only an explanatory line is
// printed. Always returns Error::success(); a failure to obtain the PDB info
// stream is passed to the ExitOnError helper instead of being returned.
948 Error DumpOutputStyle::dumpNamedStreams() {
949 printHeader(P, "Named Streams");
950 AutoIndent Indent(P, 2);
951
// Object files are not handled here; report that and stop.
952 if (File.isObj()) {
953 P.formatLine("Dumping Named Streams is only supported for PDB files.");
954 return Error::success();
955 }
// Errors routed through Err are reported with this banner prefix.
956 ExitOnError Err("Invalid PDB File: ");
957
958 auto &IS = Err(File.pdb().getPDBInfoStream());
959 const NamedStreamMap &NS = IS.getNamedStreams();
// Entry key = stream name, entry value = stream index (printed as "Index").
960 for (const auto &Entry : NS.entries()) {
961 P.printLine(Entry.getKey());
// Indent the per-stream details beneath the stream's name.
962 AutoIndent Indent2(P, 2);
963 P.formatLine("Index: {0}", Entry.getValue());
964 P.formatLine("Size in bytes: {0}",
965 File.pdb().getStreamByteSize(Entry.getValue()));
966 }
967
908968 return Error::success();
909969 }
910970
7373 Error dumpStreamSummary();
7474 Error dumpSymbolStats();
7575 Error dumpUdtStats();
76 Error dumpNamedStreams();
7677 Error dumpStringTable();
7778 Error dumpStringTableFromPdb();
7879 Error dumpStringTableFromObj();
533533 cl::cat(FileOptions), cl::sub(DumpSubcommand));
534534
535535 // MISCELLANEOUS OPTIONS
536 cl::opt DumpNamedStreams("named-streams",
537 cl::desc("dump PDB named stream table"),
538 cl::cat(MiscOptions), cl::sub(DumpSubcommand));
539
536540 cl::opt DumpStringTable("string-table", cl::desc("dump PDB String Table"),
537541 cl::cat(MiscOptions), cl::sub(DumpSubcommand));
542 cl::opt DumpStringTableDetails("string-table-details",
543 cl::desc("dump PDB String Table Details"),
544 cl::cat(MiscOptions),
545 cl::sub(DumpSubcommand));
538546
539547 cl::opt DumpSectionContribs("section-contribs",
540548 cl::desc("dump section contributions"),
11981206 opts::dump::DumpStreams = true;
11991207 opts::dump::DumpStreamBlocks = true;
12001208 opts::dump::DumpStringTable = true;
1209 opts::dump::DumpStringTableDetails = true;
12011210 opts::dump::DumpSummary = true;
12021211 opts::dump::DumpSymbols = true;
12031212 opts::dump::DumpSymbolStats = true;
141141 extern llvm::cl::opt DumpInlineeLines;
142142 extern llvm::cl::opt DumpXmi;
143143 extern llvm::cl::opt DumpXme;
144 extern llvm::cl::opt DumpNamedStreams;
144145 extern llvm::cl::opt DumpStringTable;
146 extern llvm::cl::opt DumpStringTableDetails;
145147 extern llvm::cl::opt DumpTypes;
146148 extern llvm::cl::opt DumpTypeData;
147149 extern llvm::cl::opt DumpTypeExtras;