llvm.org GIT mirror llvm / 89bf969
Add an index for Module Metadata record in the bitcode. This index records the position of each metadata record in the bitcode, so that the reader will be able to lazy-load each individual record on demand. We also make sure that every abbrev is emitted upfront so that the block can be skipped while reading. I don't plan to commit this before having the reader counterpart, but I figured this can be reviewed mostly independently. Recommit r290684 (was reverted in r290686 because a test was broken) after adding a threshold to avoid emitting the index when unnecessary (small amount of metadata). This optimization "hides" a limitation of the ability to backpatch in the bitstream: we can only backpatch safely when the position has been flushed. So if we emit an index for one metadata, it is possible that (part of) the offset placeholder hasn't been flushed and the backpatch will fail. Differential Revision: https://reviews.llvm.org/D28083 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@290690 91177308-0d34-0410-b5e6-96231b3b80d8 Mehdi Amini 2 years ago
9 changed file(s) with 174 addition(s) and 35 deletion(s). Raw diff Collapse all Expand all
111111 &Out[ByteNo], NewWord, BitNo & 7);
112112 }
113113
/// Backpatch a 64-bit value into the already-emitted stream at bit position
/// BitNo. The value is written as two 32-bit words through BackpatchWord: the
/// low 32 bits of Val at BitNo, then the high 32 bits at BitNo + 32.
114 void BackpatchWord64(uint64_t BitNo, uint64_t Val) {
115 BackpatchWord(BitNo, (uint32_t)Val);
116 BackpatchWord(BitNo + 32, (uint32_t)(Val >> 32));
117 }
118
114119 void Emit(uint32_t Val, unsigned NumBits) {
115120 assert(NumBits && NumBits <= 32 && "Invalid value size!");
116121 assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!");
278283 default: llvm_unreachable("Unknown encoding!");
279284 case BitCodeAbbrevOp::Fixed:
280285 if (Op.getEncodingData())
281 Emit((unsigned)V, (unsigned)Op.getEncodingData());
286 Emit64(V, (unsigned)Op.getEncodingData());
282287 break;
283288 case BitCodeAbbrevOp::VBR:
284289 if (Op.getEncodingData())
217217 };
218218
219219 enum MetadataCodes {
220 METADATA_STRING_OLD = 1, // MDSTRING: [values]
221 METADATA_VALUE = 2, // VALUE: [type num, value num]
222 METADATA_NODE = 3, // NODE: [n x md num]
223 METADATA_NAME = 4, // STRING: [values]
224 METADATA_DISTINCT_NODE = 5, // DISTINCT_NODE: [n x md num]
225 METADATA_KIND = 6, // [n x [id, name]]
226 METADATA_LOCATION = 7, // [distinct, line, col, scope, inlined-at?]
227 METADATA_OLD_NODE = 8, // OLD_NODE: [n x (type num, value num)]
228 METADATA_OLD_FN_NODE = 9, // OLD_FN_NODE: [n x (type num, value num)]
229 METADATA_NAMED_NODE = 10, // NAMED_NODE: [n x mdnodes]
230 METADATA_ATTACHMENT = 11, // [m x [value, [n x [id, mdnode]]]
231 METADATA_GENERIC_DEBUG = 12, // [distinct, tag, vers, header, n x md num]
232 METADATA_SUBRANGE = 13, // [distinct, count, lo]
233 METADATA_ENUMERATOR = 14, // [distinct, value, name]
234 METADATA_BASIC_TYPE = 15, // [distinct, tag, name, size, align, enc]
235 METADATA_FILE = 16, // [distinct, filename, directory, checksumkind, checksum]
236 METADATA_DERIVED_TYPE = 17, // [distinct, ...]
237 METADATA_COMPOSITE_TYPE = 18, // [distinct, ...]
238 METADATA_SUBROUTINE_TYPE = 19, // [distinct, flags, types, cc]
239 METADATA_COMPILE_UNIT = 20, // [distinct, ...]
240 METADATA_SUBPROGRAM = 21, // [distinct, ...]
241 METADATA_LEXICAL_BLOCK = 22, // [distinct, scope, file, line, column]
220 METADATA_STRING_OLD = 1, // MDSTRING: [values]
221 METADATA_VALUE = 2, // VALUE: [type num, value num]
222 METADATA_NODE = 3, // NODE: [n x md num]
223 METADATA_NAME = 4, // STRING: [values]
224 METADATA_DISTINCT_NODE = 5, // DISTINCT_NODE: [n x md num]
225 METADATA_KIND = 6, // [n x [id, name]]
226 METADATA_LOCATION = 7, // [distinct, line, col, scope, inlined-at?]
227 METADATA_OLD_NODE = 8, // OLD_NODE: [n x (type num, value num)]
228 METADATA_OLD_FN_NODE = 9, // OLD_FN_NODE: [n x (type num, value num)]
229 METADATA_NAMED_NODE = 10, // NAMED_NODE: [n x mdnodes]
230 METADATA_ATTACHMENT = 11, // [m x [value, [n x [id, mdnode]]]
231 METADATA_GENERIC_DEBUG = 12, // [distinct, tag, vers, header, n x md num]
232 METADATA_SUBRANGE = 13, // [distinct, count, lo]
233 METADATA_ENUMERATOR = 14, // [distinct, value, name]
234 METADATA_BASIC_TYPE = 15, // [distinct, tag, name, size, align, enc]
235 METADATA_FILE = 16, // [distinct, filename, directory, checksumkind, checksum]
236 METADATA_DERIVED_TYPE = 17, // [distinct, ...]
237 METADATA_COMPOSITE_TYPE = 18, // [distinct, ...]
238 METADATA_SUBROUTINE_TYPE = 19, // [distinct, flags, types, cc]
239 METADATA_COMPILE_UNIT = 20, // [distinct, ...]
240 METADATA_SUBPROGRAM = 21, // [distinct, ...]
241 METADATA_LEXICAL_BLOCK = 22, // [distinct, scope, file, line, column]
242242 METADATA_LEXICAL_BLOCK_FILE = 23, //[distinct, scope, file, discriminator]
243 METADATA_NAMESPACE = 24, // [distinct, scope, file, name, line, exportSymbols]
243 METADATA_NAMESPACE = 24, // [distinct, scope, file, name, line, exportSymbols]
244244 METADATA_TEMPLATE_TYPE = 25, // [distinct, scope, name, type, ...]
245245 METADATA_TEMPLATE_VALUE = 26, // [distinct, scope, name, type, value, ...]
246246 METADATA_GLOBAL_VAR = 27, // [distinct, ...]
253253 METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...]
254254 METADATA_STRINGS = 35, // [count, offset] blob([lengths][chars])
255255 METADATA_GLOBAL_DECL_ATTACHMENT = 36, // [valueid, n x [id, mdnode]]
256 METADATA_GLOBAL_VAR_EXPR = 37, // [distinct, var, expr]
256 METADATA_GLOBAL_VAR_EXPR = 37, // [distinct, var, expr]
257 METADATA_INDEX_OFFSET = 38, // [offset]
258 METADATA_INDEX = 39, // [bitpos]
257259 };
258260
259261 // The constants block (CONSTANTS_BLOCK_ID) describes emission for each
3737 using namespace llvm;
3838
3939 namespace {
40
// Hidden command-line option "bitcode-mdindex-threshold" (initial value 25).
// The writer emits the metadata index only when the number of non-MDString
// metadata is strictly greater than this value.
41 cl::opt
42 IndexThreshold("bitcode-mdindex-threshold", cl::Hidden, cl::init(25),
43 cl::desc("Number of metadatas above which we emit an index "
44 "to enable lazy-loading"));
4045 /// These are manifest constants used by the bitcode writer. They do not need to
4146 /// be kept in sync with the reader, but need to be consistent within this file.
4247 enum {
223228 void writeMetadataStrings(ArrayRef Strings,
224229 SmallVectorImpl &Record);
225230 void writeMetadataRecords(ArrayRef MDs,
226 SmallVectorImpl &Record);
231 SmallVectorImpl &Record,
232 std::vector *MDAbbrevs = nullptr,
233 std::vector *IndexPos = nullptr);
227234 void writeModuleMetadata();
228235 void writeFunctionMetadata(const Function &F);
229236 void writeFunctionMetadataAttachment(const Function &F);
18531860 Record.clear();
18541861 }
18551862
1863 // Generates an enum to use as an index in the Abbrev array of Metadata record.
// Every HANDLE_MDNODE_LEAF(CLASS) entry pulled in from Metadata.def expands to
// one CLASS##AbbrevID enumerator. LastPlusOne comes after all of them, so it
// equals the number of generated IDs and can be used to size the abbrev table.
1864 enum MetadataAbbrev : unsigned {
1865 #define HANDLE_MDNODE_LEAF(CLASS) CLASS##AbbrevID,
1866 #include "llvm/IR/Metadata.def"
1867 LastPlusOne
1868 };
1869
18561870 void ModuleBitcodeWriter::writeMetadataRecords(
1857 ArrayRef MDs, SmallVectorImpl &Record) {
1871 ArrayRef MDs, SmallVectorImpl &Record,
1872 std::vector *MDAbbrevs, std::vector *IndexPos) {
18581873 if (MDs.empty())
18591874 return;
18601875
18631878 #include "llvm/IR/Metadata.def"
18641879
18651880 for (const Metadata *MD : MDs) {
1881 if (IndexPos)
1882 IndexPos->push_back(Stream.GetCurrentBitNo());
18661883 if (const MDNode *N = dyn_cast(MD)) {
18671884 assert(N->isResolved() && "Expected forward references to be resolved");
18681885
18711888 llvm_unreachable("Invalid MDNode subclass");
18721889 #define HANDLE_MDNODE_LEAF(CLASS) \
18731890 case Metadata::CLASS##Kind: \
1874 write##CLASS(cast(N), Record, CLASS##Abbrev); \
1891 if (MDAbbrevs) \
1892 write##CLASS(cast(N), Record, \
1893 (*MDAbbrevs)[MetadataAbbrev::CLASS##AbbrevID]); \
1894 else \
1895 write##CLASS(cast(N), Record, CLASS##Abbrev); \
18751896 continue;
18761897 #include "llvm/IR/Metadata.def"
18771898 }
18841905 if (!VE.hasMDs() && M.named_metadata_empty())
18851906 return;
18861907
1887 Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
1908 Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 4);
18881909 SmallVector Record;
1910
1911 // Emit all abbrevs upfront, so that the reader can jump in the middle of the
1912 // block and load any metadata.
1913 std::vector MDAbbrevs;
1914
1915 MDAbbrevs.resize(MetadataAbbrev::LastPlusOne);
1916 MDAbbrevs[MetadataAbbrev::DILocationAbbrevID] = createDILocationAbbrev();
1917 MDAbbrevs[MetadataAbbrev::GenericDINodeAbbrevID] =
1918 createGenericDINodeAbbrev();
1919
1920 BitCodeAbbrev *Abbv = new BitCodeAbbrev();
1921 Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX_OFFSET));
1922 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 64));
1923 unsigned OffsetAbbrev = Stream.EmitAbbrev(Abbv);
1924
1925 Abbv = new BitCodeAbbrev();
1926 Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX));
1927 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
1928 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
1929 unsigned IndexAbbrev = Stream.EmitAbbrev(Abbv);
1930
1931 // Emit MDStrings together upfront.
18891932 writeMetadataStrings(VE.getMDStrings(), Record);
1890 writeMetadataRecords(VE.getNonMDStrings(), Record);
1933
1934 // We only emit an index for the metadata record if we have more than a given
1935 // (naive) threshold of metadatas, otherwise it is not worth it.
1936 if (VE.getNonMDStrings().size() > IndexThreshold) {
1937 // Write a placeholder value in for the offset of the metadata index,
1938 // which is written after the records, so that it can include
1939 // the offset of each entry. The placeholder offset will be
1940 // updated after all records are emitted.
1941 uint64_t Vals[] = {0};
1942 Stream.EmitRecord(bitc::METADATA_INDEX_OFFSET, Vals, OffsetAbbrev);
1943 }
1944
1945 // Compute and save the bit offset to the current position, which will be
1946 // patched when we emit the index later. We can simply subtract the 64-bit
1947 // fixed size from the current bit number to get the location to backpatch.
1948 uint64_t IndexOffsetRecordBitPos = Stream.GetCurrentBitNo();
1949
1950 // This index will contain the bitpos for each individual record.
1951 std::vector IndexPos;
1952 IndexPos.reserve(VE.getNonMDStrings().size());
1953
1954 // Write all the records
1955 writeMetadataRecords(VE.getNonMDStrings(), Record, &MDAbbrevs, &IndexPos);
1956
1957 if (VE.getNonMDStrings().size() > IndexThreshold) {
1958 // Now that we have emitted all the records we will emit the index. But
1959 // first
1960 // backpatch the forward reference so that the reader can skip the records
1961 // efficiently.
1962 Stream.BackpatchWord64(IndexOffsetRecordBitPos - 64,
1963 Stream.GetCurrentBitNo() - IndexOffsetRecordBitPos);
1964
1965 // Delta encode the index.
1966 uint64_t PreviousValue = IndexOffsetRecordBitPos;
1967 for (auto &Elt : IndexPos) {
1968 auto EltDelta = Elt - PreviousValue;
1969 PreviousValue = Elt;
1970 Elt = EltDelta;
1971 }
1972 // Emit the index record.
1973 Stream.EmitRecord(bitc::METADATA_INDEX, IndexPos, IndexAbbrev);
1974 IndexPos.clear();
1975 }
1976
1977 // Write the named metadata now.
18911978 writeNamedMetadata(Record);
18921979
18931980 auto AddDeclAttachedMetadata = [&](const GlobalObject &GO) {
None ; RUN: llvm-as <%s | llvm-bcanalyzer -dump | FileCheck %s
0 ; RUN: llvm-as <%s -bitcode-mdindex-threshold=0 | llvm-bcanalyzer -dump | FileCheck %s -check-prefix=CHECK -check-prefix=MDINDEX
1 ; RUN: llvm-as <%s | llvm-bcanalyzer -dump | FileCheck %s -check-prefix=CHECK
12 ; Check that distinct nodes are emitted in post-order to avoid unnecessary
23 ; forward references.
34
1617 ; CHECK-NEXT:
1718 !4 = distinct !{!1, !3, !2}
1819
20 ; Before the named records we emit the index containing the position of the
21 ; previously emitted records, but only if the number of records is above
22 ; a threshold (can be controlled through `-bitcode-mdindex-threshold`).
23 ; MDINDEX:
24
1925 ; Note: named metadata nodes cannot reference null so their operands
2026 ; are numbered off-by-one.
2127 ; CHECK-NEXT:
None ; RUN: llvm-as <%s | llvm-bcanalyzer -dump | FileCheck %s
0 ; RUN: llvm-as <%s -bitcode-mdindex-threshold=0 | llvm-bcanalyzer -dump | FileCheck %s
11 ; Check that distinct nodes break uniquing cycles, so that uniqued subgraphs
22 ; are always in post-order.
33 ;
2121 ; CHECK-NEXT:
2222 !3 = !{!2}
2323
24 ; Before the named records we emit the index containing the position of the
25 ; previously emitted records
26 ; CHECK-NEXT:
27
2428 ; Note: named metadata nodes cannot reference null so their operands
2529 ; are numbered off-by-one.
2630 ; CHECK-NEXT:
None ; RUN: llvm-as <%s | llvm-bcanalyzer -dump | FileCheck %s
0 ; RUN: llvm-as <%s -bitcode-mdindex-threshold=0 | llvm-bcanalyzer -dump | FileCheck %s
11 ; Check that distinct nodes are emitted before uniqued nodes, even if that
22 ; breaks post-order traversals.
33
1010 ; CHECK-NEXT:
1111 !2 = !{!1}
1212
13 ; Before the named records we emit the index containing the position of the
14 ; previously emitted records
15 ; CHECK-NEXT:
16
1317 ; Note: named metadata nodes cannot reference null so their operands
1418 ; are numbered off-by-one.
1519 ; CHECK-NEXT:
None ; RUN: llvm-as <%s | llvm-bcanalyzer -dump | FileCheck %s
0 ; RUN: llvm-as <%s -bitcode-mdindex-threshold=0 | llvm-bcanalyzer -dump | FileCheck %s
11 ; Check that nodes are emitted in post-order to minimize the need for temporary
22 ; nodes. The graph structure is designed to foil naive implementations of
33 ; iterative post-order traversals: the leaves, !3 and !4, are reachable from
1414 ; CHECK-NEXT: 'leaf
1515 ; CHECK-NEXT: }
1616
17 ; Before the records we emit an offset to the index for the block
18 ; CHECK-NEXT:
19
1720 ; The leaves should come first (in either order).
1821 ; CHECK-NEXT:
1922 ; CHECK-NEXT:
2629 ; CHECK-NEXT:
2730 !6 = !{!3, !5, !4}
2831
32 ; Before the named records we emit the index containing the position of the
33 ; previously emitted records
34 ; CHECK-NEXT:
35
2936 ; Note: named metadata nodes cannot reference null so their operands
3037 ; are numbered off-by-one.
3138 ; CHECK-NEXT:
None ; RUN: llvm-as < %s | llvm-bcanalyzer -dump | FileCheck %s
0 ; RUN: llvm-as < %s -bitcode-mdindex-threshold=0 | llvm-bcanalyzer -dump | FileCheck %s
11 ; Test that metadata only used by a single function is serialized in that
22 ; function instead of in the global pool.
33 ;
1717
1818 ; Each node gets a new number. Bottom-up traversal of nodes.
1919 !named = !{!6}
20
21 ; Before the records we emit an offset to the index for the block
22 ; CHECK-NEXT:
2023
2124 ; CHECK-NEXT:
2225 !4 = !{!"named"}
352352 STRINGIFY_CODE(METADATA, OBJC_PROPERTY)
353353 STRINGIFY_CODE(METADATA, IMPORTED_ENTITY)
354354 STRINGIFY_CODE(METADATA, MODULE)
355 STRINGIFY_CODE(METADATA, INDEX_OFFSET)
356 STRINGIFY_CODE(METADATA, INDEX)
355357 }
356358 case bitc::METADATA_KIND_BLOCK_ID:
357359 switch (CodeID) {
512514 }
513515
514516 SmallVector Record;
517
518 // Keep the offset to the metadata index if seen.
519 uint64_t MetadataIndexOffset = 0;
515520
516521 // Read all the records for this block.
517522 while (1) {
598603
599604 for (unsigned i = 0, e = Record.size(); i != e; ++i)
600605 outs() << " op" << i << "=" << (int64_t)Record[i];
606
607 // If we found a metadata index, let's verify that we had an offset before
608 // and validate its forward reference offset was correct!
609 if (BlockID == bitc::METADATA_BLOCK_ID) {
610 if (Code == bitc::METADATA_INDEX_OFFSET) {
611 MetadataIndexOffset = Stream.GetCurrentBitNo() + Record[0];
612 }
613 if (Code == bitc::METADATA_INDEX) {
614 outs() << " (offset ";
615 if (MetadataIndexOffset == RecordStartBit)
616 outs() << "match)";
617 else
618 outs() << "mismatch: " << MetadataIndexOffset << " vs "
619 << RecordStartBit << ")";
620 }
621 }
601622
602623 // If we found a module hash, let's verify that it matches!
603624 if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH) {