llvm.org GIT mirror llvm / b738d34
Add an (optional) identification block in the bitcode. Processing bitcode from a different LLVM version can lead to unexpected behavior. The LLVM project guarantees autoupdating bitcode from a previous minor revision for the same major, but can't make any promise when reading bitcode generated from either a non-released LLVM, a vendor toolchain, or a "future" LLVM release. This patch aims at being more user-friendly and allows a bitcode producer to emit an optional block at the beginning of the bitcode that will contain an opaque string intended to describe the bitcode producer information. The bitcode reader will dump this information alongside any error it reports. The optional block also includes an "epoch" number, monotonically increasing when incompatible changes are made to the bitcode. The reader will reject bitcode whose epoch is different from the one expected. Differential Revision: http://reviews.llvm.org/D13666 From: Mehdi Amini <mehdi.amini@apple.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251325 91177308-0d34-0410-b5e6-96231b3b80d8 Mehdi Amini 5 years ago
5 changed file(s) with 151 addition(s) and 27 deletion(s). Raw diff Collapse all Expand all
2222 namespace llvm {
2323 namespace bitc {
2424 // The only top-level block type defined is for a module.
25 enum BlockIDs {
26 // Top-level block; the IDs below follow sequentially from this value.
27 MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID,
28
29 // Module sub-block IDs.
30 PARAMATTR_BLOCK_ID,
31 PARAMATTR_GROUP_BLOCK_ID,
32
33 CONSTANTS_BLOCK_ID,
34 FUNCTION_BLOCK_ID,
35
36 UNUSED_ID1, // Unused slot; still occupies one value in the sequence.
37
38 VALUE_SYMTAB_BLOCK_ID,
39 METADATA_BLOCK_ID,
40 METADATA_ATTACHMENT_ID,
41
42 TYPE_BLOCK_ID_NEW,
43
44 USELIST_BLOCK_ID,
45
46 MODULE_STRTAB_BLOCK_ID,
47 FUNCTION_SUMMARY_BLOCK_ID,
48
49 OPERAND_BUNDLE_TAGS_BLOCK_ID
50 };
51
25 enum BlockIDs {
26 // Top-level block; the IDs below follow sequentially from this value.
27 MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID,
28
29 // Module sub-block IDs.
30 PARAMATTR_BLOCK_ID,
31 PARAMATTR_GROUP_BLOCK_ID,
32
33 CONSTANTS_BLOCK_ID,
34 FUNCTION_BLOCK_ID,
35
36 // Block intended to contain information on the bitcode versioning.
37 // Can be used to provide better error messages when we fail to parse a
38 // bitcode file. The writer emits it ahead of the module block.
39 IDENTIFICATION_BLOCK_ID,
40
41 VALUE_SYMTAB_BLOCK_ID,
42 METADATA_BLOCK_ID,
43 METADATA_ATTACHMENT_ID,
44
45 TYPE_BLOCK_ID_NEW,
46
47 USELIST_BLOCK_ID,
48
49 MODULE_STRTAB_BLOCK_ID,
50 FUNCTION_SUMMARY_BLOCK_ID,
51
52 OPERAND_BUNDLE_TAGS_BLOCK_ID
53 };
54
55 /// The identification block contains a string that describes the producer
56 /// details, and an epoch that defines the auto-upgrade capability.
57 enum IdentificationCodes {
58 IDENTIFICATION_CODE_STRING = 1, // IDENTIFICATION: [strchr x N]
59 IDENTIFICATION_CODE_EPOCH = 2, // EPOCH: [epoch#]
60 };
61
62 /// The epoch that defines the auto-upgrade compatibility for the bitcode.
63 ///
64 /// LLVM guarantees in a major release that a minor release can read bitcode
65 /// generated by previous minor releases. We translate this by having the reader
66 /// accept only bitcode with the same epoch, except for the X.0 release which
67 /// also accepts N-1.
68 enum { BITCODE_CURRENT_EPOCH = 0 };
5269
5370 /// MODULE blocks have a number of optional fields and subblocks.
5471 enum ModuleCodes {
151151 uint64_t LastFunctionBlockBit = 0;
152152 bool SeenValueSymbolTable = false;
153153 unsigned VSTOffset = 0;
154 // Contains an arbitrary and optional string identifying the bitcode producer
155 std::string ProducerIdentification;
154156
155157 std::vector TypeList;
156158 BitcodeReaderValueList ValueList;
272274 void setStripDebugInfo() override;
273275
274276 private:
277 /// Parse the "IDENTIFICATION_BLOCK_ID" block, populate the
278 /// ProducerIdentification data member, and do some basic enforcement on the
279 /// "epoch" encoded in the bitcode.
280 std::error_code parseBitcodeVersion();
281
275282 std::vector IdentifiedStructTypes;
276283 StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name);
277284 StructType *createIdentifiedStructType(LLVMContext &Context);
517524 }
518525
519526 std::error_code BitcodeReader::error(BitcodeError E, const Twine &Message) {
527 if (!ProducerIdentification.empty()) {
528 Twine MsgWithID = Message + " (Producer: '" + ProducerIdentification +
529 "' Reader: 'LLVM " + LLVM_VERSION_STRING "')";
530 return ::error(DiagnosticHandler, make_error_code(E), MsgWithID);
531 }
520532 return ::error(DiagnosticHandler, make_error_code(E), Message);
521533 }
522534
523535 std::error_code BitcodeReader::error(const Twine &Message) {
536 if (!ProducerIdentification.empty()) {
537 Twine MsgWithID = Message + " (Producer: '" + ProducerIdentification +
538 "' Reader: 'LLVM " + LLVM_VERSION_STRING "')";
539 return ::error(DiagnosticHandler,
540 make_error_code(BitcodeError::CorruptedBitcode), MsgWithID);
541 }
524542 return ::error(DiagnosticHandler,
525543 make_error_code(BitcodeError::CorruptedBitcode), Message);
526544 }
30603078 }
30613079 }
30623080
3081 std::error_code BitcodeReader::parseBitcodeVersion() {
3082 if (Stream.EnterSubBlock(bitc::IDENTIFICATION_BLOCK_ID))
3083 return error("Invalid record");
3084
3085 // Read all the records.
3086 SmallVector Record;
3087 while (1) {
3088 BitstreamEntry Entry = Stream.advance();
3089
3090 switch (Entry.Kind) {
3091 default:
3092 case BitstreamEntry::Error:
3093 return error("Malformed block");
3094 case BitstreamEntry::EndBlock:
3095 return std::error_code();
3096 case BitstreamEntry::Record:
3097 // The interesting case.
3098 break;
3099 }
3100
3101 // Read a record.
3102 Record.clear();
3103 unsigned BitCode = Stream.readRecord(Entry.ID, Record);
3104 switch (BitCode) {
3105 default: // Default behavior: reject
3106 return error("Invalid value");
3107 case bitc::IDENTIFICATION_CODE_STRING: { // IDENTIFICATION: [strchr x
3108 // N]
3109 convertToString(Record, 0, ProducerIdentification);
3110 break;
3111 }
3112 case bitc::IDENTIFICATION_CODE_EPOCH: { // EPOCH: [epoch#]
3113 unsigned epoch = (unsigned)Record[0];
3114 if (epoch != bitc::BITCODE_CURRENT_EPOCH) {
3115 auto BitcodeEpoch = std::to_string(epoch);
3116 auto CurrentEpoch = std::to_string(bitc::BITCODE_CURRENT_EPOCH);
3117 return error(Twine("Incompatible epoch: Bitcode '") + BitcodeEpoch +
3118 "' vs current: '" + CurrentEpoch + "'");
3119 }
3120 }
3121 }
3122 }
3123 }
3124
30633125 std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
30643126 bool ShouldLazyLoadMetadata) {
30653127 if (ResumeBit)
35513613 if (Entry.Kind != BitstreamEntry::SubBlock)
35523614 return error("Malformed block");
35533615
3616 if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) {
3617 parseBitcodeVersion();
3618 continue;
3619 }
3620
35543621 if (Entry.ID == bitc::MODULE_BLOCK_ID)
35553622 return parseModule(0, ShouldLazyLoadMetadata);
35563623
28282828 Stream.ExitBlock();
28292829 }
28302830
2831 // Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the
2832 // current llvm version, and a record for the epoch number.
2833 static void WriteIdentificationBlock(const Module *M, BitstreamWriter &Stream) {
2834 Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5);
2835
2836 // Write the "user readable" string identifying the bitcode producer
2837 BitCodeAbbrev *Abbv = new BitCodeAbbrev();
2838 Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_STRING));
2839 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
2840 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
2841 auto StringAbbrev = Stream.EmitAbbrev(Abbv);
2842 WriteStringRecord(bitc::IDENTIFICATION_CODE_STRING,
2843 "LLVM" LLVM_VERSION_STRING, StringAbbrev, Stream);
2844
2845 // Write the epoch version
2846 Abbv = new BitCodeAbbrev();
2847 Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_EPOCH));
2848 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
2849 SmallVector Vals = {bitc::BITCODE_CURRENT_EPOCH};
2850 Stream.EmitRecord(bitc::IDENTIFICATION_CODE_EPOCH, Vals);
2851 Stream.ExitBlock();
2852 }
2853
28312854 /// WriteModule - Emit the specified module to the bitstream.
28322855 static void WriteModule(const Module *M, BitstreamWriter &Stream,
28332856 bool ShouldPreserveUseListOrder,
29993022 // Emit the file header.
30003023 WriteBitcodeHeader(Stream);
30013024
3025 WriteIdentificationBlock(M, Stream);
3026
30023027 // Emit the module.
30033028 WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit,
30043029 EmitFunctionSummary);
0 ; Check that a block "IDENTIFICATION_BLOCK_ID" is emitted.
1 ;RUN: llvm-as < %s | llvm-bcanalyzer -dump | FileCheck %s
2 ;CHECK: <IDENTIFICATION_BLOCK_ID
3 ;CHECK-NEXT: <STRING
4 ;CHECK-NEXT: <EPOCH
5 ;CHECK-NEXT: </IDENTIFICATION_BLOCK_ID
109109 case bitc::TYPE_BLOCK_ID_NEW: return "TYPE_BLOCK_ID";
110110 case bitc::CONSTANTS_BLOCK_ID: return "CONSTANTS_BLOCK";
111111 case bitc::FUNCTION_BLOCK_ID: return "FUNCTION_BLOCK";
112 case bitc::IDENTIFICATION_BLOCK_ID:
113 return "IDENTIFICATION_BLOCK_ID";
112114 case bitc::VALUE_SYMTAB_BLOCK_ID: return "VALUE_SYMTAB";
113115 case bitc::METADATA_BLOCK_ID: return "METADATA_BLOCK";
114116 case bitc::METADATA_ATTACHMENT_ID: return "METADATA_ATTACHMENT_BLOCK";
168170 STRINGIFY_CODE(MODULE_CODE, PURGEVALS)
169171 STRINGIFY_CODE(MODULE_CODE, GCNAME)
170172 STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
173 }
174 case bitc::IDENTIFICATION_BLOCK_ID:
175 switch (CodeID) {
176 default:
177 return nullptr;
178 STRINGIFY_CODE(IDENTIFICATION_CODE, STRING)
179 STRINGIFY_CODE(IDENTIFICATION_CODE, EPOCH)
171180 }
172181 case bitc::PARAMATTR_BLOCK_ID:
173182 switch (CodeID) {