llvm.org GIT mirror llvm / 202bc23
Minidump: Add support for reading/writing strings Summary: Strings in minidump files are stored as a 32-bit length field, giving the length of the string in *bytes*, which is followed by the appropriate number of UTF16 code units. The string is also supposed to be null-terminated, and the null-terminator is not a part of the length field. This patch: - adds support for reading these strings out of the minidump file (this implementation does not depend on proper null-termination) - adds support for writing them to a minidump file - using the previous two pieces implements proper (de)serialization of the CSDVersion field of the SystemInfo stream. Previously, this was only read/written as hex, and no attempt was made to access the referenced string -- now this string is read and written correctly. The changes are tested via yaml2obj|obj2yaml round-trip as well as a unit test which checks the corner cases of the string deserialization logic. Reviewers: jhenderson, zturner, clayborg Subscribers: llvm-commits, aprantl, markmentovai, amccarth, lldb-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59775 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357749 91177308-0d34-0410-b5e6-96231b3b80d8 Pavel Labath 4 months ago
7 changed file(s) with 115 addition(s) and 12 deletion(s). Raw diff Collapse all Expand all
4141 /// Returns the raw contents of the stream of the given type, or None if the
4242 /// file does not contain a stream of this type.
4343 Optional> getRawStream(minidump::StreamType Type) const;
44
45 /// Returns the minidump string at the given offset. An error is returned if
46 /// we fail to parse the string, or the string is invalid UTF16.
47 Expected getString(size_t Offset) const;
4448
4549 /// Returns the contents of the SystemInfo stream, cast to the appropriate
4650 /// type. An error is returned if the file does not contain this stream, or
6666 /// SystemInfo minidump stream.
6767 struct SystemInfoStream : public Stream {
6868 minidump::SystemInfo Info;
69 std::string CSDVersion;
6970
70 explicit SystemInfoStream(const minidump::SystemInfo &Info)
71 explicit SystemInfoStream(const minidump::SystemInfo &Info,
72 std::string CSDVersion)
7173 : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo),
72 Info(Info) {}
74 Info(Info), CSDVersion(std::move(CSDVersion)) {}
7375
7476 SystemInfoStream()
7577 : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo) {
77
88 #include "llvm/Object/Minidump.h"
99 #include "llvm/Object/Error.h"
10 #include "llvm/Support/ConvertUTF.h"
1011
1112 using namespace llvm;
1213 using namespace llvm::object;
1819 if (It != StreamMap.end())
1920 return getRawStream(Streams[It->second]);
2021 return None;
22 }
23
// Reads the minidump string stored at Offset in the file data and returns it
// converted by convertUTF16ToUTF8String. An error is returned when the length
// field or the string data cannot be sliced out of the file, when the byte
// length is odd, or when the UTF16 conversion fails.
24 Expected MinidumpFile::getString(size_t Offset) const {
25 // Minidump strings consist of a 32-bit length field, which gives the size of
26 // the string in *bytes*. This is followed by the actual string encoded in
27 // UTF16.
// Slice out a single element at Offset: the length field.
28 auto ExpectedSize =
29 getDataSliceAs(getData(), Offset, 1);
30 if (!ExpectedSize)
31 return ExpectedSize.takeError();
32 size_t Size = (*ExpectedSize)[0];
// The length is in bytes; an odd value cannot describe whole two-byte UTF16
// code units.
33 if (Size % 2 != 0)
34 return createError("String size not even");
// From here on Size counts UTF16 code units rather than bytes.
35 Size /= 2;
// Empty string: return without looking at the bytes after the length field.
36 if (Size == 0)
37 return "";
38
// Step over the length field to the first code unit.
39 Offset += sizeof(support::ulittle32_t);
40 auto ExpectedData = getDataSliceAs(getData(), Offset, Size);
41 if (!ExpectedData)
42 return ExpectedData.takeError();
43
// Exactly Size code units are converted; no null terminator is looked for.
44 std::string Result;
45 if (!convertUTF16ToUTF8String(*ExpectedData, Result))
46 return createError("String decoding failed");
47
48 return Result;
2149 }
2250
2351 Expected>
66 //===----------------------------------------------------------------------===//
77
88 #include "llvm/ObjectYAML/MinidumpYAML.h"
9 #include "llvm/Support/ConvertUTF.h"
910
1011 using namespace llvm;
1112 using namespace llvm::MinidumpYAML;
3839 return allocateArray(makeArrayRef(Data));
3940 }
4041
42 size_t allocateString(StringRef Str);
43
4144 void writeTo(raw_ostream &OS) const;
4245
4346 private:
4649 std::vector> Callbacks;
4750 };
4851 } // namespace
52
// Registers Str for output in minidump string form: a 32-bit length field
// followed by the UTF16 code units and one extra trailing code unit. Str is
// converted with convertUTF8ToUTF16String; a failed conversion is only caught
// by the assert below. Returns whatever allocateCallback returns (the
// SystemInfo writer stores that value as the string's RVA).
53 size_t BlobAllocator::allocateString(StringRef Str) {
54 SmallVector WStr;
55 bool OK = convertUTF8ToUTF16String(Str, WStr);
56 assert(OK && "Invalid UTF8 in Str?");
// Keeps OK "used" when the assert above compiles to nothing.
57 (void)OK;
58
// One element more than WStr; the copy below fills all but the last, which
// serves as the null terminator (see the length computation in the callback).
// NOTE(review): relies on support::ulittle16_t() being zero - confirm.
59 SmallVector EndianStr(WStr.size() + 1,
60 support::ulittle16_t());
61 copy(WStr, EndianStr.begin());
// Reserve room for the length field plus every code unit, terminator included.
// EndianStr is captured by copy, so the callback carries its own data.
62 return allocateCallback(
63 sizeof(uint32_t) + EndianStr.size() * sizeof(support::ulittle16_t),
64 [EndianStr](raw_ostream &OS) {
65 // Length does not include the null-terminator.
66 support::ulittle32_t Length(2 * (EndianStr.size() - 1));
67 OS.write(reinterpret_cast(&Length), sizeof(Length));
68 OS.write(reinterpret_cast(EndianStr.begin()),
69 sizeof(support::ulittle16_t) * EndianStr.size());
70 });
71 }
4972
5073 void BlobAllocator::writeTo(raw_ostream &OS) const {
5174 size_t BeginOffset = OS.tell();
268291 mapOptional(IO, "Minor Version", Info.MinorVersion, 0);
269292 mapOptional(IO, "Build Number", Info.BuildNumber, 0);
270293 IO.mapRequired("Platform ID", Info.PlatformId);
271 mapOptionalHex(IO, "CSD Version RVA", Info.CSDVersionRVA, 0);
294 IO.mapOptional("CSD Version", Stream.CSDVersion, "");
272295 mapOptionalHex(IO, "Suite Mask", Info.SuiteMask, 0);
273296 mapOptionalHex(IO, "Reserved", Info.Reserved, 0);
274297 switch (static_cast(Info.ProcessorArch)) {
336359 Directory Result;
337360 Result.Type = S.Type;
338361 Result.Location.RVA = File.tell();
362 Optional DataEnd;
339363 switch (S.Kind) {
340364 case Stream::StreamKind::RawContent: {
341365 RawContentStream &Raw = cast(S);
346370 });
347371 break;
348372 }
349 case Stream::StreamKind::SystemInfo:
350 File.allocateObject(cast(S).Info);
351 break;
373 case Stream::StreamKind::SystemInfo: {
374 SystemInfoStream &SystemInfo = cast(S);
375 File.allocateObject(SystemInfo.Info);
376 // The CSD string is not a part of the stream.
377 DataEnd = File.tell();
378 SystemInfo.Info.CSDVersionRVA = File.allocateString(SystemInfo.CSDVersion);
379 break;
380 }
352381 case Stream::StreamKind::TextContent:
353382 File.allocateArray(arrayRefFromStringRef(cast(S).Text));
354383 break;
355384 }
356 Result.Location.DataSize = File.tell() - Result.Location.RVA;
385 // If DataEnd is not set, we assume everything we generated is a part of the
386 // stream.
387 Result.Location.DataSize =
388 DataEnd.getValueOr(File.tell()) - Result.Location.RVA;
357389 return Result;
358390 }
359391
394426 auto ExpectedInfo = File.getSystemInfo();
395427 if (!ExpectedInfo)
396428 return ExpectedInfo.takeError();
397 return make_unique(*ExpectedInfo);
// Read the CSD version string that the SystemInfo record points at. If that
// fails, the error to propagate is the one held by ExpectedCSDVersion;
// ExpectedInfo was already checked above and holds a value, not an error.
429 auto ExpectedCSDVersion = File.getString(ExpectedInfo->CSDVersionRVA);
430 if (!ExpectedCSDVersion)
431 return ExpectedCSDVersion.takeError();
432 return make_unique(*ExpectedInfo,
433 std::move(*ExpectedCSDVersion));
398434 }
399435 case StreamKind::TextContent:
400436 return make_unique(
44 - Type: SystemInfo
55 Processor Arch: ARM64
66 Platform ID: Linux
7 CSD Version RVA: 0x01020304
7 CSD Version: Linux 3.13.0-91-generic
88 CPU:
99 CPUID: 0x05060708
1010 - Type: LinuxAuxv
2121 # CHECK-NEXT: - Type: SystemInfo
2222 # CHECK-NEXT: Processor Arch: ARM64
2323 # CHECK-NEXT: Platform ID: Linux
24 # CHECK-NEXT: CSD Version RVA: 0x01020304
24 # CHECK-NEXT: CSD Version: Linux 3.13.0-91-generic
2525 # CHECK-NEXT: CPU:
2626 # CHECK-NEXT: CPUID: 0x05060708
2727 # CHECK-NEXT: - Type: LinuxAuxv
248248 EXPECT_EQ(0x08070605u, Info.CPU.X86.FeatureInfo);
249249 EXPECT_EQ(0x02010009u, Info.CPU.X86.AMDExtendedFeatures);
250250 }
251
// Exercises MinidumpFile::getString against a hand-built byte image holding
// several string records: three that must be rejected and three that must
// decode.
252 TEST(MinidumpFile, getString) {
253 std::vector ManyStrings{
254 // Header
255 'M', 'D', 'M', 'P', 0x93, 0xa7, 0, 0, // Signature, Version
256 2, 0, 0, 0, // NumberOfStreams,
257 0x20, 0, 0, 0, // StreamDirectoryRVA
258 0, 1, 2, 3, 4, 5, 6, 7, // Checksum, TimeDateStamp
259 8, 9, 0, 1, 2, 3, 4, 5, // Flags
260 // Stream Directory
261 0, 0, 0, 0, 0, 0, 0, 0, // Type, DataSize,
262 0x20, 0, 0, 0, // RVA
// String records start here, at byte offset 44 (32 header bytes plus the 12
// directory bytes above). Each record below is 6 bytes: a 4-byte length field
// and 2 bytes of data.
263 1, 0, 0, 0, 0, 0, // String1 - odd length
264 0, 0, 1, 0, 0, 0, // String2 - too long
265 2, 0, 0, 0, 0, 0xd8, // String3 - invalid utf16
266 0, 0, 0, 0, 0, 0, // String4 - ""
267 2, 0, 0, 0, 'a', 0, // String5 - "a"
268 0, // Mis-align next string
269 2, 0, 0, 0, 'a', 0, // String6 - "a"
270
271 };
272 auto ExpectedFile = create(ManyStrings);
273 ASSERT_THAT_EXPECTED(ExpectedFile, Succeeded());
274 const MinidumpFile &File = **ExpectedFile;
// Offsets 44, 50, 56, 62 and 68 are String1..String5 in order; the single
// padding byte at offset 74 puts String6 at the odd offset 75.
275 EXPECT_THAT_EXPECTED(File.getString(44), Failed());
276 EXPECT_THAT_EXPECTED(File.getString(50), Failed());
277 EXPECT_THAT_EXPECTED(File.getString(56), Failed());
278 EXPECT_THAT_EXPECTED(File.getString(62), HasValue(""));
279 EXPECT_THAT_EXPECTED(File.getString(68), HasValue("a"));
280 EXPECT_THAT_EXPECTED(File.getString(75), HasValue("a"));
281
282 // Check the case when the size field does not fit into the remaining data.
283 EXPECT_THAT_EXPECTED(File.getString(ManyStrings.size() - 2),
284 Failed());
285 }
3232 - Type: SystemInfo
3333 Processor Arch: ARM64
3434 Platform ID: Linux
35 CSD Version RVA: 0x01020304
3635 CPU:
3736 CPUID: 0x05060708
3837 - Type: LinuxMaps
5352 const SystemInfo &SysInfo = *ExpectedSysInfo;
5453 EXPECT_EQ(ProcessorArchitecture::ARM64, SysInfo.ProcessorArch);
5554 EXPECT_EQ(OSPlatform::Linux, SysInfo.PlatformId);
56 EXPECT_EQ(0x01020304u, SysInfo.CSDVersionRVA);
5755 EXPECT_EQ(0x05060708u, SysInfo.CPU.Arm.CPUID);
5856
5957 EXPECT_EQ(StreamType::LinuxMaps, File.streams()[1].Type);