llvm.org GIT mirror llvm / e401055
Add a function to MD5 a file's contents. In doing so, clean up the MD5 interface a little. Most existing users only care about the lower 8 bytes of an MD5, but for some users that care about the upper and lower, there wasn't a good interface. Furthermore, consumers of the MD5 checksum were required to handle endianness details on their own, so it seems reasonable to abstract this into a nicer interface that just gives you the right value. Differential Revision: https://reviews.llvm.org/D31105 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298322 91177308-0d34-0410-b5e6-96231b3b80d8 Zachary Turner 3 years ago
8 changed file(s) with 113 addition(s) and 24 deletion(s). Raw diff Collapse all Expand all
3232 #include "llvm/Support/Chrono.h"
3333 #include "llvm/Support/ErrorHandling.h"
3434 #include "llvm/Support/ErrorOr.h"
35 #include "llvm/Support/MD5.h"
3536 #include
3637 #include
3738 #include
398399 /// platform-specific error_code.
399400 std::error_code resize_file(int FD, uint64_t Size);
400401
402 /// @brief Compute an MD5 hash of a file's contents.
403 ///
404 /// @param FD Input file descriptor.
405 /// @returns An MD5Result with the hash computed, if successful, otherwise a
406 /// std::error_code.
407 ErrorOr md5_contents(int FD);
408
409 /// @brief Version of md5_contents that doesn't require an open file descriptor.
410 ErrorOr md5_contents(const Twine &Path);
411
401412 /// @}
402413 /// @name Physical Observers
403414 /// @{
5151 MD5_u32plus block[16];
5252
5353 public:
54 typedef uint8_t MD5Result[16];
54 struct MD5Result {
55 std::array Bytes;
56
57 operator std::array() const { return Bytes; }
58
59 const uint8_t &operator[](size_t I) const { return Bytes[I]; }
60 uint8_t &operator[](size_t I) { return Bytes[I]; }
61
62 SmallString<32> digest() const;
63
64 uint64_t low() const {
65 // Our MD5 implementation returns the result in little endian, so the low
66 // word is first.
67 using namespace support;
68 return endian::read(Bytes.data());
69 }
70
71 uint64_t high() const {
72 using namespace support;
73 return endian::read(Bytes.data() + 8);
74 }
75 std::pair words() const {
76 using namespace support;
77 return std::make_pair(high(), low());
78 }
79 };
5580
5681 MD5();
5782
75100 const uint8_t *body(ArrayRef Data);
76101 };
77102
103 inline bool operator==(const MD5::MD5Result &LHS, const MD5::MD5Result &RHS) {
104 return LHS.Bytes == RHS.Bytes;
105 }
106
78107 /// Helper to compute and return lower 64 bits of the given string's MD5 hash.
79108 inline uint64_t MD5Hash(StringRef Str) {
80109 using namespace support;
83112 Hash.update(Str);
84113 MD5::MD5Result Result;
85114 Hash.final(Result);
86 // Return the least significant 8 bytes. Our MD5 implementation returns the
87 // result in little endian, so we may need to swap bytes.
88 return endian::read(Result);
115 // Return the least significant word.
116 return Result.low();
89117 }
90118
91119 } // end namespace llvm
489489 Hash.final(Result);
490490
491491 // ... take the least significant 8 bytes and return those. Our MD5
492 // implementation always returns its results in little endian, swap bytes
493 // appropriately.
494 return support::endian::read64le(Result + 8);
492 // implementation always returns its results in little endian, so we actually
493 // need the "high" word.
494 return Result.high();
495495 }
496496
497497 /// This is based on the type signature computation given in section 7.27 of the
513513 Hash.final(Result);
514514
515515 // ... take the least significant 8 bytes and return those. Our MD5
516 // implementation always returns its results in little endian, swap bytes
517 // appropriately.
518 return support::endian::read64le(Result + 8);
519 }
516 // implementation always returns its results in little endian, so we actually
517 // need the "high" word.
518 return Result.high();
519 }
3838 #include "llvm/Support/CommandLine.h"
3939 #include "llvm/Support/Debug.h"
4040 #include "llvm/Support/Dwarf.h"
41 #include "llvm/Support/Endian.h"
4241 #include "llvm/Support/ErrorHandling.h"
4342 #include "llvm/Support/FormattedStream.h"
4443 #include "llvm/Support/LEB128.h"
19441943 MD5 Hash;
19451944 Hash.update(Identifier);
19461945 // ... take the least significant 8 bytes and return those. Our MD5
1947 // implementation always returns its results in little endian, swap bytes
1948 // appropriately.
1946 // implementation always returns its results in little endian, so we actually
1947 // need the "high" word.
19491948 MD5::MD5Result Result;
19501949 Hash.final(Result);
1951 return support::endian::read64le(Result + 8);
1950 return Result.high();
19521951 }
19531952
19541953 void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
260260 support::endian::write32le(&Result[12], d);
261261 }
262262
263 void MD5::stringifyResult(MD5Result &Result, SmallString<32> &Str) {
263 SmallString<32> MD5::MD5Result::digest() const {
264 SmallString<32> Str;
264265 raw_svector_ostream Res(Str);
265266 for (int i = 0; i < 16; ++i)
266 Res << format("%.2x", Result[i]);
267 Res << format("%.2x", Bytes[i]);
268 return Str;
269 }
270
271 void MD5::stringifyResult(MD5Result &Result, SmallString<32> &Str) {
272 Str = Result.digest();
267273 }
268274
269275 std::array MD5::hash(ArrayRef Data) {
272278 MD5::MD5Result Res;
273279 Hash.final(Res);
274280
275 std::array Arr;
276 memcpy(Arr.data(), Res, sizeof(Res));
277 return Arr;
278 }
281 return Res;
282 }
1010 //
1111 //===----------------------------------------------------------------------===//
1212
13 #include "llvm/Support/Path.h"
14 #include "llvm/ADT/ArrayRef.h"
1315 #include "llvm/Support/COFF.h"
14 #include "llvm/Support/MachO.h"
1516 #include "llvm/Support/Endian.h"
1617 #include "llvm/Support/Errc.h"
1718 #include "llvm/Support/ErrorHandling.h"
1819 #include "llvm/Support/FileSystem.h"
19 #include "llvm/Support/Path.h"
20 #include "llvm/Support/MachO.h"
2021 #include "llvm/Support/Process.h"
2122 #include
2223 #include
921922 if (BytesRead < 0 || BytesWritten < 0)
922923 return std::error_code(errno, std::generic_category());
923924 return std::error_code();
925 }
926
927 ErrorOr md5_contents(int FD) {
928 MD5 Hash;
929
930 constexpr size_t BufSize = 4096;
931 std::vector Buf(BufSize);
932 int BytesRead = 0;
933 for (;;) {
934 BytesRead = read(FD, Buf.data(), BufSize);
935 if (BytesRead <= 0)
936 break;
937 Hash.update(makeArrayRef(Buf.data(), BytesRead));
938 }
939
940 if (BytesRead < 0)
941 return std::error_code(errno, std::generic_category());
942 MD5::MD5Result Result;
943 Hash.final(Result);
944 return Result;
945 }
946
947 ErrorOr md5_contents(const Twine &Path) {
948 int FD;
949 if (auto EC = openFileForRead(Path, FD))
950 return EC;
951
952 auto Result = md5_contents(FD);
953 close(FD);
954 return Result;
924955 }
925956
926957 bool exists(file_status status) {
6262 std::array Vec = MD5::hash(Input);
6363 MD5::MD5Result MD5Res;
6464 SmallString<32> Res;
65 memcpy(MD5Res, Vec.data(), Vec.size());
65 memcpy(MD5Res.Bytes.data(), Vec.data(), Vec.size());
6666 MD5::stringifyResult(MD5Res, Res);
6767 EXPECT_EQ(Res, "c3fcd3d76192e4007dfb496cca67e13b");
68 EXPECT_EQ(0x3be167ca6c49fb7dULL, MD5Res.high());
69 EXPECT_EQ(0x00e49261d7d3fcc3ULL, MD5Res.low());
6870 }
6971 }
10101010 ASSERT_NO_ERROR(fs::remove(TempPath));
10111011 }
10121012
1013 TEST_F(FileSystemTest, MD5) {
1014 int FD;
1015 SmallString<64> TempPath;
1016 ASSERT_NO_ERROR(fs::createTemporaryFile("prefix", "temp", FD, TempPath));
1017 StringRef Data("abcdefghijklmnopqrstuvwxyz");
1018 write(FD, Data.data(), Data.size());
1019 lseek(FD, 0, SEEK_SET);
1020 auto Hash = fs::md5_contents(FD);
1021 ::close(FD);
1022 ASSERT_NO_ERROR(Hash.getError());
1023
1024 EXPECT_STREQ("c3fcd3d76192e4007dfb496cca67e13b", Hash->digest().c_str());
1025 }
1026
10131027 TEST_F(FileSystemTest, FileMapping) {
10141028 // Create a temp file.
10151029 int FileDescriptor;