llvm.org GIT mirror llvm / aa4d2ac
Object/llvm-objdump: allow dumping of mach-o exports trie MachOObjectFile in lib/Object currently has no support for parsing the rebase, binding, and export information from the LC_DYLD_INFO load command in final linked mach-o images. This patch adds support for parsing the exports trie data structure. It also adds an option to llvm-objdump to dump that export info. I did the exports parsing first because it is the hardest. The information is encoded in a trie structure, but the standard ObjectFile way to inspect content is through iterators. So I needed to make an iterator that would do a non-recursive walk through the trie and maintain the concatenation of edges needed for the current string prefix. I plan to add similar support in MachOObjectFile and llvm-objdump to parse/display the rebasing and binding info too. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216808 91177308-0d34-0410-b5e6-96231b3b80d8 Nick Kledzik 6 years ago
8 changed file(s) with 397 addition(s) and 7 deletion(s). Raw diff Collapse all Expand all
4747 const ObjectFile *getObjectFile() const;
4848 };
4949 typedef content_iterator dice_iterator;
50
51 /// ExportEntry encapsulates the current-state-of-the-walk used when doing a
52 /// non-recursive walk of the trie data structure. This allows you to iterate
53 /// across all exported symbols using:
54 /// for (const llvm::object::ExportEntry &AnExport : Obj->exports()) {
55 /// }
56 class ExportEntry {
57 public:
58 ExportEntry(ArrayRef Trie);
59
60 StringRef name() const;
61 uint64_t flags() const;
62 uint64_t address() const;
63 uint64_t other() const;
64 StringRef otherName() const;
65 uint32_t nodeOffset() const;
66
67 bool operator==(const ExportEntry &) const;
68
69 void moveNext();
70
71 private:
72 friend class MachOObjectFile;
73 void moveToFirst();
74 void moveToEnd();
75 uint64_t readULEB128(const uint8_t *&p);
76 void pushDownUntilBottom();
77 void pushNode(uint64_t Offset);
78
79 // Represents a node in the mach-o exports trie.
80 struct NodeState {
81 NodeState(const uint8_t *Ptr);
82 const uint8_t *Start;
83 const uint8_t *Current;
84 uint64_t Flags;
85 uint64_t Address;
86 uint64_t Other;
87 const char *ImportName;
88 unsigned ChildCount;
89 unsigned NextChildIndex;
90 unsigned ParentStringLength;
91 bool IsExportNode;
92 };
93
94 ArrayRef Trie;
95 SmallString<256> CumulativeString;
96 SmallVector Stack;
97 bool Malformed;
98 bool Done;
99 };
100 typedef content_iterator export_iterator;
50101
51102 class MachOObjectFile : public ObjectFile {
52103 public:
118169 bool &Result) const override;
119170
120171 // MachO specific.
121 std::error_code getLibraryShortNameByIndex(unsigned Index, StringRef &Res);
172 std::error_code getLibraryShortNameByIndex(unsigned Index, StringRef &) const;
122173
123174 // TODO: Would be useful to have an iterator based version
124175 // of the load command interface too.
143194
144195 dice_iterator begin_dices() const;
145196 dice_iterator end_dices() const;
197
198 /// For use iterating over all exported symbols.
199 iterator_range exports() const;
200
201 /// For use examining a trie not in a MachOObjectFile.
202 static iterator_range exports(ArrayRef Trie);
146203
147204 // In a MachO file, sections have a segment name. This is used in the .o
148205 // files. They have a single segment, but this field specifies which segment
206263 MachO::symtab_command getSymtabLoadCommand() const;
207264 MachO::dysymtab_command getDysymtabLoadCommand() const;
208265 MachO::linkedit_data_command getDataInCodeLoadCommand() const;
266 ArrayRef getDyldInfoRebaseOpcodes() const;
267 ArrayRef getDyldInfoBindOpcodes() const;
268 ArrayRef getDyldInfoWeakBindOpcodes() const;
269 ArrayRef getDyldInfoLazyBindOpcodes() const;
270 ArrayRef getDyldInfoExportsTrie() const;
209271
210272 StringRef getStringTableData() const;
211273 bool is64Bit() const;
236298 typedef SmallVector LibraryList;
237299 LibraryList Libraries;
238300 typedef SmallVector LibraryShortName;
239 LibraryShortName LibrariesShortNames;
301 mutable LibraryShortName LibrariesShortNames;
240302 const char *SymtabLoadCmd;
241303 const char *DysymtabLoadCmd;
242304 const char *DataInCodeLoadCmd;
305 const char *DyldInfoLoadCmd;
243306 };
244307
245308 /// DiceRef
321321 };
322322
323323 enum {
324 EXPORT_SYMBOL_FLAGS_KIND_MASK = 0x03u,
324325 EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION = 0x04u,
325326 EXPORT_SYMBOL_FLAGS_REEXPORT = 0x08u,
326327 EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER = 0x10u
327328 };
328329
329330 enum ExportSymbolKind {
330 EXPORT_SYMBOL_FLAGS_KIND_MASK = 0x03u,
331331 EXPORT_SYMBOL_FLAGS_KIND_REGULAR = 0x00u,
332332 EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL = 0x01u,
333333 EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE = 0x02u
1818 #include "llvm/Support/DataExtractor.h"
1919 #include "llvm/Support/Format.h"
2020 #include "llvm/Support/Host.h"
21 #include "llvm/Support/LEB128.h"
22 #include "llvm/Support/MachO.h"
2123 #include "llvm/Support/MemoryBuffer.h"
2224 #include "llvm/Support/raw_ostream.h"
2325 #include
225227 bool Is64bits, std::error_code &EC)
226228 : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
227229 SymtabLoadCmd(nullptr), DysymtabLoadCmd(nullptr),
228 DataInCodeLoadCmd(nullptr) {
230 DataInCodeLoadCmd(nullptr), DyldInfoLoadCmd(nullptr) {
229231 uint32_t LoadCommandCount = this->getHeader().ncmds;
230232 MachO::LoadCommandType SegmentLoadType = is64Bit() ?
231233 MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT;
241243 } else if (Load.C.cmd == MachO::LC_DATA_IN_CODE) {
242244 assert(!DataInCodeLoadCmd && "Multiple data in code tables");
243245 DataInCodeLoadCmd = Load.Ptr;
246 } else if (Load.C.cmd == MachO::LC_DYLD_INFO ||
247 Load.C.cmd == MachO::LC_DYLD_INFO_ONLY) {
248 assert(!DyldInfoLoadCmd && "Multiple dyldinfo load commands");
249 DyldInfoLoadCmd = Load.Ptr;
244250 } else if (Load.C.cmd == SegmentLoadType) {
245251 uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load);
246252 for (unsigned J = 0; J < NumSections; ++J) {
11581164 // It is passed the index (0 - based) of the library as translated from
11591165 // GET_LIBRARY_ORDINAL (1 - based).
11601166 std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index,
1161 StringRef &Res) {
1167 StringRef &Res) const {
11621168 if (Index >= Libraries.size())
11631169 return object_error::parse_failed;
11641170
15041510 return dice_iterator(DiceRef(DRI, this));
15051511 }
15061512
1513 ExportEntry::ExportEntry(ArrayRef T)
1514 : Trie(T), Malformed(false), Done(false) { }
1515
1516 void ExportEntry::moveToFirst() {
1517 pushNode(0);
1518 pushDownUntilBottom();
1519 }
1520
1521 void ExportEntry::moveToEnd() {
1522 Stack.clear();
1523 Done = true;
1524 }
1525
1526 bool ExportEntry::operator==(const ExportEntry &Other) const {
1527 // Common case, one at end, other iterating from begin.
1528 if (Done || Other.Done)
1529 return (Done == Other.Done);
1530 // Not equal if different stack sizes.
1531 if (Stack.size() != Other.Stack.size())
1532 return false;
1533 // Not equal if different cumulative strings.
1534 if (!CumulativeString.str().equals(CumulativeString.str()))
1535 return false;
1536 // Equal if all nodes in both stacks match.
1537 for (unsigned i=0; i < Stack.size(); ++i) {
1538 if (Stack[i].Start != Other.Stack[i].Start)
1539 return false;
1540 }
1541 return true;
1542 }
1543
1544 uint64_t ExportEntry::readULEB128(const uint8_t *&p) {
1545 unsigned count;
1546 uint64_t result = decodeULEB128(p, &count);
1547 p += count;
1548 if (p > Trie.end()) {
1549 p = Trie.end();
1550 Malformed = true;
1551 }
1552 return result;
1553 }
1554
1555 StringRef ExportEntry::name() const {
1556 return CumulativeString.str();
1557 }
1558
1559 uint64_t ExportEntry::flags() const {
1560 return Stack.back().Flags;
1561 }
1562
1563 uint64_t ExportEntry::address() const {
1564 return Stack.back().Address;
1565 }
1566
1567 uint64_t ExportEntry::other() const {
1568 return Stack.back().Other;
1569 }
1570
1571 StringRef ExportEntry::otherName() const {
1572 const char* ImportName = Stack.back().ImportName;
1573 if (ImportName)
1574 return StringRef(ImportName);
1575 return StringRef();
1576 }
1577
1578 uint32_t ExportEntry::nodeOffset() const {
1579 return Stack.back().Start - Trie.begin();
1580 }
1581
1582 ExportEntry::NodeState::NodeState(const uint8_t *Ptr)
1583 : Start(Ptr), Current(Ptr), Flags(0), Address(0), Other(0),
1584 ImportName(nullptr), ChildCount(0), NextChildIndex(0),
1585 ParentStringLength(0), IsExportNode(false) {
1586 }
1587
1588 void ExportEntry::pushNode(uint64_t offset) {
1589 const uint8_t *Ptr = Trie.begin() + offset;
1590 NodeState State(Ptr);
1591 uint64_t ExportInfoSize = readULEB128(State.Current);
1592 State.IsExportNode = (ExportInfoSize != 0);
1593 const uint8_t* Children = State.Current + ExportInfoSize;
1594 if (State.IsExportNode) {
1595 State.Flags = readULEB128(State.Current);
1596 if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
1597 State.Address = 0;
1598 State.Other = readULEB128(State.Current); // dylib ordinal
1599 State.ImportName = reinterpret_cast(State.Current);
1600 } else {
1601 State.Address = readULEB128(State.Current);
1602 if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER)
1603 State.Other = readULEB128(State.Current);
1604 }
1605 }
1606 State.ChildCount = *Children;
1607 State.Current = Children + 1;
1608 State.NextChildIndex = 0;
1609 State.ParentStringLength = CumulativeString.size();
1610 Stack.push_back(State);
1611 }
1612
1613 void ExportEntry::pushDownUntilBottom() {
1614 while (Stack.back().NextChildIndex < Stack.back().ChildCount) {
1615 NodeState &Top = Stack.back();
1616 CumulativeString.resize(Top.ParentStringLength);
1617 for (;*Top.Current != 0; Top.Current++) {
1618 char c = *Top.Current;
1619 CumulativeString.push_back(c);
1620 }
1621 Top.Current += 1;
1622 uint64_t childNodeIndex = readULEB128(Top.Current);
1623 Top.NextChildIndex += 1;
1624 pushNode(childNodeIndex);
1625 }
1626 if (!Stack.back().IsExportNode) {
1627 Malformed = true;
1628 moveToEnd();
1629 }
1630 }
1631
1632 // We have a trie data structure and need a way to walk it that is compatible
1633 // with the C++ iterator model. The solution is a non-recursive depth first
1634 // traversal where the iterator contains a stack of parent nodes along with a
1635 // string that is the accumulation of all edge strings along the parent chain
1636 // to this point.
1637 //
1638 // There is one “export” node for each exported symbol. But because some
1639 // symbols may be a prefix of another symbol (e.g. _dup and _dup2), an export
1640 // node may have child nodes too.
1641 //
1642 // The algorithm for moveNext() is to keep moving down the leftmost unvisited
1643 // child until hitting a node with no children (which is an export node or
1644 // else the trie is malformed). On the way down, each node is pushed on the
1645 // stack ivar. If there is no more ways down, it pops up one and tries to go
1646 // down a sibling path until a childless node is reached.
1647 void ExportEntry::moveNext() {
1648 if (Stack.empty() || !Stack.back().IsExportNode) {
1649 Malformed = true;
1650 moveToEnd();
1651 return;
1652 }
1653
1654 Stack.pop_back();
1655 while (!Stack.empty()) {
1656 NodeState &Top = Stack.back();
1657 if (Top.NextChildIndex < Top.ChildCount) {
1658 pushDownUntilBottom();
1659 // Now at the next export node.
1660 return;
1661 } else {
1662 if (Top.IsExportNode) {
1663 // This node has no children but is itself an export node.
1664 CumulativeString.resize(Top.ParentStringLength);
1665 return;
1666 }
1667 Stack.pop_back();
1668 }
1669 }
1670 Done = true;
1671 }
1672
1673 iterator_range
1674 MachOObjectFile::exports(ArrayRef Trie) {
1675 ExportEntry Start(Trie);
1676 Start.moveToFirst();
1677
1678 ExportEntry Finish(Trie);
1679 Finish.moveToEnd();
1680
1681 return iterator_range(export_iterator(Start),
1682 export_iterator(Finish));
1683 }
1684
1685 iterator_range MachOObjectFile::exports() const {
1686 return exports(getDyldInfoExportsTrie());
1687 }
1688
1689
15071690 StringRef
15081691 MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
15091692 ArrayRef Raw = getSectionRawFinalSegmentName(Sec);
17461929 Cmd.datasize = 0;
17471930 return Cmd;
17481931 }
1932
1933 ArrayRef MachOObjectFile::getDyldInfoRebaseOpcodes() const {
1934 if (!DyldInfoLoadCmd)
1935 return ArrayRef();
1936
1937 MachO::dyld_info_command DyldInfo
1938 = getStruct(this, DyldInfoLoadCmd);
1939 const uint8_t *Ptr = reinterpret_cast(
1940 getPtr(this, DyldInfo.rebase_off));
1941 return ArrayRef(Ptr, DyldInfo.rebase_size);
1942 }
1943
1944 ArrayRef MachOObjectFile::getDyldInfoBindOpcodes() const {
1945 if (!DyldInfoLoadCmd)
1946 return ArrayRef();
1947
1948 MachO::dyld_info_command DyldInfo
1949 = getStruct(this, DyldInfoLoadCmd);
1950 const uint8_t *Ptr = reinterpret_cast(
1951 getPtr(this, DyldInfo.bind_off));
1952 return ArrayRef(Ptr, DyldInfo.bind_size);
1953 }
1954
1955 ArrayRef MachOObjectFile::getDyldInfoWeakBindOpcodes() const {
1956 if (!DyldInfoLoadCmd)
1957 return ArrayRef();
1958
1959 MachO::dyld_info_command DyldInfo
1960 = getStruct(this, DyldInfoLoadCmd);
1961 const uint8_t *Ptr = reinterpret_cast(
1962 getPtr(this, DyldInfo.weak_bind_off));
1963 return ArrayRef(Ptr, DyldInfo.weak_bind_size);
1964 }
1965
1966 ArrayRef MachOObjectFile::getDyldInfoLazyBindOpcodes() const {
1967 if (!DyldInfoLoadCmd)
1968 return ArrayRef();
1969
1970 MachO::dyld_info_command DyldInfo
1971 = getStruct(this, DyldInfoLoadCmd);
1972 const uint8_t *Ptr = reinterpret_cast(
1973 getPtr(this, DyldInfo.lazy_bind_off));
1974 return ArrayRef(Ptr, DyldInfo.lazy_bind_size);
1975 }
1976
1977 ArrayRef MachOObjectFile::getDyldInfoExportsTrie() const {
1978 if (!DyldInfoLoadCmd)
1979 return ArrayRef();
1980
1981 MachO::dyld_info_command DyldInfo
1982 = getStruct(this, DyldInfoLoadCmd);
1983 const uint8_t *Ptr = reinterpret_cast(
1984 getPtr(this, DyldInfo.export_off));
1985 return ArrayRef(Ptr, DyldInfo.export_size);
1986 }
1987
17491988
17501989 StringRef MachOObjectFile::getStringTableData() const {
17511990 MachO::symtab_command S = getSymtabLoadCommand();
# RUN: llvm-objdump -macho -exports-trie -arch x86_64 \
# RUN:   %p/Inputs/exports-trie.macho-x86_64 2>/dev/null | FileCheck %s

# Each attribute list is meant to read "<name> [<attr>]".  The opening " ["
# is matched optionally so the expectations hold whether or not it appears
# directly before the first attribute in the captured output.

# CHECK:[re-export] _malloc (from libSystem)
# CHECK:[re-export] _myfree (_free from libSystem)
# CHECK:0x00000F70 _myWeak{{( \[)?}}weak_def]
# CHECK:0x00001018 _myTLV{{( \[)?}}per-thread]
# CHECK:0x12345678 _myAbs{{( \[)?}}absolute]
# CHECK:0x00000F60 _foo

17821782 getAndPrintMachHeader(file, ncmds, filetype, cputype, true);
17831783 PrintLoadCommands(file, ncmds, filetype, cputype, true);
17841784 }
1785
1786 //===----------------------------------------------------------------------===//
1787 // export trie dumping
1788 //===----------------------------------------------------------------------===//
1789
1790 void llvm::printMachOExportsTrie(const object::MachOObjectFile *Obj) {
1791 for (const llvm::object::ExportEntry &entry : Obj->exports()) {
1792 uint64_t Flags = entry.flags();
1793 bool ReExport = (Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT);
1794 bool WeakDef = (Flags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
1795 bool ThreadLocal = ((Flags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) ==
1796 MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL);
1797 bool Abs = ((Flags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) ==
1798 MachO::EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE);
1799 bool Resolver = (Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER);
1800 if (ReExport)
1801 outs() << "[re-export] ";
1802 else
1803 outs()
1804 << format("0x%08llX ", entry.address()); // FIXME:add in base address
1805 outs() << entry.name();
1806 if (WeakDef || ThreadLocal || Resolver || Abs) {
1807 bool needComma = false;
1808 printf(" [");
1809 if (WeakDef) {
1810 outs() << "weak_def";
1811 needComma = true;
1812 }
1813 if (ThreadLocal) {
1814 if (needComma)
1815 outs() << ", ";
1816 outs() << "per-thread";
1817 needComma = true;
1818 }
1819 if (Abs) {
1820 if (needComma)
1821 outs() << ", ";
1822 outs() << "absolute";
1823 needComma = true;
1824 }
1825 if (Resolver) {
1826 if (needComma)
1827 outs() << ", ";
1828 outs() << format("resolver=0x%08llX", entry.other());
1829 needComma = true;
1830 }
1831 outs() << "]";
1832 }
1833 if (ReExport) {
1834 StringRef DylibName = "unknown";
1835 int ordinal = entry.other() - 1;
1836 Obj->getLibraryShortNameByIndex(ordinal, DylibName);
1837 if (entry.otherName().empty())
1838 outs() << " (from " << DylibName << ")";
1839 else
1840 outs() << " (" << entry.otherName() << " from " << DylibName << ")";
1841 }
1842 outs() << "\n";
1843 }
1844 }
8484 SymbolTable("t", cl::desc("Display the symbol table"));
8585
8686 static cl::opt
87 ExportsTrie("exports-trie", cl::desc("Display mach-o exported symbols"));
88
89 static cl::opt
8790 MachOOpt("macho", cl::desc("Use MachO specific object file parser"));
8891 static cl::alias
8992 MachOm("m", cl::desc("Alias for --macho"), cl::aliasopt(MachOOpt));
828831 }
829832 }
830833
834 static void printExportsTrie(const ObjectFile *o) {
835 outs() << "Exports trie:\n";
836 if (const MachOObjectFile *MachO = dyn_cast(o))
837 printMachOExportsTrie(MachO);
838 else {
839 errs() << "This operation is only currently supported "
840 "for Mach-O executable files.\n";
841 return;
842 }
843 }
844
831845 static void printPrivateFileHeader(const ObjectFile *o) {
832846 if (o->isELF()) {
833847 printELFFileHeader(o);
857871 PrintUnwindInfo(o);
858872 if (PrivateHeaders)
859873 printPrivateFileHeader(o);
874 if (ExportsTrie)
875 printExportsTrie(o);
860876 }
861877
862878 /// @brief Dump each object file in \a a;
938954 && !SectionContents
939955 && !SymbolTable
940956 && !UnwindInfo
941 && !PrivateHeaders) {
957 && !PrivateHeaders
958 && !ExportsTrie) {
942959 cl::PrintHelpMessage();
943960 return 2;
944961 }
3434 void DisassembleInputMachO(StringRef Filename);
3535 void printCOFFUnwindInfo(const object::COFFObjectFile* o);
3636 void printMachOUnwindInfo(const object::MachOObjectFile* o);
37
37 void printMachOExportsTrie(const object::MachOObjectFile* o);
3838 void printELFFileHeader(const object::ObjectFile *o);
3939 void printCOFFFileHeader(const object::ObjectFile *o);
4040 void printMachOFileHeader(const object::ObjectFile *o);