llvm.org GIT mirror llvm / 54154f3
Teach llvm-objdump with the -macho parser how to use the data in code table from the LC_DATA_IN_CODE load command. When disassembling, print the data in code formatted for the kind of data it is instead of disassembling those bytes. I added the format specific functionality to the derived class MachOObjectFile since these tables only appear in Mach-O object files. This is my first attempt to modify the libObject stuff, so if folks have better suggestions on how to fit this in or suggestions on the implementation please let me know. rdar://11791371 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183424 91177308-0d34-0410-b5e6-96231b3b80d8 Kevin Enderby 7 years ago
6 changed file(s) with 254 addition(s) and 9 deletion(s). Raw diff Collapse all Expand all
2323
2424 namespace llvm {
2525 namespace object {
26
27 /// DiceRef - This is a value type class that represents a single
28 /// data in code entry in the table in a Mach-O object file.
29 class DiceRef {
30 DataRefImpl DicePimpl;
31 const ObjectFile *OwningObject;
32
33 public:
34 DiceRef() : OwningObject(NULL) { }
35
36 DiceRef(DataRefImpl DiceP, const ObjectFile *Owner);
37
38 bool operator==(const DiceRef &Other) const;
39 bool operator<(const DiceRef &Other) const;
40
41 error_code getNext(DiceRef &Result) const;
42
43 error_code getOffset(uint32_t &Result) const;
44 error_code getLength(uint16_t &Result) const;
45 error_code getKind(uint16_t &Result) const;
46
47 DataRefImpl getRawDataRefImpl() const;
48 const ObjectFile *getObjectFile() const;
49 };
50 typedef content_iterator dice_iterator;
2651
2752 class MachOObjectFile : public ObjectFile {
2853 public:
107132 relocation_iterator getSectionRelBegin(unsigned Index) const;
108133 relocation_iterator getSectionRelEnd(unsigned Index) const;
109134
135 dice_iterator begin_dices() const;
136 dice_iterator end_dices() const;
137
110138 // In a MachO file, sections have a segment name. This is used in the .o
111139 // files. They have a single segment, but this field specifies which segment
112140 // a section should be put in in the final object.
151179 getLinkerOptionsLoadCommand(const LoadCommandInfo &L) const;
152180
153181 macho::RelocationEntry getRelocation(DataRefImpl Rel) const;
182 macho::DataInCodeTableEntry getDice(DataRefImpl Rel) const;
154183 macho::Header getHeader() const;
155184 macho::Header64Ext getHeader64Ext() const;
156185 macho::IndirectSymbolTableEntry
160189 unsigned Index) const;
161190 macho::SymtabLoadCommand getSymtabLoadCommand() const;
162191 macho::DysymtabLoadCommand getDysymtabLoadCommand() const;
192 macho::LinkeditDataLoadCommand getDataInCodeLoadCommand() const;
163193
164194 StringRef getStringTableData() const;
165195 bool is64Bit() const;
174204 SectionList Sections;
175205 const char *SymtabLoadCmd;
176206 const char *DysymtabLoadCmd;
207 const char *DataInCodeLoadCmd;
177208 };
178209
210 /// DiceRef
211 inline DiceRef::DiceRef(DataRefImpl DiceP, const ObjectFile *Owner)
212 : DicePimpl(DiceP) , OwningObject(Owner) {}
213
214 inline bool DiceRef::operator==(const DiceRef &Other) const {
215 return DicePimpl == Other.DicePimpl;
216 }
217
218 inline bool DiceRef::operator<(const DiceRef &Other) const {
219 return DicePimpl < Other.DicePimpl;
220 }
221
222 inline error_code DiceRef::getNext(DiceRef &Result) const {
223 DataRefImpl Rel = DicePimpl;
224 const macho::DataInCodeTableEntry *P =
225 reinterpret_cast(Rel.p);
226 Rel.p = reinterpret_cast(P + 1);
227 Result = DiceRef(Rel, OwningObject);
228 return object_error::success;
229 }
230
231 // Since a Mach-O data in code reference, a DiceRef, can only be created when
232 // the OwningObject ObjectFile is a MachOObjectFile, a static_cast<> is used
233 // for the methods that get the values of the fields of the reference.
234
235 inline error_code DiceRef::getOffset(uint32_t &Result) const {
236 const MachOObjectFile *MachOOF =
237 static_cast(OwningObject);
238 macho::DataInCodeTableEntry Dice = MachOOF->getDice(DicePimpl);
239 Result = Dice.Offset;
240 return object_error::success;
241 }
242
243 inline error_code DiceRef::getLength(uint16_t &Result) const {
244 const MachOObjectFile *MachOOF =
245 static_cast(OwningObject);
246 macho::DataInCodeTableEntry Dice = MachOOF->getDice(DicePimpl);
247 Result = Dice.Length;
248 return object_error::success;
249 }
250
251 inline error_code DiceRef::getKind(uint16_t &Result) const {
252 const MachOObjectFile *MachOOF =
253 static_cast(OwningObject);
254 macho::DataInCodeTableEntry Dice = MachOOF->getDice(DicePimpl);
255 Result = Dice.Kind;
256 return object_error::success;
257 }
258
259 inline DataRefImpl DiceRef::getRawDataRefImpl() const {
260 return DicePimpl;
261 }
262
263 inline const ObjectFile *DiceRef::getObjectFile() const {
264 return OwningObject;
265 }
266
179267 }
180268 }
181269
413413 bool IsLittleEndian, bool Is64bits,
414414 error_code &ec)
415415 : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
416 SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL) {
416 SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL), DataInCodeLoadCmd(NULL) {
417417 uint32_t LoadCommandCount = this->getHeader().NumLoadCommands;
418418 macho::LoadCommandType SegmentLoadType = is64Bit() ?
419419 macho::LCT_Segment64 : macho::LCT_Segment;
426426 } else if (Load.C.Type == macho::LCT_Dysymtab) {
427427 assert(!DysymtabLoadCmd && "Multiple dynamic symbol tables");
428428 DysymtabLoadCmd = Load.Ptr;
429 } else if (Load.C.Type == macho::LCT_DataInCode) {
430 assert(!DataInCodeLoadCmd && "Multiple data in code tables");
431 DataInCodeLoadCmd = Load.Ptr;
429432 } else if (Load.C.Type == SegmentLoadType) {
430433 uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load);
431434 for (unsigned J = 0; J < NumSections; ++J) {
13271330 return getSectionRelEnd(DRI);
13281331 }
13291332
1333 dice_iterator MachOObjectFile::begin_dices() const {
1334 DataRefImpl DRI;
1335 if (!DataInCodeLoadCmd)
1336 return dice_iterator(DiceRef(DRI, this));
1337
1338 macho::LinkeditDataLoadCommand DicLC = getDataInCodeLoadCommand();
1339 DRI.p = reinterpret_cast(getPtr(this, DicLC.DataOffset));
1340 return dice_iterator(DiceRef(DRI, this));
1341 }
1342
1343 dice_iterator MachOObjectFile::end_dices() const {
1344 DataRefImpl DRI;
1345 if (!DataInCodeLoadCmd)
1346 return dice_iterator(DiceRef(DRI, this));
1347
1348 macho::LinkeditDataLoadCommand DicLC = getDataInCodeLoadCommand();
1349 unsigned Offset = DicLC.DataOffset + DicLC.DataSize;
1350 DRI.p = reinterpret_cast(getPtr(this, Offset));
1351 return dice_iterator(DiceRef(DRI, this));
1352 }
1353
13301354 StringRef
13311355 MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
13321356 ArrayRef Raw = getSectionRawFinalSegmentName(Sec);
14911515 return getStruct(this, P);
14921516 }
14931517
1518 macho::DataInCodeTableEntry
1519 MachOObjectFile::getDice(DataRefImpl Rel) const {
1520 const char *P = reinterpret_cast(Rel.p);
1521 return getStruct(this, P);
1522 }
1523
14941524 macho::Header MachOObjectFile::getHeader() const {
14951525 return getStruct(this, getPtr(this, 0));
14961526 }
15211551
15221552 macho::DysymtabLoadCommand MachOObjectFile::getDysymtabLoadCommand() const {
15231553 return getStruct(this, DysymtabLoadCmd);
1554 }
1555
1556 macho::LinkeditDataLoadCommand
1557 MachOObjectFile::getDataInCodeLoadCommand() const {
1558 if (DataInCodeLoadCmd)
1559 return getStruct(this, DataInCodeLoadCmd);
1560
1561 // If there is no DataInCodeLoadCmd return a load command with zero'ed fields.
1562 macho::LinkeditDataLoadCommand Cmd;
1563 Cmd.Type = macho::LCT_DataInCode;
1564 Cmd.Size = macho::LinkeditLoadCommandSize;
1565 Cmd.DataOffset = 0;
1566 Cmd.DataSize = 0;
1567 return Cmd;
15241568 }
15251569
15261570 StringRef MachOObjectFile::getStringTableData() const {
0 RUN: llvm-objdump -triple thumbv7-apple-iOS -disassemble %p/../Inputs/macho-data-in-code.macho-thumbv7 -macho | FileCheck %s
1
2 CHECK: 12: 80 bd pop {r7, pc}
3
4 CHECK: 14: 38 00 00 00 .long 56 @ KIND_DATA
5 CHECK: 16: 00 00 movs r0, r0
6
8686 }
8787 };
8888
89 // Types for the sorted data in code table that is built before disassembly
90 // and the predicate function used to match its entries by address.
91 typedef std::pair DiceTableEntry;
92 typedef std::vector DiceTable;
93 typedef DiceTable::iterator dice_table_iterator;
94
95 static bool
96 compareDiceTableEntries(const DiceTableEntry i,
97 const DiceTableEntry j) {
98 return i.first == j.first;
99 }
100
101 static void DumpDataInCode(const char *bytes, uint64_t Size,
102 unsigned short Kind) {
103 uint64_t Value;
104
105 switch (Kind) {
106 case macho::Data:
107 switch (Size) {
108 case 4:
109 Value = bytes[3] << 24 |
110 bytes[2] << 16 |
111 bytes[1] << 8 |
112 bytes[0];
113 outs() << "\t.long " << Value;
114 break;
115 case 2:
116 Value = bytes[1] << 8 |
117 bytes[0];
118 outs() << "\t.short " << Value;
119 break;
120 case 1:
121 Value = bytes[0];
122 outs() << "\t.byte " << Value;
123 break;
124 }
125 outs() << "\t@ KIND_DATA\n";
126 break;
127 case macho::JumpTable8:
128 Value = bytes[0];
129 outs() << "\t.byte " << Value << "\t@ KIND_JUMP_TABLE8";
130 break;
131 case macho::JumpTable16:
132 Value = bytes[1] << 8 |
133 bytes[0];
134 outs() << "\t.short " << Value << "\t@ KIND_JUMP_TABLE16";
135 break;
136 case macho::JumpTable32:
137 Value = bytes[3] << 24 |
138 bytes[2] << 16 |
139 bytes[1] << 8 |
140 bytes[0];
141 outs() << "\t.long " << Value << "\t@ KIND_JUMP_TABLE32";
142 break;
143 default:
144 outs() << "\t@ data in code kind = " << Kind << "\n";
145 break;
146 }
147 }
148
89149 static void
90150 getSectionsAndSymbols(const macho::Header Header,
91151 MachOObjectFile *MachOObj,
92152 std::vector &Sections,
93153 std::vector &Symbols,
94 SmallVectorImpl &FoundFns) {
154 SmallVectorImpl &FoundFns,
155 uint64_t &BaseSegmentAddress) {
95156 error_code ec;
96157 for (symbol_iterator SI = MachOObj->begin_symbols(),
97158 SE = MachOObj->end_symbols(); SI != SE; SI.increment(ec))
107168
108169 MachOObjectFile::LoadCommandInfo Command =
109170 MachOObj->getFirstLoadCommandInfo();
171 bool BaseSegmentAddressSet = false;
110172 for (unsigned i = 0; ; ++i) {
111173 if (Command.C.Type == macho::LCT_FunctionStarts) {
112174 // We found a function starts segment, parse the addresses for later
115177 MachOObj->getLinkeditDataLoadCommand(Command);
116178
117179 MachOObj->ReadULEB128s(LLC.DataOffset, FoundFns);
180 }
181 else if (Command.C.Type == macho::LCT_Segment) {
182 macho::SegmentLoadCommand SLC =
183 MachOObj->getSegmentLoadCommand(Command);
184 StringRef SegName = SLC.Name;
185 if(!BaseSegmentAddressSet && SegName != "__PAGEZERO") {
186 BaseSegmentAddressSet = true;
187 BaseSegmentAddress = SLC.VMAddress;
188 }
118189 }
119190
120191 if (i == Header.NumLoadCommands - 1)
183254 std::vector Sections;
184255 std::vector Symbols;
185256 SmallVector FoundFns;
186
187 getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns);
257 uint64_t BaseSegmentAddress;
258
259 getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns,
260 BaseSegmentAddress);
188261
189262 // Make a copy of the unsorted symbol list. FIXME: duplication
190263 std::vector UnsortedSymbols(Symbols);
191264 // Sort the symbols by address, just in case they didn't come in that way.
192265 std::sort(Symbols.begin(), Symbols.end(), SymbolSorter());
266
267 // Build a data in code table that is sorted by the address of each entry.
268 uint64_t BaseAddress = 0;
269 if (Header.FileType == macho::HFT_Object)
270 Sections[0].getAddress(BaseAddress);
271 else
272 BaseAddress = BaseSegmentAddress;
273 DiceTable Dices;
274 error_code ec;
275 for (dice_iterator DI = MachOOF->begin_dices(), DE = MachOOF->end_dices();
276 DI != DE; DI.increment(ec)){
277 uint32_t Offset;
278 DI->getOffset(Offset);
279 Dices.push_back(std::make_pair(BaseAddress + Offset, *DI));
280 }
281 array_pod_sort(Dices.begin(), Dices.end());
193282
194283 #ifndef NDEBUG
195284 raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
308397 for (uint64_t Index = Start; Index < End; Index += Size) {
309398 MCInst Inst;
310399
400 uint64_t SectAddress = 0;
401 Sections[SectIdx].getAddress(SectAddress);
402 outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
403
404 // Check the data in code table here to see if this is data not an
405 // instruction to be disassembled.
406 DiceTable Dice;
407 Dice.push_back(std::make_pair(SectAddress + Index, DiceRef()));
408 dice_table_iterator DTI = std::search(Dices.begin(), Dices.end(),
409 Dice.begin(), Dice.end(),
410 compareDiceTableEntries);
411 if (DTI != Dices.end()){
412 uint16_t Length;
413 DTI->second.getLength(Length);
414 DumpBytes(StringRef(Bytes.data() + Index, Length));
415 uint16_t Kind;
416 DTI->second.getKind(Kind);
417 DumpDataInCode(Bytes.data() + Index, Length, Kind);
418 continue;
419 }
420
311421 if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
312422 DebugOut, nulls())) {
313 uint64_t SectAddress = 0;
314 Sections[SectIdx].getAddress(SectAddress);
315 outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
316
317423 DumpBytes(StringRef(Bytes.data() + Index, Size));
318424 IP->printInst(&Inst, outs(), "");
319425
291291 << " ('datasize', " << LLC.DataSize << ")\n"
292292 << " ('_data_regions', [\n";
293293
294 unsigned NumRegions = LLC.DataSize / 8;
294 unsigned NumRegions = LLC.DataSize / sizeof(macho::DataInCodeTableEntry);
295295 for (unsigned i = 0; i < NumRegions; ++i) {
296296 macho::DataInCodeTableEntry DICE =
297297 Obj.getDataInCodeTableEntry(LLC.DataOffset, i);