llvm.org GIT mirror llvm / 309a2c4
Revert "[llvm.py] Implement interface to enhanced disassembler" Chris Lattner says the edis interface is going away. It doesn't make sense to land something that will go away in the near future. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@152508 91177308-0d34-0410-b5e6-96231b3b80d8 Gregory Szorc 7 years ago
2 changed file(s) with 0 addition(s) and 626 deletion(s). Raw diff Collapse all Expand all
+0
-564
bindings/python/llvm/disassembler.py less more
None #===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===#
1 #
2 # The LLVM Compiler Infrastructure
3 #
4 # This file is distributed under the University of Illinois Open Source
5 # License. See LICENSE.TXT for details.
6 #
7 #===------------------------------------------------------------------------===#
8
9 from abc import ABCMeta
10 from abc import abstractmethod
11
12 from ctypes import CFUNCTYPE
13 from ctypes import POINTER
14 from ctypes import byref
15 from ctypes import c_char_p
16 from ctypes import c_int
17 from ctypes import c_ubyte
18 from ctypes import c_uint64
19 from ctypes import c_uint
20 from ctypes import c_void_p
21 from ctypes import memmove
22
23 from .common import CachedProperty
24 from .common import LLVMObject
25 from .common import c_object_p
26 from .common import get_library
27
# Public names exported by this module.
__all__ = [
    'DisassemblerByteArraySource',
    'DisassemblerFileSource',
    'DisassemblerSource',
    'Disassembler',
    'Instruction',
    'Operand',
    'Token',
]

# Registry of ctypes callback prototypes, keyed by name. It is created empty
# here and populated at the bottom of the module, before register_library()
# looks the prototypes up.
callbacks = {}
39
# The abstract base is created by calling the metaclass directly so that the
# abstract-method checks are enforced on both Python 2 and Python 3. A
# ``__metaclass__`` class attribute is silently ignored by Python 3, which
# would let incomplete sources be instantiated there.
_DisassemblerSourceBase = ABCMeta('_DisassemblerSourceBase', (object,), {})


class DisassemblerSource(_DisassemblerSourceBase):
    """Abstract base class for disassembler input.

    This defines the interface to which inputs to the disassembler must
    conform.

    Basically, the disassembler input is a read-only sequence of a finite
    length. Subclasses must implement every method below; instantiating a
    class that leaves one of them abstract raises TypeError.
    """

    @abstractmethod
    def __len__(self):
        """Returns the number of bytes that are available for input."""
        pass

    @abstractmethod
    def get_byte(self, address):
        """Returns the byte at the specified address."""
        pass

    @abstractmethod
    def start_address(self):
        """Returns the address at which to start fetching bytes, as a long."""
        pass
65
class DisassemblerByteArraySource(DisassemblerSource):
    """Disassembler source backed by an in-memory, indexable byte sequence.

    Addresses are used directly as indexes into the wrapped sequence, so the
    first byte lives at address 0.
    """

    def __init__(self, b):
        """Wrap the sequence b. It is stored as given, not copied."""
        self._array = b

    def start_address(self):
        """Return 0: addresses coincide with sequence indexes."""
        return 0

    def get_byte(self, address):
        """Return the element of the wrapped sequence at index address."""
        return self._array[address]

    def __len__(self):
        """Return the number of elements in the wrapped sequence."""
        return len(self._array)
80
class DisassemblerFileSource(DisassemblerSource):
    """A disassembler source for file segments.

    This allows you to feed segments of a file into a Disassembler.
    """

    def __init__(self, filename, start_offset, length=None, end_offset=None,
                 start_address=None):
        """Create a new source from a file.

        A source begins at a specified byte offset and can be defined in terms
        of either its byte length or its end byte offset.

        Arguments:

        filename -- path of the file to read; it is opened in binary mode.
        start_offset -- byte offset in the file at which the segment begins.
        length -- number of bytes to read. Takes precedence over end_offset
          when both are given.
        end_offset -- byte offset in the file at which the segment ends
          (exclusive). Only consulted when length is None.
        start_address -- address reported for the first byte of the segment.
          Defaults to 0.

        Raises Exception if neither length nor end_offset is given, and
        ValueError if the requested byte count is negative.
        """
        if length is None and end_offset is None:
            raise Exception('One of length or end_offset must be defined.')

        self._start_address = 0 if start_address is None else start_address

        count = length if length is not None else end_offset - start_offset
        if count < 0:
            # read() interprets a negative size as "read until end of file",
            # which would silently return far more than the caller asked for.
            raise ValueError('Segment byte count must not be negative: %d' %
                             count)

        with open(filename, 'rb') as fh:
            fh.seek(start_offset)

            # FIXME handle case where read bytes != requested
            # Until then a short read is tolerated: __len__ reports the number
            # of bytes that were actually read.
            self._buf = fh.read(count)

    def __len__(self):
        """Returns the number of bytes that were read from the file."""
        return len(self._buf)

    def get_byte(self, address):
        """Returns the byte stored at the given address.

        Raises IndexError if address lies outside the segment.
        """
        offset = address - self._start_address
        if offset < 0:
            # A negative index would wrap around and return a byte from the
            # end of the buffer instead of failing.
            raise IndexError('Address %d precedes start address %d' %
                             (address, self._start_address))

        return self._buf[offset]

    def start_address(self):
        """Returns the address assigned to the first byte of the segment."""
        return self._start_address
119
class Disassembler(LLVMObject):
    """Interface to LLVM's enhanced disassembler.

    The API is slightly different from the C API in that we tightly couple a
    disassembler instance to an input source. This saves an extra level of
    abstraction and makes the Python implementation easier.
    """

    SYNTAX_X86_INTEL = 0
    SYNTAX_X86_ATT = 1
    SYNTAX_ARM_UAL = 2

    def __init__(self, triple, source, syntax=SYNTAX_X86_INTEL):
        """Create a new disassembler instance.

        Arguments:

        triple -- str target type (e.g. x86_64-apple-darwin10)
        source -- DisassemblerSource instance to be fed into this disassembler.
        syntax -- The assembly syntax to use. One of the SYNTAX_* class
          constants. e.g. Disassembler.SYNTAX_X86_INTEL

        Raises Exception if the library returns a non-zero status.
        """
        assert isinstance(source, DisassemblerSource)

        ptr = c_object_p()
        result = lib.EDGetDisassembler(byref(ptr), c_char_p(triple),
                                       c_int(syntax))
        if result != 0:
            raise Exception('Non-0 return code.')

        LLVMObject.__init__(self, ptr)

        self._source = source

    def get_instructions(self):
        """Obtain the instructions from the input.

        This is a generator for Instruction instances.

        By default, this will return instructions for the entire source which
        has been defined. It does this by querying the source's start_address()
        method and continues to request instructions until len(source) is
        exhausted.

        Raises Exception if the library fails to produce an instruction.
        """

        # We currently obtain 1 instruction at a time because it is easiest.

        # This serves as our EDByteReaderCallback. It is a proxy between C and
        # the Python DisassemblerSource.
        def byte_reader(dest, address, arg):
            try:
                byte = self._source.get_byte(address)

                # Indexing a str/bytes source on Python 2 yields a 1-character
                # string, while bytearray-like sources yield an int. Store the
                # numeric value through the pointer; handing an int to
                # memmove() would be treated as a raw source address.
                if isinstance(byte, (bytes, str)):
                    byte = ord(byte)
                dest[0] = byte

                return 0
            except Exception:
                # Report the failure through the return code instead of
                # letting the exception escape into the C caller.
                # KeyboardInterrupt and SystemExit are deliberately not
                # caught here.
                return -1

        address = self._source.start_address()
        end_address = address + len(self._source)

        # Keep a reference to the callback object for as long as the
        # generator is alive so it is not collected while C may call it.
        cb = callbacks['byte_reader'](byte_reader)
        while address < end_address:
            ptr = c_object_p()

            result = lib.EDCreateInsts(byref(ptr), c_uint(1), self, cb,
                                       address, c_void_p(None))

            # Exactly one instruction was requested, so anything but 1 is a
            # failure.
            if result != 1:
                raise Exception('Error obtaining instruction at address %d' %
                                address)

            instruction = Instruction(ptr, self)
            yield instruction

            address += instruction.byte_size
195
196
class Instruction(LLVMObject):
    """Represents an individual instruction.

    Instruction instances are obtained from Disassembler.get_instructions().

    Accessors raise Exception when the underlying library call reports an
    error.
    """
    def __init__(self, ptr, disassembler):
        """Create a new instruction.

        Instructions are created from within this module. You should have no
        need to call this from outside this module.
        """
        assert isinstance(ptr, c_object_p)
        assert isinstance(disassembler, Disassembler)

        LLVMObject.__init__(self, ptr, disposer=lib.EDReleaseInst)

        # Hold on to the disassembler that produced this instruction.
        self._disassembler = disassembler

    def __str__(self):
        """Return the string the library produces for this instruction."""
        s = c_char_p(None)
        result = lib.EDGetInstString(byref(s), self)
        if result != 0:
            raise Exception('Non-0 return code.')

        return s.value

    @CachedProperty
    def byte_size(self):
        """The value reported by EDInstByteSize for this instruction."""
        result = lib.EDInstByteSize(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result

    @CachedProperty
    def id(self):
        """The value reported by EDInstID, as a Python integer."""
        i = c_uint()
        result = lib.EDInstID(byref(i), self)
        if result != 0:
            raise Exception('Non-0 return code.')

        return i.value

    @CachedProperty
    def is_branch(self):
        """True if EDInstIsBranch returns a positive value."""
        result = lib.EDInstIsBranch(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def is_move(self):
        """True if EDInstIsMove returns a positive value."""
        result = lib.EDInstIsMove(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def branch_target_id(self):
        """The value reported by EDBranchTargetID for this instruction."""
        result = lib.EDBranchTargetID(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result

    @CachedProperty
    def move_source_id(self):
        """The value reported by EDMoveSourceID for this instruction."""
        result = lib.EDMoveSourceID(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result

    @CachedProperty
    def move_target_id(self):
        """The value reported by EDMoveTargetID for this instruction.

        Counterpart of move_source_id; register_library() declares the
        prototype of the underlying function.
        """
        result = lib.EDMoveTargetID(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result

    def get_tokens(self):
        """Obtain the tokens in this instruction.

        This is a generator for Token instances.
        """
        count = lib.EDNumTokens(self)
        if count == -1:
            raise Exception('Error code returned.')

        for i in range(count):
            ptr = c_object_p()
            result = lib.EDGetToken(byref(ptr), self, c_int(i))
            if result != 0:
                raise Exception('Non-0 return code.')

            yield Token(ptr, self)

    def get_operands(self):
        """Obtain the operands in this instruction.

        This is a generator for Operand instances.
        """
        count = lib.EDNumOperands(self)
        if count == -1:
            raise Exception('Error code returned.')

        for i in range(count):
            ptr = c_object_p()
            result = lib.EDGetOperand(byref(ptr), self, c_int(i))
            if result != 0:
                raise Exception('Non-0 return code.')

            yield Operand(ptr, self)
304
class Token(LLVMObject):
    """Represents a token of an instruction.

    Token instances are obtained from Instruction.get_tokens(). Accessors
    raise Exception when the underlying library call reports an error.
    """
    def __init__(self, ptr, instruction):
        """Wrap the token pointer ptr that belongs to instruction."""
        assert isinstance(ptr, c_object_p)
        assert isinstance(instruction, Instruction)

        LLVMObject.__init__(self, ptr)

        # Hold on to the instruction this token was obtained from.
        self._instruction = instruction

    def __str__(self):
        """Return the string the library produces for this token."""
        s = c_char_p(None)
        result = lib.EDGetTokenString(byref(s), self)
        if result != 0:
            raise Exception('Non-0 return code.')

        return s.value

    @CachedProperty
    def operand_index(self):
        """The value reported by EDOperandIndexForToken for this token."""
        result = lib.EDOperandIndexForToken(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result

    @CachedProperty
    def is_whitespace(self):
        """True if EDTokenIsWhitespace returns a positive value."""
        result = lib.EDTokenIsWhitespace(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def is_punctuation(self):
        """True if EDTokenIsPunctuation returns a positive value."""
        result = lib.EDTokenIsPunctuation(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def is_opcode(self):
        """True if EDTokenIsOpcode returns a positive value."""
        result = lib.EDTokenIsOpcode(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def is_literal(self):
        """True if EDTokenIsLiteral returns a positive value."""
        result = lib.EDTokenIsLiteral(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def is_register(self):
        """True if EDTokenIsRegister returns a positive value."""
        result = lib.EDTokenIsRegister(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def is_negative_literal(self):
        """True if EDTokenIsNegativeLiteral returns a positive value."""
        result = lib.EDTokenIsNegativeLiteral(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def absolute_value(self):
        """The value written by EDLiteralTokenAbsoluteValue, as an integer."""
        value = c_uint64()
        result = lib.EDLiteralTokenAbsoluteValue(byref(value), self)
        if result != 0:
            raise Exception('Non-0 return code.')

        # Unwrap the ctypes object so callers get a plain integer, the same
        # way Instruction.id does.
        return value.value

    @CachedProperty
    def register_value(self):
        """The value written by EDRegisterTokenValue, as an integer."""
        value = c_uint()
        result = lib.EDRegisterTokenValue(byref(value), self)
        if result != 0:
            raise Exception('Non-0 return code.')

        # Unwrap the ctypes object so callers get a plain integer.
        return value.value
395
class Operand(LLVMObject):
    """Represents an operand in an instruction.

    Operand instances are obtained from Instruction.get_operands(). Accessors
    raise Exception when the underlying library call reports an error.

    FIXME support register evaluation.
    """
    def __init__(self, ptr, instruction):
        """Wrap the operand pointer ptr that belongs to instruction."""
        assert isinstance(ptr, c_object_p)
        assert isinstance(instruction, Instruction)

        LLVMObject.__init__(self, ptr)

        # Hold on to the instruction this operand was obtained from.
        self._instruction = instruction

    @CachedProperty
    def is_register(self):
        """True if EDOperandIsRegister returns a positive value."""
        result = lib.EDOperandIsRegister(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def is_immediate(self):
        """True if EDOperandIsImmediate returns a positive value."""
        result = lib.EDOperandIsImmediate(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def is_memory(self):
        """True if EDOperandIsMemory returns a positive value."""
        result = lib.EDOperandIsMemory(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def register_value(self):
        """The value written by EDRegisterOperandValue, as an integer."""
        value = c_uint()
        result = lib.EDRegisterOperandValue(byref(value), self)
        if result != 0:
            raise Exception('Non-0 return code.')

        # Unwrap the ctypes object so callers get a plain integer rather
        # than a c_uint instance.
        return value.value

    @CachedProperty
    def immediate_value(self):
        """The value written by EDImmediateOperandValue, as an integer."""
        value = c_uint64()
        result = lib.EDImmediateOperandValue(byref(value), self)
        if result != 0:
            raise Exception('Non-0 return code.')

        # Unwrap the ctypes object so callers get a plain integer rather
        # than a c_uint64 instance.
        return value.value
450
def register_library(library):
    """Declare the ctypes prototypes of the enhanced disassembler functions.

    For every function listed below, the argument types and (where given) the
    return type are assigned on the matching attribute of library. The
    callback prototypes are looked up in the module-level callbacks dict, so
    it must be populated before this function is called.
    """
    # Marker for functions whose restype is left at the ctypes default.
    unset = object()

    inst = [Instruction]
    token = [Token]
    operand = [Operand]

    # (function name, argument types, return type), in declaration order.
    prototypes = (
        ('EDGetDisassembler', [POINTER(c_object_p), c_char_p, c_int], c_int),
        ('EDGetRegisterName', [POINTER(c_char_p), Disassembler, c_uint],
         c_int),
        ('EDRegisterIsStackPointer', [Disassembler, c_uint], c_int),
        ('EDRegisterIsProgramCounter', [Disassembler, c_uint], c_int),
        ('EDCreateInsts', [POINTER(c_object_p), c_uint, Disassembler,
                           callbacks['byte_reader'], c_uint64, c_void_p],
         c_uint),
        ('EDReleaseInst', list(inst), unset),
        ('EDInstByteSize', list(inst), c_int),
        ('EDGetInstString', [POINTER(c_char_p), Instruction], c_int),
        ('EDInstID', [POINTER(c_uint), Instruction], c_int),
        ('EDInstIsBranch', list(inst), c_int),
        ('EDInstIsMove', list(inst), c_int),
        ('EDBranchTargetID', list(inst), c_int),
        ('EDMoveSourceID', list(inst), c_int),
        ('EDMoveTargetID', list(inst), c_int),
        ('EDNumTokens', list(inst), c_int),
        ('EDGetToken', [POINTER(c_object_p), Instruction, c_int], c_int),
        ('EDGetTokenString', [POINTER(c_char_p), Token], c_int),
        ('EDOperandIndexForToken', list(token), c_int),
        ('EDTokenIsWhitespace', list(token), c_int),
        ('EDTokenIsPunctuation', list(token), c_int),
        ('EDTokenIsOpcode', list(token), c_int),
        ('EDTokenIsLiteral', list(token), c_int),
        ('EDTokenIsRegister', list(token), c_int),
        ('EDTokenIsNegativeLiteral', list(token), c_int),
        ('EDLiteralTokenAbsoluteValue', [POINTER(c_uint64), Token], c_int),
        ('EDRegisterTokenValue', [POINTER(c_uint), Token], c_int),
        ('EDNumOperands', list(inst), c_int),
        ('EDGetOperand', [POINTER(c_object_p), Instruction, c_int], c_int),
        ('EDOperandIsRegister', list(operand), c_int),
        ('EDOperandIsImmediate', list(operand), c_int),
        ('EDOperandIsMemory', list(operand), c_int),
        ('EDRegisterOperandValue', [POINTER(c_uint), Operand], c_int),
        ('EDImmediateOperandValue', [POINTER(c_uint64), Operand], c_int),
        ('EDEvaluateOperand', [c_uint64, Operand,
                               callbacks['register_reader'], c_void_p],
         c_int),
    )

    for name, argtypes, restype in prototypes:
        function = getattr(library, name)
        function.argtypes = argtypes
        if restype is not unset:
            function.restype = restype
555
# Enhanced disassembler callback prototypes. They have to be registered
# before register_library() runs, because it looks them up in ``callbacks``.
callbacks.update({
    'byte_reader': CFUNCTYPE(c_int, POINTER(c_ubyte), c_uint64, c_void_p),
    'register_reader': CFUNCTYPE(c_int, POINTER(c_uint64), c_uint, c_void_p),
})

# Obtain the library handle once, at import time, and declare its prototypes.
lib = get_library()
register_library(lib)
+0
-62
bindings/python/llvm/tests/test_disassembler.py less more
None from unittest import expectedFailure
1 from unittest import skip
2
3 from .base import TestBase
4 from ..disassembler import DisassemblerByteArraySource
5 from ..disassembler import DisassemblerFileSource
6 from ..disassembler import Disassembler
7 from ..object import ObjectFile
8
class TestDisassembler(TestBase):
    """Tests for the enhanced disassembler bindings."""

    def test_simple(self):
        """Disassemble one jcxz instruction and inspect what comes back."""
        sequence = '\x67\xe3\x81' # jcxz -127
        triple = 'i686-apple-darwin9'

        byte_source = DisassemblerByteArraySource(sequence)
        decoded = list(Disassembler(triple, byte_source).get_instructions())

        self.assertEqual(len(decoded), 1)

        inst = decoded[0]
        self.assertEqual(str(inst), '\tjcxz\t-127\n')
        self.assertEqual(inst.byte_size, 3)
        self.assertEqual(inst.id, 1032)
        self.assertTrue(inst.is_branch)
        self.assertFalse(inst.is_move)
        self.assertEqual(inst.branch_target_id, 0)

        tokens = list(inst.get_tokens())
        self.assertEqual(len(tokens), 4)

        # The first token is the opcode and nothing else.
        opcode = tokens[0]
        self.assertEqual(str(opcode), 'jcxz')
        self.assertFalse(opcode.is_whitespace)
        self.assertFalse(opcode.is_punctuation)
        self.assertTrue(opcode.is_opcode)
        self.assertFalse(opcode.is_literal)
        self.assertFalse(opcode.is_register)

        self.assertTrue(tokens[1].is_whitespace)

        operands = list(inst.get_operands())
        self.assertEqual(len(operands), 1)

        # TODO implement operand tests

    @skip('This test is horribly broken and probably not even correct.')
    def test_read_instructions(self):
        """Print every instruction of every symbol in the test binary."""
        path = self.get_test_binary()
        object_file = ObjectFile(filename=path)

        for sym in object_file.get_symbols():
            sym_address = sym.address
            sym_offset = sym.file_offset
            sym_size = sym.size

            source = DisassemblerFileSource(path, sym_offset,
                                            length=sym_size,
                                            start_address=sym_address)

            disassembler = Disassembler('x86-generic-gnu-linux', source)
            for instruction in disassembler.get_instructions():
                print(instruction)