llvm.org GIT mirror llvm / 92a3e9d
[llvm.py] Implement disassembler interface It doesn't currently support the op info and symbol lookup callbacks, but it is better than nothing. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@152527 91177308-0d34-0410-b5e6-96231b3b80d8 Gregory Szorc 7 years ago
2 changed file(s) with 162 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
0 #===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===#
1 #
2 # The LLVM Compiler Infrastructure
3 #
4 # This file is distributed under the University of Illinois Open Source
5 # License. See LICENSE.TXT for details.
6 #
7 #===------------------------------------------------------------------------===#
8
9 from ctypes import CFUNCTYPE
10 from ctypes import POINTER
11 from ctypes import addressof
12 from ctypes import byref
13 from ctypes import c_byte
14 from ctypes import c_char_p
15 from ctypes import c_int
16 from ctypes import c_size_t
17 from ctypes import c_ubyte
18 from ctypes import c_uint64
19 from ctypes import c_void_p
20 from ctypes import cast
21
22 from .common import LLVMObject
23 from .common import c_object_p
24 from .common import get_library
25
# Names exported by ``from <module> import *``.
__all__ = ['Disassembler']

# Registry of ctypes callback prototypes, looked up by name. It starts out
# empty and is populated later in this module.
callbacks = {}

# Library handle obtained from the bindings' common module.
lib = get_library()
32
class Disassembler(LLVMObject):
    """Represents a disassembler instance.

    A Disassembler instance is tied to one specific "triple," which must be
    given at creation time.

    Disassembler instances can disassemble instructions from multiple sources.
    """
    def __init__(self, triple):
        """Create a new disassembler instance.

        The triple argument is the triple to create the disassembler for. This
        is something like 'i386-apple-darwin9'.

        Raises Exception if the library does not return a disassembler for the
        requested triple.
        """
        # The op info and symbol lookup callbacks are not supported yet, so
        # both callback slots receive function pointers built from address 0.
        ptr = lib.LLVMCreateDisasm(c_char_p(triple), c_void_p(None), c_int(0),
                callbacks['op_info'](0), callbacks['symbol_lookup'](0))

        # Test the returned handle itself. Reading ``ptr.contents`` here would
        # dereference the pointer: ctypes raises ValueError on a NULL pointer
        # (hiding the error below), and for a valid handle it would inspect
        # the pointed-to memory rather than the handle.
        # NOTE(review): assumes c_object_p is a ctypes pointer type, whose
        # NULL instances are falsy -- confirm against the common module.
        if not ptr:
            raise Exception('Could not obtain disassembler for triple: %s' %
                            triple)

        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisasmDispose)

    def get_instruction(self, source, pc=0):
        """Obtain the next instruction from an input source.

        The input source should be a str or something that represents a
        sequence of bytes.
        NOTE(review): source is passed straight to c_char_p -- confirm that
        inputs other than byte strings (e.g. bytearray) are accepted.

        This function will start reading bytes from the beginning of the
        source.

        The pc argument specifies the address that the first byte is at.

        This returns a 2-tuple of:

          long number of bytes read. 0 if no instruction was read.
          str representation of instruction. This will be the assembly that
            represents the instruction.
        """
        buf = cast(c_char_p(source), POINTER(c_ubyte))
        # 255-byte scratch buffer for the textual form of the instruction;
        # the same size is passed to the library call below.
        out_str = cast((c_byte * 255)(), c_char_p)

        result = lib.LLVMDisasmInstruction(self, buf, c_uint64(len(source)),
                                           c_uint64(pc), out_str, 255)

        return (result, out_str.value)

    def get_instructions(self, source, pc=0):
        """Obtain multiple instructions from an input source.

        This is like get_instruction() except it is a generator for all
        instructions within the source. It starts at the beginning of the
        source and reads instructions until no more can be read, i.e. until
        the source is exhausted or the library reports a size of 0.

        This generator returns 3-tuple of:

          long address of instruction.
          long size of instruction, in bytes.
          str representation of instruction.
        """
        source_bytes = c_char_p(source)
        # Scratch buffer reused for every instruction; ``.value`` is read
        # before the next call overwrites it.
        out_str = cast((c_byte * 255)(), c_char_p)

        # This could probably be written cleaner. But, it does work.
        # View the source as a fixed-size byte array so that its base address
        # can be offset to point at each successive instruction.
        buf = cast(source_bytes, POINTER(c_ubyte * len(source))).contents
        offset = 0
        address = pc
        end_address = pc + len(source)
        while address < end_address:
            b = cast(addressof(buf) + offset, POINTER(c_ubyte))
            result = lib.LLVMDisasmInstruction(self, b,
                    c_uint64(len(source) - offset), c_uint64(address),
                    out_str, 255)

            # A size of 0 means nothing was decoded at this position; stop
            # rather than loop forever on the same bytes.
            if result == 0:
                break

            yield (address, result, out_str.value)

            address += result
            offset += result
114
115
def register_library(library):
    """Declare ctypes signatures for the disassembler functions of library.

    Sets argtypes (and restype where one is needed) on LLVMCreateDisasm,
    LLVMDisasmDispose and LLVMDisasmInstruction. The callback prototypes are
    read from the module-level callbacks registry, so that registry must be
    populated before this function is called.
    """
    create = library.LLVMCreateDisasm
    create.argtypes = [
        c_char_p,
        c_void_p,
        c_int,
        callbacks['op_info'],
        callbacks['symbol_lookup'],
    ]
    create.restype = c_object_p

    # Disassembler instances are passed directly as the handle argument.
    library.LLVMDisasmDispose.argtypes = [Disassembler]

    disassemble = library.LLVMDisasmInstruction
    disassemble.argtypes = [
        Disassembler,
        POINTER(c_ubyte),
        c_uint64,
        c_uint64,
        c_char_p,
        c_size_t,
    ]
    disassemble.restype = c_size_t
126
# Register the ctypes prototypes for the two callbacks taken by
# LLVMCreateDisasm. This has to happen before register_library() runs,
# because that function reads both entries from the registry.
callbacks.update({
    'op_info': CFUNCTYPE(
        c_int,
        c_void_p, c_uint64, c_uint64, c_uint64, c_int, c_void_p),
    'symbol_lookup': CFUNCTYPE(
        c_char_p,
        c_void_p, c_uint64, POINTER(c_uint64), c_uint64, POINTER(c_char_p)),
})

# Apply the signatures to the library handle loaded at import time.
register_library(lib)
0 from .base import TestBase
1
2 from ..disassembler import Disassembler
3
class TestDisassembler(TestBase):
    """Tests for the Disassembler binding using the i686-apple-darwin9 triple."""

    def test_instantiate(self):
        """Constructing a disassembler for a known triple does not raise."""
        Disassembler('i686-apple-darwin9')

    def test_basic(self):
        """A single instruction is decoded with its size and text."""
        triple = 'i686-apple-darwin9'
        raw = '\x67\xe3\x81'  # jcxz -127

        dis = Disassembler(triple)
        decoded = dis.get_instruction(raw)

        size = decoded[0]
        text = decoded[1]
        self.assertEqual(size, 3)
        self.assertEqual(text, '\tjcxz\t-127')

    def test_get_instructions(self):
        """Consecutive instructions are yielded with address, size and text."""
        raw = '\x67\xe3\x81\x01\xc7'  # jcxz -127; addl %eax, %edi

        dis = Disassembler('i686-apple-darwin9')

        decoded = [entry for entry in dis.get_instructions(raw)]
        self.assertEqual(len(decoded), 2)

        first, second = decoded
        self.assertEqual(first, (0, 3, '\tjcxz\t-127'))
        self.assertEqual(second, (3, 2, '\taddl\t%eax, %edi'))