llvm.org GIT mirror llvm / 8ed9f51
Table-driven disassembler for the X86 architecture (16-, 32-, and 64-bit incarnations), integrated into the MC framework. The disassembler is table-driven, using a custom TableGen backend to generate hierarchical tables optimized for fast decode. The disassembler consumes MemoryObjects and produces arrays of MCInsts, adhering to the abstract base class MCDisassembler (llvm/MC/MCDisassembler.h). The disassembler is documented in detail in - lib/Target/X86/Disassembler/X86Disassembler.cpp (disassembler runtime) - utils/TableGen/DisassemblerEmitter.cpp (table emitter) You can test the disassembler by running llvm-mc -disassemble for i386 or x86_64 targets. Please let me know if you encounter any problems with it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@91749 91177308-0d34-0410-b5e6-96231b3b80d8 Sean Callanan 9 years ago
18 changed file(s) with 5255 addition(s) and 12 deletion(s). Raw diff Collapse all Expand all
11 set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMARMCodeGen LLVMARMInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
22 set(MSVC_LIB_DEPS_LLVMARMCodeGen LLVMARMInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
33 set(MSVC_LIB_DEPS_LLVMARMInfo LLVMSupport)
4 set(MSVC_LIB_DEPS_LLVMAlphaAsmPrinter LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
4 set(MSVC_LIB_DEPS_LLVMAlphaAsmPrinter LLVMAlphaCodeGen LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
55 set(MSVC_LIB_DEPS_LLVMAlphaCodeGen LLVMAlphaInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
66 set(MSVC_LIB_DEPS_LLVMAlphaInfo LLVMSupport)
77 set(MSVC_LIB_DEPS_LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget)
1010 set(MSVC_LIB_DEPS_LLVMAsmPrinter LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
1111 set(MSVC_LIB_DEPS_LLVMBitReader LLVMCore LLVMSupport LLVMSystem)
1212 set(MSVC_LIB_DEPS_LLVMBitWriter LLVMCore LLVMSupport LLVMSystem)
13 set(MSVC_LIB_DEPS_LLVMBlackfinAsmPrinter LLVMAsmPrinter LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
13 set(MSVC_LIB_DEPS_LLVMBlackfinAsmPrinter LLVMAsmPrinter LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
1414 set(MSVC_LIB_DEPS_LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
1515 set(MSVC_LIB_DEPS_LLVMBlackfinInfo LLVMSupport)
1616 set(MSVC_LIB_DEPS_LLVMCBackend LLVMAnalysis LLVMCBackendInfo LLVMCodeGen LLVMCore LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa)
1717 set(MSVC_LIB_DEPS_LLVMCBackendInfo LLVMSupport)
18 set(MSVC_LIB_DEPS_LLVMCellSPUAsmPrinter LLVMAsmPrinter LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
18 set(MSVC_LIB_DEPS_LLVMCellSPUAsmPrinter LLVMAsmPrinter LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
1919 set(MSVC_LIB_DEPS_LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
2020 set(MSVC_LIB_DEPS_LLVMCellSPUInfo LLVMSupport)
2121 set(MSVC_LIB_DEPS_LLVMCodeGen LLVMAnalysis LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
3030 set(MSVC_LIB_DEPS_LLVMMC LLVMSupport LLVMSystem)
3131 set(MSVC_LIB_DEPS_LLVMMSIL LLVMAnalysis LLVMCodeGen LLVMCore LLVMMSILInfo LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa)
3232 set(MSVC_LIB_DEPS_LLVMMSILInfo LLVMSupport)
33 set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSupport LLVMSystem LLVMTarget)
33 set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430CodeGen LLVMMSP430Info LLVMSupport LLVMSystem LLVMTarget)
3434 set(MSVC_LIB_DEPS_LLVMMSP430CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
3535 set(MSVC_LIB_DEPS_LLVMMSP430Info LLVMSupport)
3636 set(MSVC_LIB_DEPS_LLVMMipsAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMipsCodeGen LLVMMipsInfo LLVMSupport LLVMSystem LLVMTarget)
3939 set(MSVC_LIB_DEPS_LLVMPIC16 LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMPIC16Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
4040 set(MSVC_LIB_DEPS_LLVMPIC16AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPIC16 LLVMPIC16Info LLVMSupport LLVMSystem LLVMTarget)
4141 set(MSVC_LIB_DEPS_LLVMPIC16Info LLVMSupport)
42 set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSupport LLVMSystem LLVMTarget)
42 set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCCodeGen LLVMPowerPCInfo LLVMSupport LLVMSystem LLVMTarget)
4343 set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
4444 set(MSVC_LIB_DEPS_LLVMPowerPCInfo LLVMSupport)
4545 set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
4646 set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget)
47 set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)
47 set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSparcCodeGen LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)
4848 set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)
4949 set(MSVC_LIB_DEPS_LLVMSparcInfo LLVMSupport)
5050 set(MSVC_LIB_DEPS_LLVMSupport LLVMSystem)
5151 set(MSVC_LIB_DEPS_LLVMSystem )
52 set(MSVC_LIB_DEPS_LLVMSystemZAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMSystemZInfo LLVMTarget)
52 set(MSVC_LIB_DEPS_LLVMSystemZAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMSystemZCodeGen LLVMSystemZInfo LLVMTarget)
5353 set(MSVC_LIB_DEPS_LLVMSystemZCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystemZInfo LLVMTarget)
5454 set(MSVC_LIB_DEPS_LLVMSystemZInfo LLVMSupport)
5555 set(MSVC_LIB_DEPS_LLVMTarget LLVMCore LLVMMC LLVMSupport LLVMSystem)
22 tablegen(X86GenRegisterInfo.h.inc -gen-register-desc-header)
33 tablegen(X86GenRegisterNames.inc -gen-register-enums)
44 tablegen(X86GenRegisterInfo.inc -gen-register-desc)
5 tablegen(X86GenDisassemblerTables.inc -gen-disassembler)
56 tablegen(X86GenInstrNames.inc -gen-instr-enums)
67 tablegen(X86GenInstrInfo.inc -gen-instr-desc)
78 tablegen(X86GenAsmWriter.inc -gen-asm-writer)
11
22 add_llvm_library(LLVMX86Disassembler
33 X86Disassembler.cpp
4 X86DisassemblerDecoder.c
45 )
56 add_dependencies(LLVMX86Disassembler X86CodeGenTable_gen)
55 // License. See LICENSE.TXT for details.
66 //
77 //===----------------------------------------------------------------------===//
8 //
9 // This file is part of the X86 Disassembler.
10 // It contains code to translate the data produced by the decoder into
11 // MCInsts.
12 // Documentation for the disassembler can be found in X86Disassembler.h.
13 //
14 //===----------------------------------------------------------------------===//
15
16 #include "X86Disassembler.h"
17 #include "X86DisassemblerDecoder.h"
18 #include "X86InstrInfo.h"
819
920 #include "llvm/MC/MCDisassembler.h"
21 #include "llvm/MC/MCDisassembler.h"
22 #include "llvm/MC/MCInst.h"
1023 #include "llvm/Target/TargetRegistry.h"
11 #include "X86.h"
24 #include "llvm/Support/MemoryObject.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include "llvm/Support/raw_ostream.h"
1227 using namespace llvm;
28 using namespace llvm::X86Disassembler;
29
namespace llvm {

// Placeholder register names.  Nothing ever assigns these values to an
// operand; they exist only so that the automatically-generated switch
// statements in this file, which paste every EA base name onto X86::,
// have something to refer to.
namespace X86 {
  enum {
    BX_SI = 500,
    BX_DI,        // 501
    BP_SI,        // 502
    BP_DI,        // 503
    sib,          // 504
    sib64         // 505
  };
}

}
47
48 static void translateInstruction(MCInst &target,
49 InternalInstruction &source);
50
51 X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) :
52 MCDisassembler(),
53 fMode(mode) {
54 }
55
56 X86GenericDisassembler::~X86GenericDisassembler() {
57 }
58
59 /// regionReader - a callback function that wraps the readByte method from
60 /// MemoryObject.
61 ///
62 /// @param arg - The generic callback parameter. In this case, this should
63 /// be a pointer to a MemoryObject.
64 /// @param byte - A pointer to the byte to be read.
65 /// @param address - The address to be read.
66 static int regionReader(void* arg, uint8_t* byte, uint64_t address) {
67 MemoryObject* region = static_cast(arg);
68 return region->readByte(address, byte);
69 }
70
71 /// logger - a callback function that wraps the operator<< method from
72 /// raw_ostream.
73 ///
74 /// @param arg - The generic callback parameter. This should be a pointe
75 /// to a raw_ostream.
76 /// @param log - A string to be logged. logger() adds a newline.
77 static void logger(void* arg, const char* log) {
78 if (!arg)
79 return;
80
81 raw_ostream &vStream = *(static_cast(arg));
82 vStream << log << "\n";
83 }
84
85 //
86 // Public interface for the disassembler
87 //
88
89 bool X86GenericDisassembler::getInstruction(MCInst &instr,
90 uint64_t &size,
91 const MemoryObject ®ion,
92 uint64_t address,
93 raw_ostream &vStream) const {
94 InternalInstruction internalInstr;
95
96 int ret = decodeInstruction(&internalInstr,
97 regionReader,
98 (void*)®ion,
99 logger,
100 (void*)&vStream,
101 address,
102 fMode);
103
104 if(ret) {
105 size = internalInstr.readerCursor - address;
106 return false;
107 }
108 else {
109 size = internalInstr.length;
110 translateInstruction(instr, internalInstr);
111 return true;
112 }
113 }
114
115 //
116 // Private code that translates from struct InternalInstructions to MCInsts.
117 //
118
119 /// translateRegister - Translates an internal register to the appropriate LLVM
120 /// register, and appends it as an operand to an MCInst.
121 ///
122 /// @param mcInst - The MCInst to append to.
123 /// @param reg - The Reg to append.
124 static void translateRegister(MCInst &mcInst, Reg reg) {
125 #define ENTRY(x) X86::x,
126 uint8_t llvmRegnums[] = {
127 ALL_REGS
128 0
129 };
130 #undef ENTRY
131
132 uint8_t llvmRegnum = llvmRegnums[reg];
133 mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
134 }
135
136 /// translateImmediate - Appends an immediate operand to an MCInst.
137 ///
138 /// @param mcInst - The MCInst to append to.
139 /// @param immediate - The immediate value to append.
140 static void translateImmediate(MCInst &mcInst, uint64_t immediate) {
141 mcInst.addOperand(MCOperand::CreateImm(immediate));
142 }
143
144 /// translateRMRegister - Translates a register stored in the R/M field of the
145 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
146 /// @param mcInst - The MCInst to append to.
147 /// @param insn - The internal instruction to extract the R/M field
148 /// from.
149 static void translateRMRegister(MCInst &mcInst,
150 InternalInstruction &insn) {
151 assert(insn.eaBase != EA_BASE_sib && insn.eaBase != EA_BASE_sib64 &&
152 "A R/M register operand may not have a SIB byte");
153
154 switch (insn.eaBase) {
155 case EA_BASE_NONE:
156 llvm_unreachable("EA_BASE_NONE for ModR/M base");
157 break;
158 #define ENTRY(x) case EA_BASE_##x:
159 ALL_EA_BASES
160 #undef ENTRY
161 llvm_unreachable("A R/M register operand may not have a base; "
162 "the operand must be a register.");
163 break;
164 #define ENTRY(x) \
165 case EA_REG_##x: \
166 mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
167 ALL_REGS
168 #undef ENTRY
169 default:
170 llvm_unreachable("Unexpected EA base register");
171 }
172 }
173
174 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M
175 /// fields of an internal instruction (and possibly its SIB byte) to a memory
176 /// operand in LLVM's format, and appends it to an MCInst.
177 ///
178 /// @param mcInst - The MCInst to append to.
179 /// @param insn - The instruction to extract Mod, R/M, and SIB fields
180 /// from.
181 static void translateRMMemory(MCInst &mcInst,
182 InternalInstruction &insn) {
183 // Addresses in an MCInst are represented as five operands:
184 // 1. basereg (register) The R/M base, or (if there is a SIB) the
185 // SIB base
186 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
187 // scale amount
188 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
189 // the index (which is multiplied by the
190 // scale amount)
191 // 4. displacement (immediate) 0, or the displacement if there is one
192 // 5. segmentreg (register) x86_registerNONE for now, but could be set
193 // if we have segment overrides
194
195 MCOperand baseReg;
196 MCOperand scaleAmount;
197 MCOperand indexReg;
198 MCOperand displacement;
199 MCOperand segmentReg;
200
201 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
202 if (insn.sibBase != SIB_BASE_NONE) {
203 switch (insn.sibBase) {
204 default:
205 llvm_unreachable("Unexpected sibBase");
206 #define ENTRY(x) \
207 case SIB_BASE_##x: \
208 baseReg = MCOperand::CreateReg(X86::x); break;
209 ALL_SIB_BASES
210 #undef ENTRY
211 }
212 } else {
213 baseReg = MCOperand::CreateReg(0);
214 }
215
216 if (insn.sibIndex != SIB_INDEX_NONE) {
217 switch (insn.sibIndex) {
218 default:
219 llvm_unreachable("Unexpected sibIndex");
220 #define ENTRY(x) \
221 case SIB_INDEX_##x: \
222 indexReg = MCOperand::CreateReg(X86::x); break;
223 EA_BASES_32BIT
224 EA_BASES_64BIT
225 #undef ENTRY
226 }
227 } else {
228 indexReg = MCOperand::CreateReg(0);
229 }
230
231 scaleAmount = MCOperand::CreateImm(insn.sibScale);
232 } else {
233 switch (insn.eaBase) {
234 case EA_BASE_NONE:
235 assert(insn.eaDisplacement != EA_DISP_NONE &&
236 "EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
237
238 if (insn.mode == MODE_64BIT)
239 baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
240 else
241 baseReg = MCOperand::CreateReg(0);
242
243 indexReg = MCOperand::CreateReg(0);
244 break;
245 case EA_BASE_BX_SI:
246 baseReg = MCOperand::CreateReg(X86::BX);
247 indexReg = MCOperand::CreateReg(X86::SI);
248 break;
249 case EA_BASE_BX_DI:
250 baseReg = MCOperand::CreateReg(X86::BX);
251 indexReg = MCOperand::CreateReg(X86::DI);
252 break;
253 case EA_BASE_BP_SI:
254 baseReg = MCOperand::CreateReg(X86::BP);
255 indexReg = MCOperand::CreateReg(X86::SI);
256 break;
257 case EA_BASE_BP_DI:
258 baseReg = MCOperand::CreateReg(X86::BP);
259 indexReg = MCOperand::CreateReg(X86::DI);
260 break;
261 default:
262 indexReg = MCOperand::CreateReg(0);
263 switch (insn.eaBase) {
264 default:
265 llvm_unreachable("Unexpected eaBase");
266 break;
267 // Here, we will use the fill-ins defined above. However,
268 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
269 // sib and sib64 were handled in the top-level if, so they're only
270 // placeholders to keep the compiler happy.
271 #define ENTRY(x) \
272 case EA_BASE_##x: \
273 baseReg = MCOperand::CreateReg(X86::x); break;
274 ALL_EA_BASES
275 #undef ENTRY
276 #define ENTRY(x) case EA_REG_##x:
277 ALL_REGS
278 #undef ENTRY
279 llvm_unreachable("A R/M memory operand may not be a register; "
280 "the base field must be a base.");
281 break;
282 }
283 }
284 }
285
286 displacement = MCOperand::CreateImm(insn.displacement);
287
288 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
289 0, // SEG_OVERRIDE_NONE
290 X86::CS,
291 X86::SS,
292 X86::DS,
293 X86::ES,
294 X86::FS,
295 X86::GS
296 };
297
298 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
299
300 mcInst.addOperand(baseReg);
301 mcInst.addOperand(scaleAmount);
302 mcInst.addOperand(indexReg);
303 mcInst.addOperand(displacement);
304 mcInst.addOperand(segmentReg);
305 }
306
307 /// translateRM - Translates an operand stored in the R/M (and possibly SIB)
308 /// byte of an instruction to LLVM form, and appends it to an MCInst.
309 ///
310 /// @param mcInst - The MCInst to append to.
311 /// @param operand - The operand, as stored in the descriptor table.
312 /// @param insn - The instruction to extract Mod, R/M, and SIB fields
313 /// from.
314 static void translateRM(MCInst &mcInst,
315 OperandSpecifier &operand,
316 InternalInstruction &insn) {
317 switch (operand.type) {
318 default:
319 llvm_unreachable("Unexpected type for a R/M operand");
320 case TYPE_R8:
321 case TYPE_R16:
322 case TYPE_R32:
323 case TYPE_R64:
324 case TYPE_Rv:
325 case TYPE_MM:
326 case TYPE_MM32:
327 case TYPE_MM64:
328 case TYPE_XMM:
329 case TYPE_XMM32:
330 case TYPE_XMM64:
331 case TYPE_XMM128:
332 case TYPE_DEBUGREG:
333 case TYPE_CR32:
334 case TYPE_CR64:
335 translateRMRegister(mcInst, insn);
336 break;
337 case TYPE_M:
338 case TYPE_M8:
339 case TYPE_M16:
340 case TYPE_M32:
341 case TYPE_M64:
342 case TYPE_M128:
343 case TYPE_M512:
344 case TYPE_Mv:
345 case TYPE_M32FP:
346 case TYPE_M64FP:
347 case TYPE_M80FP:
348 case TYPE_M16INT:
349 case TYPE_M32INT:
350 case TYPE_M64INT:
351 case TYPE_M1616:
352 case TYPE_M1632:
353 case TYPE_M1664:
354 translateRMMemory(mcInst, insn);
355 break;
356 }
357 }
358
359 /// translateFPRegister - Translates a stack position on the FPU stack to its
360 /// LLVM form, and appends it to an MCInst.
361 ///
362 /// @param mcInst - The MCInst to append to.
363 /// @param stackPos - The stack position to translate.
364 static void translateFPRegister(MCInst &mcInst,
365 uint8_t stackPos) {
366 assert(stackPos < 8 && "Invalid FP stack position");
367
368 mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
369 }
370
371 /// translateOperand - Translates an operand stored in an internal instruction
372 /// to LLVM's format and appends it to an MCInst.
373 ///
374 /// @param mcInst - The MCInst to append to.
375 /// @param operand - The operand, as stored in the descriptor table.
376 /// @param insn - The internal instruction.
377 static void translateOperand(MCInst &mcInst,
378 OperandSpecifier &operand,
379 InternalInstruction &insn) {
380 switch (operand.encoding) {
381 default:
382 llvm_unreachable("Unhandled operand encoding during translation");
383 case ENCODING_REG:
384 translateRegister(mcInst, insn.reg);
385 break;
386 case ENCODING_RM:
387 translateRM(mcInst, operand, insn);
388 break;
389 case ENCODING_CB:
390 case ENCODING_CW:
391 case ENCODING_CD:
392 case ENCODING_CP:
393 case ENCODING_CO:
394 case ENCODING_CT:
395 llvm_unreachable("Translation of code offsets isn't supported.");
396 case ENCODING_IB:
397 case ENCODING_IW:
398 case ENCODING_ID:
399 case ENCODING_IO:
400 case ENCODING_Iv:
401 case ENCODING_Ia:
402 translateImmediate(mcInst,
403 insn.immediates[insn.numImmediatesTranslated++]);
404 break;
405 case ENCODING_RB:
406 case ENCODING_RW:
407 case ENCODING_RD:
408 case ENCODING_RO:
409 translateRegister(mcInst, insn.opcodeRegister);
410 break;
411 case ENCODING_I:
412 translateFPRegister(mcInst, insn.opcodeModifier);
413 break;
414 case ENCODING_Rv:
415 translateRegister(mcInst, insn.opcodeRegister);
416 break;
417 case ENCODING_DUP:
418 translateOperand(mcInst,
419 insn.spec->operands[operand.type - TYPE_DUP0],
420 insn);
421 break;
422 }
423 }
424
425 /// translateInstruction - Translates an internal instruction and all its
426 /// operands to an MCInst.
427 ///
428 /// @param mcInst - The MCInst to populate with the instruction's data.
429 /// @param insn - The internal instruction.
430 static void translateInstruction(MCInst &mcInst,
431 InternalInstruction &insn) {
432 assert(insn.spec);
433
434 mcInst.setOpcode(insn.instructionID);
435
436 int index;
437
438 insn.numImmediatesTranslated = 0;
439
440 for (index = 0; index < X86_MAX_OPERANDS; ++index) {
441 if (insn.spec->operands[index].encoding != ENCODING_NONE)
442 translateOperand(mcInst, insn.spec->operands[index], insn);
443 }
444 }
13445
14446 static const MCDisassembler *createX86_32Disassembler(const Target &T) {
15 return 0;
447 return new X86Disassembler::X86_32Disassembler;
16448 }
17449
18450 static const MCDisassembler *createX86_64Disassembler(const Target &T) {
19 return 0;
451 return new X86Disassembler::X86_64Disassembler;
20452 }
21453
22454 extern "C" void LLVMInitializeX86Disassembler() {
0 //===- X86Disassembler.h - Disassembler for x86 and x86_64 ------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
10 // 64-bit X86 instruction sets. The main decode sequence for an assembly
11 // instruction in this disassembler is:
12 //
13 // 1. Read the prefix bytes and determine the attributes of the instruction.
14 // These attributes, recorded in enum attributeBits
15 // (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
16 // provides a mapping from bitmasks to contexts, which are represented by
17 // enum InstructionContext (ibid.).
18 //
19 // 2. Read the opcode, and determine what kind of opcode it is. The
20 // disassembler distinguishes four kinds of opcodes, which are enumerated in
21 // OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
22 // (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
23 // (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
24 //
25 // 3. Depending on the opcode type, look in one of four ClassDecision structures
26 // (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
27 // OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode to get
28 // a ModRMDecision (ibid.).
29 //
30 // 4. Some instructions, such as escape opcodes or extended opcodes, or even
31 // instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
32 // ModR/M byte to complete decode. The ModRMDecision's type is an entry from
33 // ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
34 // ModR/M byte is required and how to interpret it.
35 //
36 // 5. After resolving the ModRMDecision, the disassembler has a unique ID
37 // of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
38 // INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
39 // meanings of its operands.
40 //
41 // 6. For each operand, its encoding is an entry from OperandEncoding
42 // (X86DisassemblerDecoderCommon.h) and its type is an entry from
43 // OperandType (ibid.). The encoding indicates how to read it from the
44 // instruction; the type indicates how to interpret the value once it has
45 // been read. For example, a register operand could be stored in the R/M
46 // field of the ModR/M byte, the REG field of the ModR/M byte, or added to
47 // the main opcode. This is orthogonal to its meaning (a GPR or an XMM
48 // register, for instance). Given this information, the operands can be
49 // extracted and interpreted.
50 //
51 // 7. As the last step, the disassembler translates the instruction information
52 // and operands into a format understandable by the client - in this case, an
53 // MCInst for use by the MC infrastructure.
54 //
55 // The disassembler is broken broadly into two parts: the table emitter that
56 // emits the instruction decode tables discussed above during compilation, and
57 // the disassembler itself. The table emitter is documented in more detail in
58 // utils/TableGen/X86DisassemblerEmitter.h.
59 //
60 // X86Disassembler.h contains the public interface for the disassembler,
61 // adhering to the MCDisassembler interface.
62 // X86Disassembler.cpp contains the code responsible for step 7, and for
63 // invoking the decoder to execute steps 1-6.
64 // X86DisassemblerDecoderCommon.h contains the definitions needed by both the
65 // table emitter and the disassembler.
66 // X86DisassemblerDecoder.h contains the public interface of the decoder,
67 // factored out into C for possible use by other projects.
68 // X86DisassemblerDecoder.c contains the source code of the decoder, which is
69 // responsible for steps 1-6.
70 //
71 //===----------------------------------------------------------------------===//
72
73 #ifndef X86DISASSEMBLER_H
74 #define X86DISASSEMBLER_H
75
76 #define INSTRUCTION_SPECIFIER_FIELDS \
77 const char* name;
78
79 #define INSTRUCTION_IDS \
80 InstrUID* instructionIDs;
81
82 #include "X86DisassemblerDecoderCommon.h"
83
84 #undef INSTRUCTION_SPECIFIER_FIELDS
85 #undef INSTRUCTION_IDS
86
87 #include "llvm/MC/MCDisassembler.h"
88
89 struct InternalInstruction;
90
91 namespace llvm {
92
93 class MCInst;
94 class MemoryObject;
95 class raw_ostream;
96
97 namespace X86Disassembler {
98
99 /// X86GenericDisassembler - Generic disassembler for all X86 platforms.
100 /// All each platform class should have to do is subclass the constructor, and
101 /// provide a different disassemblerMode value.
102 class X86GenericDisassembler : public MCDisassembler {
103 protected:
104 /// Constructor - Initializes the disassembler.
105 ///
106 /// @param mode - The X86 architecture mode to decode for.
107 X86GenericDisassembler(DisassemblerMode mode);
108 public:
109 ~X86GenericDisassembler();
110
111 /// getInstruction - See MCDisassembler.
112 bool getInstruction(MCInst &instr,
113 uint64_t &size,
114 const MemoryObject ®ion,
115 uint64_t address,
116 raw_ostream &vStream) const;
117 private:
118 DisassemblerMode fMode;
119 };
120
121 /// X86_16Disassembler - 16-bit X86 disassembler.
122 class X86_16Disassembler : public X86GenericDisassembler {
123 public:
124 X86_16Disassembler() :
125 X86GenericDisassembler(MODE_16BIT) {
126 }
127 };
128
129 /// X86_16Disassembler - 32-bit X86 disassembler.
130 class X86_32Disassembler : public X86GenericDisassembler {
131 public:
132 X86_32Disassembler() :
133 X86GenericDisassembler(MODE_32BIT) {
134 }
135 };
136
137 /// X86_16Disassembler - 64-bit X86 disassembler.
138 class X86_64Disassembler : public X86GenericDisassembler {
139 public:
140 X86_64Disassembler() :
141 X86GenericDisassembler(MODE_64BIT) {
142 }
143 };
144
145 } // namespace X86Disassembler
146
147 } // namespace llvm
148
149 #endif
0 /*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
1 *
2 * The LLVM Compiler Infrastructure
3 *
4 * This file is distributed under the University of Illinois Open Source
5 * License. See LICENSE.TXT for details.
6 *
7 *===----------------------------------------------------------------------===*
8 *
9 * This file is part of the X86 Disassembler.
10 * It contains the implementation of the instruction decoder.
11 * Documentation for the disassembler can be found in X86Disassembler.h.
12 *
13 *===----------------------------------------------------------------------===*/
14
15 #include <assert.h> /* for assert() */
16 #include <stdarg.h> /* for va_*() */
17 #include <stdio.h> /* for vsnprintf() */
18 #include <stdlib.h> /* for exit() */
19 #include <string.h> /* for bzero() */
20
21 #include "X86DisassemblerDecoder.h"
22
23 #include "X86GenDisassemblerTables.inc"
24
25 #define TRUE 1
26 #define FALSE 0
27
28 #ifdef __GNUC__
29 #define NORETURN __attribute__((noreturn))
30 #else
31 #define NORETURN
32 #endif
33
/*
 * unreachable - Reports a condition that should never occur: prints the
 *   source file, line, and message to stderr, then terminates the process
 *   with exit(-1).
 *
 * The expansion deliberately has no trailing semicolon, so that
 * "unreachable(msg);" is exactly one statement and can be used as the body of
 * an if that has an else.
 *
 * @param s - The message to print.
 */
#define unreachable(s)                                                      \
  do {                                                                      \
    fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, s);                  \
    exit(-1);                                                               \
  } while (0)
39
40 /*
41 * contextForAttrs - Client for the instruction context table. Takes a set of
42 * attributes and returns the appropriate decode context.
43 *
44 * @param attrMask - Attributes, from the enumeration attributeBits.
45 * @return - The InstructionContext to use when looking up an
46 * an instruction with these attributes.
47 */
48 static inline InstructionContext contextForAttrs(uint8_t attrMask) {
49 return CONTEXTS_SYM[attrMask];
50 }
51
52 /*
53 * modRMRequired - Reads the appropriate instruction table to determine whether
54 * the ModR/M byte is required to decode a particular instruction.
55 *
56 * @param type - The opcode type (i.e., how many bytes it has).
57 * @param insnContext - The context for the instruction, as returned by
58 * contextForAttrs.
59 * @param opcode - The last byte of the instruction's opcode, not counting
60 * ModR/M extensions and escapes.
61 * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
62 */
63 static inline int modRMRequired(OpcodeType type,
64 InstructionContext insnContext,
65 uint8_t opcode) {
66 const struct ContextDecision* decision;
67
68 switch (type) {
69 case ONEBYTE:
70 decision = &ONEBYTE_SYM;
71 break;
72 case TWOBYTE:
73 decision = &TWOBYTE_SYM;
74 break;
75 case THREEBYTE_38:
76 decision = &THREEBYTE38_SYM;
77 break;
78 case THREEBYTE_3A:
79 decision = &THREEBYTE3A_SYM;
80 break;
81 }
82
83 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
84 modrm_type != MODRM_ONEENTRY;
85
86 unreachable("Unknown opcode type");
87 return 0;
88 }
89
90 /*
91 * decode - Reads the appropriate instruction table to obtain the unique ID of
92 * an instruction.
93 *
94 * @param type - See modRMRequired().
95 * @param insnContext - See modRMRequired().
96 * @param opcode - See modRMRequired().
97 * @param modRM - The ModR/M byte if required, or any value if not.
98 */
99 static inline InstrUID decode(OpcodeType type,
100 InstructionContext insnContext,
101 uint8_t opcode,
102 uint8_t modRM) {
103 struct ModRMDecision* dec;
104
105 switch (type) {
106 default:
107 unreachable("Unknown opcode type");
108 case ONEBYTE:
109 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
110 break;
111 case TWOBYTE:
112 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
113 break;
114 case THREEBYTE_38:
115 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
116 break;
117 case THREEBYTE_3A:
118 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
119 break;
120 }
121
122 switch (dec->modrm_type) {
123 default:
124 unreachable("Corrupt table! Unknown modrm_type");
125 case MODRM_ONEENTRY:
126 return dec->instructionIDs[0];
127 case MODRM_SPLITRM:
128 if (modFromModRM(modRM) == 0x3)
129 return dec->instructionIDs[1];
130 else
131 return dec->instructionIDs[0];
132 case MODRM_FULL:
133 return dec->instructionIDs[modRM];
134 }
135
136 return 0;
137 }
138
139 /*
140 * specifierForUID - Given a UID, returns the name and operand specification for
141 * that instruction.
142 *
143 * @param uid - The unique ID for the instruction. This should be returned by
144 * decode(); specifierForUID will not check bounds.
145 * @return - A pointer to the specification for that instruction.
146 */
147 static inline struct InstructionSpecifier* specifierForUID(InstrUID uid) {
148 return &INSTRUCTIONS_SYM[uid];
149 }
150
151 /*
152 * consumeByte - Uses the reader function provided by the user to consume one
153 * byte from the instruction's memory and advance the cursor.
154 *
155 * @param insn - The instruction with the reader function to use. The cursor
156 * for this instruction is advanced.
157 * @param byte - A pointer to a pre-allocated memory buffer to be populated
158 * with the data read.
159 * @return - 0 if the read was successful; nonzero otherwise.
160 */
161 static inline int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
162 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
163
164 if (!ret)
165 ++(insn->readerCursor);
166
167 return ret;
168 }
169
170 /*
171 * lookAtByte - Like consumeByte, but does not advance the cursor.
172 *
173 * @param insn - See consumeByte().
174 * @param byte - See consumeByte().
175 * @return - See consumeByte().
176 */
177 static inline int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
178 return insn->reader(insn->readerArg, byte, insn->readerCursor);
179 }
180
181 static inline void unconsumeByte(struct InternalInstruction* insn) {
182 insn->readerCursor--;
183 }
184
185 #define CONSUME_FUNC(name, type) \
186 static inline int name(struct InternalInstruction* insn, type* ptr) { \
187 type combined = 0; \
188 unsigned offset; \
189 for (offset = 0; offset < sizeof(type); ++offset) { \
190 uint8_t byte; \
191 int ret = insn->reader(insn->readerArg, \
192 &byte, \
193 insn->readerCursor + offset); \
194 if (ret) \
195 return ret; \
196 combined = combined | ((type)byte << ((type)offset * 8)); \
197 } \
198 *ptr = combined; \
199 insn->readerCursor += sizeof(type); \
200 return 0; \
201 }
202
203 /*
204 * consume* - Use the reader function provided by the user to consume data
205 * values of various sizes from the instruction's memory and advance the
206 * cursor appropriately. These readers perform endian conversion.
207 *
208 * @param insn - See consumeByte().
209 * @param ptr - A pointer to a pre-allocated memory of appropriate size to
210 * be populated with the data read.
211 * @return - See consumeByte().
212 */
213 CONSUME_FUNC(consumeInt8, int8_t)
214 CONSUME_FUNC(consumeInt16, int16_t)
215 CONSUME_FUNC(consumeInt32, int32_t)
216 CONSUME_FUNC(consumeUInt16, uint16_t)
217 CONSUME_FUNC(consumeUInt32, uint32_t)
218 CONSUME_FUNC(consumeUInt64, uint64_t)
219
220 /*
221 * dprintf - Uses the logging function provided by the user to log a single
222 * message, typically without a carriage-return.
223 *
224 * @param insn - The instruction containing the logging function.
225 * @param format - See printf().
226 * @param ... - See printf().
227 */
228 static inline void dprintf(struct InternalInstruction* insn,
229 const char* format,
230 ...) {
231 char buffer[256];
232 va_list ap;
233
234 if (!insn->dlog)
235 return;
236
237 va_start(ap, format);
238 (void)vsnprintf(buffer, sizeof(buffer), format, ap);
239 va_end(ap);
240
241 insn->dlog(insn->dlogArg, buffer);
242
243 return;
244 }
245
246 /*
247 * setPrefixPresent - Marks that a particular prefix is present at a particular
248 * location.
249 *
250 * @param insn - The instruction to be marked as having the prefix.
251 * @param prefix - The prefix that is present.
252 * @param location - The location where the prefix is located (in the address
253 * space of the instruction's reader).
254 */
255 static inline void setPrefixPresent(struct InternalInstruction* insn,
256 uint8_t prefix,
257 uint64_t location)
258 {
259 insn->prefixPresent[prefix] = 1;
260 insn->prefixLocations[prefix] = location;
261 }
262
263 /*
264 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
265 * present at a given location.
266 *
267 * @param insn - The instruction to be queried.
268 * @param prefix - The prefix.
269 * @param location - The location to query.
270 * @return - Whether the prefix is at that location.
271 */
272 static inline BOOL isPrefixAtLocation(struct InternalInstruction* insn,
273 uint8_t prefix,
274 uint64_t location)
275 {
276 if (insn->prefixPresent[prefix] == 1 &&
277 insn->prefixLocations[prefix] == location)
278 return TRUE;
279 else
280 return FALSE;
281 }
282
283 /*
284 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
285 * instruction as having them. Also sets the instruction's default operand,
286 * address, and other relevant data sizes to report operands correctly.
287 *
288 * @param insn - The instruction whose prefixes are to be read.
289 * @return - 0 if the instruction could be read until the end of the prefix
290 * bytes, and no prefixes conflicted; nonzero otherwise.
291 */
292 static int readPrefixes(struct InternalInstruction* insn) {
293 BOOL isPrefix = TRUE;
294 BOOL prefixGroups[4] = { FALSE };
295 uint64_t prefixLocation;
296 uint8_t byte;
297
298 BOOL hasAdSize = FALSE;
299 BOOL hasOpSize = FALSE;
300
301 dprintf(insn, "readPrefixes()");
302
303 while (isPrefix) {
304 prefixLocation = insn->readerCursor;
305
306 if (consumeByte(insn, &byte))
307 return -1;
308
309 switch (byte) {
310 case 0xf0: /* LOCK */
311 case 0xf2: /* REPNE/REPNZ */
312 case 0xf3: /* REP or REPE/REPZ */
313 if (prefixGroups[0])
314 dprintf(insn, "Redundant Group 1 prefix");
315 prefixGroups[0] = TRUE;
316 setPrefixPresent(insn, byte, prefixLocation);
317 break;
318 case 0x2e: /* CS segment override -OR- Branch not taken */
319 case 0x36: /* SS segment override -OR- Branch taken */
320 case 0x3e: /* DS segment override */
321 case 0x26: /* ES segment override */
322 case 0x64: /* FS segment override */
323 case 0x65: /* GS segment override */
324 switch (byte) {
325 case 0x2e:
326 insn->segmentOverride = SEG_OVERRIDE_CS;
327 break;
328 case 0x36:
329 insn->segmentOverride = SEG_OVERRIDE_SS;
330 break;
331 case 0x3e:
332 insn->segmentOverride = SEG_OVERRIDE_DS;
333 break;
334 case 0x26:
335 insn->segmentOverride = SEG_OVERRIDE_ES;
336 break;
337 case 0x64:
338 insn->segmentOverride = SEG_OVERRIDE_FS;
339 break;
340 case 0x65:
341 insn->segmentOverride = SEG_OVERRIDE_GS;
342 break;
343 default:
344 unreachable("Unhandled override");
345 }
346 if (prefixGroups[1])
347 dprintf(insn, "Redundant Group 2 prefix");
348 prefixGroups[1] = TRUE;
349 setPrefixPresent(insn, byte, prefixLocation);
350 break;
351 case 0x66: /* Operand-size override */
352 if (prefixGroups[2])
353 dprintf(insn, "Redundant Group 3 prefix");
354 prefixGroups[2] = TRUE;
355 hasOpSize = TRUE;
356 setPrefixPresent(insn, byte, prefixLocation);
357 break;
358 case 0x67: /* Address-size override */
359 if (prefixGroups[3])
360 dprintf(insn, "Redundant Group 4 prefix");
361 prefixGroups[3] = TRUE;
362 hasAdSize = TRUE;
363 setPrefixPresent(insn, byte, prefixLocation);
364 break;
365 default: /* Not a prefix byte */
366 isPrefix = FALSE;
367 break;
368 }
369
370 if (isPrefix)
371 dprintf(insn, "Found prefix 0x%hhx", byte);
372 }
373
374 if (insn->mode == MODE_64BIT) {
375 if ((byte & 0xf0) == 0x40) {
376 uint8_t opcodeByte;
377
378 if(lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
379 dprintf(insn, "Redundant REX prefix");
380 return -1;
381 }
382
383 insn->rexPrefix = byte;
384 insn->necessaryPrefixLocation = insn->readerCursor - 2;
385
386 dprintf(insn, "Found REX prefix 0x%hhx", byte);
387 } else {
388 unconsumeByte(insn);
389 insn->necessaryPrefixLocation = insn->readerCursor - 1;
390 }
391 } else {
392 unconsumeByte(insn);
393 }
394
395 if (insn->mode == MODE_16BIT) {
396 insn->registerSize = (hasOpSize ? 4 : 2);
397 insn->addressSize = (hasAdSize ? 4 : 2);
398 insn->displacementSize = (hasAdSize ? 4 : 2);
399 insn->immediateSize = (hasOpSize ? 4 : 2);
400 } else if (insn->mode == MODE_32BIT) {
401 insn->registerSize = (hasOpSize ? 2 : 4);
402 insn->addressSize = (hasAdSize ? 2 : 4);
403 insn->displacementSize = (hasAdSize ? 2 : 4);
404 insn->immediateSize = (hasAdSize ? 2 : 4);
405 } else if (insn->mode == MODE_64BIT) {
406 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
407 insn->registerSize = 8;
408 insn->addressSize = (hasAdSize ? 4 : 8);
409 insn->displacementSize = 4;
410 insn->immediateSize = 4;
411 } else if (insn->rexPrefix) {
412 insn->registerSize = (hasOpSize ? 2 : 4);
413 insn->addressSize = (hasAdSize ? 4 : 8);
414 insn->displacementSize = (hasOpSize ? 2 : 4);
415 insn->immediateSize = (hasOpSize ? 2 : 4);
416 } else {
417 insn->registerSize = (hasOpSize ? 2 : 4);
418 insn->addressSize = (hasAdSize ? 4 : 8);
419 insn->displacementSize = (hasOpSize ? 2 : 4);
420 insn->immediateSize = (hasOpSize ? 2 : 4);
421 }
422 }
423
424 return 0;
425 }
426
427 /*
428 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
429 * extended or escape opcodes).
430 *
431 * @param insn - The instruction whose opcode is to be read.
432 * @return - 0 if the opcode could be read successfully; nonzero otherwise.
433 */
434 static int readOpcode(struct InternalInstruction* insn) {
435 /* Determine the length of the primary opcode */
436
437 uint8_t current;
438
439 dprintf(insn, "readOpcode()");
440
441 insn->opcodeType = ONEBYTE;
442 if (consumeByte(insn, ¤t))
443 return -1;
444
445 if (current == 0x0f) {
446 dprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
447
448 insn->twoByteEscape = current;
449
450 if (consumeByte(insn, ¤t))
451 return -1;
452
453 if (current == 0x38) {
454 dprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
455
456 insn->threeByteEscape = current;
457
458 if (consumeByte(insn, ¤t))
459 return -1;
460
461 insn->opcodeType = THREEBYTE_38;
462 } else if (current == 0x3a) {
463 dprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
464
465 insn->threeByteEscape = current;
466
467 if (consumeByte(insn, ¤t))
468 return -1;
469
470 insn->opcodeType = THREEBYTE_3A;
471 } else {
472 dprintf(insn, "Didn't find a three-byte escape prefix");
473
474 insn->opcodeType = TWOBYTE;
475 }
476 }
477
478 /*
479 * At this point we have consumed the full opcode.
480 * Anything we consume from here on must be unconsumed.
481 */
482
483 insn->opcode = current;
484
485 return 0;
486 }
487
488 static int readModRM(struct InternalInstruction* insn);
489
490 /*
491 * getIDWithAttrMask - Determines the ID of an instruction, consuming
492 * the ModR/M byte as appropriate for extended and escape opcodes,
493 * and using a supplied attribute mask.
494 *
495 * @param instructionID - A pointer whose target is filled in with the ID of the
496 * instruction.
497 * @param insn - The instruction whose ID is to be determined.
498 * @param attrMask - The attribute mask to search.
499 * @return - 0 if the ModR/M could be read when needed or was not
500 * needed; nonzero otherwise.
501 */
502 static int getIDWithAttrMask(uint16_t* instructionID,
503 struct InternalInstruction* insn,
504 uint8_t attrMask) {
505 BOOL hasModRMExtension;
506
507 uint8_t instructionClass;
508
509 instructionClass = contextForAttrs(attrMask);
510
511 hasModRMExtension = modRMRequired(insn->opcodeType,
512 instructionClass,
513 insn->opcode);
514
515 if (hasModRMExtension) {
516 readModRM(insn);
517
518 *instructionID = decode(insn->opcodeType,
519 instructionClass,
520 insn->opcode,
521 insn->modRM);
522 } else {
523 *instructionID = decode(insn->opcodeType,
524 instructionClass,
525 insn->opcode,
526 0);
527 }
528
529 return 0;
530 }
531
532 /*
533 * is16BitEquivalent - Determines whether two instruction names refer to
534 * equivalent instructions but one is 16-bit whereas the other is not.
535 *
536 * @param orig - The instruction that is not 16-bit
537 * @param equiv - The instruction that is 16-bit
538 */
539 static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
540 off_t i;
541
542 for(i = 0;; i++) {
543 if(orig[i] == '\0' && equiv[i] == '\0')
544 return TRUE;
545 if(orig[i] == '\0' || equiv[i] == '\0')
546 return FALSE;
547 if(orig[i] != equiv[i]) {
548 if((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
549 continue;
550 if((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
551 continue;
552 if((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
553 continue;
554 return FALSE;
555 }
556 }
557 }
558
559 /*
560 * is64BitEquivalent - Determines whether two instruction names refer to
561 * equivalent instructions but one is 64-bit whereas the other is not.
562 *
563 * @param orig - The instruction that is not 64-bit
564 * @param equiv - The instruction that is 64-bit
565 */
566 static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
567 off_t i;
568
569 for(i = 0;; i++) {
570 if(orig[i] == '\0' && equiv[i] == '\0')
571 return TRUE;
572 if(orig[i] == '\0' || equiv[i] == '\0')
573 return FALSE;
574 if(orig[i] != equiv[i]) {
575 if((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
576 continue;
577 if((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
578 continue;
579 if((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
580 continue;
581 return FALSE;
582 }
583 }
584 }
585
586
587 /*
588 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
589 * appropriate for extended and escape opcodes. Determines the attributes and
590 * context for the instruction before doing so.
591 *
592 * @param insn - The instruction whose ID is to be determined.
593 * @return - 0 if the ModR/M could be read when needed or was not needed;
594 * nonzero otherwise.
595 */
596 static int getID(struct InternalInstruction* insn) {
597 uint8_t attrMask;
598 uint16_t instructionID;
599
600 dprintf(insn, "getID()");
601
602 attrMask = ATTR_NONE;
603
604 if (insn->mode == MODE_64BIT)
605 attrMask |= ATTR_64BIT;
606
607 if (insn->rexPrefix & 0x08)
608 attrMask |= ATTR_REXW;
609
610 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
611 attrMask |= ATTR_OPSIZE;
612 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
613 attrMask |= ATTR_XS;
614 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
615 attrMask |= ATTR_XD;
616
617 if(getIDWithAttrMask(&instructionID, insn, attrMask))
618 return -1;
619
620 /* The following clauses compensate for limitations of the tables. */
621
622 if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
623 /*
624 * Although for SSE instructions it is usually necessary to treat REX.W+F2
625 * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
626 * an occasional instruction where F2 is incidental and REX.W is the more
627 * significant. If the decoded instruction is 32-bit and adding REX.W
628 * instead of F2 changes a 32 to a 64, we adopt the new encoding.
629 */
630
631 struct InstructionSpecifier* spec;
632 uint16_t instructionIDWithREXw;
633 struct InstructionSpecifier* specWithREXw;
634
635 spec = specifierForUID(instructionID);
636
637 if (getIDWithAttrMask(&instructionIDWithREXw,
638 insn,
639 attrMask & (~ATTR_XD))) {
640 /*
641 * Decoding with REX.w would yield nothing; give up and return original
642 * decode.
643 */
644
645 insn->instructionID = instructionID;
646 insn->spec = spec;
647 return 0;
648 }
649
650 specWithREXw = specifierForUID(instructionIDWithREXw);
651
652 if (is64BitEquivalent(spec->name, specWithREXw->name)) {
653 insn->instructionID = instructionIDWithREXw;
654 insn->spec = specWithREXw;
655 } else {
656 insn->instructionID = instructionID;
657 insn->spec = spec;
658 }
659 return 0;
660 }
661
662 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
663 /*
664 * The instruction tables make no distinction between instructions that
665 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
666 * particular spot (i.e., many MMX operations). In general we're
667 * conservative, but in the specific case where OpSize is present but not
668 * in the right place we check if there's a 16-bit operation.
669 */
670
671 struct InstructionSpecifier* spec;
672 uint16_t instructionIDWithOpsize;
673 struct InstructionSpecifier* specWithOpsize;
674
675 spec = specifierForUID(instructionID);
676
677 if (getIDWithAttrMask(&instructionIDWithOpsize,
678 insn,
679 attrMask | ATTR_OPSIZE)) {
680 /*
681 * ModRM required with OpSize but not present; give up and return version
682 * without OpSize set
683 */
684
685 insn->instructionID = instructionID;
686 insn->spec = spec;
687 return 0;
688 }
689
690 specWithOpsize = specifierForUID(instructionIDWithOpsize);
691
692 if (is16BitEquvalent(spec->name, specWithOpsize->name)) {
693 insn->instructionID = instructionIDWithOpsize;
694 insn->spec = specWithOpsize;
695 } else {
696 insn->instructionID = instructionID;
697 insn->spec = spec;
698 }
699 return 0;
700 }
701
702 insn->instructionID = instructionID;
703 insn->spec = specifierForUID(insn->instructionID);
704
705 return 0;
706 }
707
708 /*
709 * readSIB - Consumes the SIB byte to determine addressing information for an
710 * instruction.
711 *
712 * @param insn - The instruction whose SIB byte is to be read.
713 * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
714 */
715 static int readSIB(struct InternalInstruction* insn) {
716 SIBIndex sibIndexBase;
717 SIBBase sibBaseBase;
718 uint8_t index, base;
719
720 dprintf(insn, "readSIB()");
721
722 if (insn->consumedSIB)
723 return 0;
724
725 insn->consumedSIB = TRUE;
726
727 switch (insn->addressSize) {
728 case 2:
729 dprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
730 return -1;
731 break;
732 case 4:
733 sibIndexBase = SIB_INDEX_EAX;
734 sibBaseBase = SIB_BASE_EAX;
735 break;
736 case 8:
737 sibIndexBase = SIB_INDEX_RAX;
738 sibBaseBase = SIB_BASE_RAX;
739 break;
740 }
741
742 if (consumeByte(insn, &insn->sib))
743 return -1;
744
745 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
746
747 switch (index) {
748 case 0x4:
749 insn->sibIndex = SIB_INDEX_NONE;
750 break;
751 default:
752 insn->sibIndex = (EABase)(sibIndexBase + index);
753 if (insn->sibIndex == SIB_INDEX_sib ||
754 insn->sibIndex == SIB_INDEX_sib64)
755 insn->sibIndex = SIB_INDEX_NONE;
756 break;
757 }
758
759 switch (scaleFromSIB(insn->sib)) {
760 case 0:
761 insn->sibScale = 1;
762 break;
763 case 1:
764 insn->sibScale = 2;
765 break;
766 case 2:
767 insn->sibScale = 4;
768 break;
769 case 3:
770 insn->sibScale = 8;
771 break;
772 }
773
774 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
775
776 switch (base) {
777 case 0x5:
778 switch (modFromModRM(insn->modRM)) {
779 case 0x0:
780 insn->eaDisplacement = EA_DISP_32;
781 insn->sibBase = SIB_BASE_NONE;
782 break;
783 case 0x1:
784 insn->eaDisplacement = EA_DISP_8;
785 insn->sibBase = (insn->addressSize == 4 ?
786 SIB_BASE_EBP : SIB_BASE_RBP);
787 break;
788 case 0x2:
789 insn->eaDisplacement = EA_DISP_32;
790 insn->sibBase = (insn->addressSize == 4 ?
791 SIB_BASE_EBP : SIB_BASE_RBP);
792 break;
793 case 0x3:
794 unreachable("Cannot have Mod = 0b11 and a SIB byte");
795 }
796 break;
797 default:
798 insn->sibBase = (EABase)(sibBaseBase + base);
799 break;
800 }
801
802 return 0;
803 }
804
805 /*
806 * readDisplacement - Consumes the displacement of an instruction.
807 *
808 * @param insn - The instruction whose displacement is to be read.
809 * @return - 0 if the displacement byte was successfully read; nonzero
810 * otherwise.
811 */
812 static int readDisplacement(struct InternalInstruction* insn) {
813 int8_t d8;
814 int16_t d16;
815 int32_t d32;
816
817 dprintf(insn, "readDisplacement()");
818
819 if (insn->consumedDisplacement)
820 return 0;
821
822 insn->consumedDisplacement = TRUE;
823
824 switch (insn->eaDisplacement) {
825 case EA_DISP_NONE:
826 insn->consumedDisplacement = FALSE;
827 break;
828 case EA_DISP_8:
829 if (consumeInt8(insn, &d8))
830 return -1;
831 insn->displacement = d8;
832 break;
833 case EA_DISP_16:
834 if (consumeInt16(insn, &d16))
835 return -1;
836 insn->displacement = d16;
837 break;
838 case EA_DISP_32:
839 if (consumeInt32(insn, &d32))
840 return -1;
841 insn->displacement = d32;
842 break;
843 }
844
845 insn->consumedDisplacement = TRUE;
846 return 0;
847 }
848
849 /*
850 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
851 * displacement) for an instruction and interprets it.
852 *
853 * @param insn - The instruction whose addressing information is to be read.
854 * @return - 0 if the information was successfully read; nonzero otherwise.
855 */
856 static int readModRM(struct InternalInstruction* insn) {
857 uint8_t mod, rm, reg;
858
859 dprintf(insn, "readModRM()");
860
861 if (insn->consumedModRM)
862 return 0;
863
864 consumeByte(insn, &insn->modRM);
865 insn->consumedModRM = TRUE;
866
867 mod = modFromModRM(insn->modRM);
868 rm = rmFromModRM(insn->modRM);
869 reg = regFromModRM(insn->modRM);
870
871 /*
872 * This goes by insn->registerSize to pick the correct register, which messes
873 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
874 * fixupReg().
875 */
876 switch (insn->registerSize) {
877 case 2:
878 insn->regBase = REG_AX;
879 insn->eaRegBase = EA_REG_AX;
880 break;
881 case 4:
882 insn->regBase = REG_EAX;
883 insn->eaRegBase = EA_REG_EAX;
884 break;
885 case 8:
886 insn->regBase = REG_RAX;
887 insn->eaRegBase = EA_REG_RAX;
888 break;
889 }
890
891 reg |= rFromREX(insn->rexPrefix) << 3;
892 rm |= bFromREX(insn->rexPrefix) << 3;
893
894 insn->reg = (Reg)(insn->regBase + reg);
895
896 switch (insn->addressSize) {
897 case 2:
898 insn->eaBaseBase = EA_BASE_BX_SI;
899
900 switch (mod) {
901 case 0x0:
902 if (rm == 0x6) {
903 insn->eaBase = EA_BASE_NONE;
904 insn->eaDisplacement = EA_DISP_16;
905 if(readDisplacement(insn))
906 return -1;
907 } else {
908 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
909 insn->eaDisplacement = EA_DISP_NONE;
910 }
911 break;
912 case 0x1:
913 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
914 insn->eaDisplacement = EA_DISP_8;
915 if(readDisplacement(insn))
916 return -1;
917 break;
918 case 0x2:
919 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
920 insn->eaDisplacement = EA_DISP_16;
921 if(readDisplacement(insn))
922 return -1;
923 break;
924 case 0x3:
925 insn->eaBase = (EABase)(insn->eaRegBase + rm);
926 if(readDisplacement(insn))
927 return -1;
928 break;
929 }
930 break;
931 case 4:
932 case 8:
933 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
934
935 switch (mod) {
936 case 0x0:
937 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
938 switch (rm) {
939 case 0x4:
940 case 0xc: /* in case REXW.b is set */
941 insn->eaBase = (insn->addressSize == 4 ?
942 EA_BASE_sib : EA_BASE_sib64);
943 readSIB(insn);
944 if(readDisplacement(insn))
945 return -1;
946 break;
947 case 0x5:
948 insn->eaBase = EA_BASE_NONE;
949 insn->eaDisplacement = EA_DISP_32;
950 if(readDisplacement(insn))
951 return -1;
952 break;
953 default:
954 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
955 break;
956 }
957 break;
958 case 0x1:
959 case 0x2:
960 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
961 switch (rm) {
962 case 0x4:
963 case 0xc: /* in case REXW.b is set */
964 insn->eaBase = EA_BASE_sib;
965 readSIB(insn);
966 if(readDisplacement(insn))
967 return -1;
968 break;
969 default:
970 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
971 if(readDisplacement(insn))
972 return -1;
973 break;
974 }
975 break;
976 case 0x3:
977 insn->eaDisplacement = EA_DISP_NONE;
978 insn->eaBase = (EABase)(insn->eaRegBase + rm);
979 break;
980 }
981 break;
982 } /* switch (insn->addressSize) */
983
984 return 0;
985 }
986
987 #define GENERIC_FIXUP_FUNC(name, base, prefix) \
988 static uint8_t name(struct InternalInstruction *insn, \
989 OperandType type, \
990 uint8_t index, \
991 uint8_t *valid) { \
992 *valid = 1; \
993 switch (type) { \
994 default: \
995 unreachable("Unhandled register type"); \
996 case TYPE_Rv: \
997 return base + index; \
998 case TYPE_R8: \
999 if(insn->rexPrefix && \
1000 index >= 4 && index <= 7) { \
1001 return prefix##_SPL + (index - 4); \
1002 } else { \
1003 return prefix##_AL + index; \
1004 } \
1005 case TYPE_R16: \
1006 return prefix##_AX + index; \
1007 case TYPE_R32: \
1008 return prefix##_EAX + index; \
1009 case TYPE_R64: \
1010 return prefix##_RAX + index; \
1011 case TYPE_XMM128: \
1012 case TYPE_XMM64: \
1013 case TYPE_XMM32: \
1014 case TYPE_XMM: \
1015 return prefix##_XMM0 + index; \
1016 case TYPE_MM64: \
1017 case TYPE_MM32: \
1018 case TYPE_MM: \
1019 if(index > 7) \
1020 *valid = 0; \
1021 return prefix##_MM0 + index; \
1022 case TYPE_SEGMENTREG: \
1023 if(index > 5) \
1024 *valid = 0; \
1025 return prefix##_ES + index; \
1026 case TYPE_DEBUGREG: \
1027 if(index > 7) \
1028 *valid = 0; \
1029 return prefix##_DR0 + index; \
1030 case TYPE_CR32: \
1031 if(index > 7) \
1032 *valid = 0; \
1033 return prefix##_ECR0 + index; \
1034 case TYPE_CR64: \
1035 if(index > 8) \
1036 *valid = 0; \
1037 return prefix##_RCR0 + index; \
1038 } \
1039 }
1040
1041 /*
1042 * fixup*Value - Consults an operand type to determine the meaning of the
1043 * reg or R/M field. If the operand is an XMM operand, for example, an
1044 * operand would be XMM0 instead of AX, which readModRM() would otherwise
1045 * misinterpret it as.
1046 *
1047 * @param insn - The instruction containing the operand.
1048 * @param type - The operand type.
1049 * @param index - The existing value of the field as reported by readModRM().
1050 * @param valid - The address of a uint8_t. The target is set to 1 if the
1051 * field is valid for the register class; 0 if not.
1052 */
1053 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, REG)
1054 GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
1055
1056 /*
1057 * fixupReg - Consults an operand specifier to determine which of the
1058 * fixup*Value functions to use in correcting readModRM()'ss interpretation.
1059 *
1060 * @param insn - See fixup*Value().
1061 * @param op - The operand specifier.
1062 * @return - 0 if fixup was successful; -1 if the register returned was
1063 * invalid for its class.
1064 */
1065 static int fixupReg(struct InternalInstruction *insn,
1066 struct OperandSpecifier *op) {
1067 uint8_t valid;
1068
1069 dprintf(insn, "fixupReg()");
1070
1071 switch ((OperandEncoding)op->encoding) {
1072 default:
1073 unreachable("Expected a REG or R/M encoding in fixupReg");
1074 case ENCODING_REG:
1075 insn->reg = (Reg)fixupRegValue(insn,
1076 (OperandType)op->type,
1077 insn->reg - insn->regBase,
1078 &valid);
1079 if (!valid)
1080 return -1;
1081 break;
1082 case ENCODING_RM:
1083 if (insn->eaBase >= insn->eaRegBase) {
1084 insn->eaBase = (EABase)fixupRMValue(insn,
1085 (OperandType)op->type,
1086 insn->eaBase - insn->eaRegBase,
1087 &valid);
1088 if (!valid)
1089 return -1;
1090 }
1091 break;
1092 }
1093
1094 return 0;
1095 }
1096
1097 /*
1098 * readOpcodeModifier - Reads an operand from the opcode field of an
1099 * instruction. Handles AddRegFrm instructions.
1100 *
1101 * @param insn - The instruction whose opcode field is to be read.
1102 * @param inModRM - Indicates that the opcode field is to be read from the
1103 * ModR/M extension; useful for escape opcodes
1104 */
1105 static void readOpcodeModifier(struct InternalInstruction* insn) {
1106 dprintf(insn, "readOpcodeModifier()");
1107
1108 if (insn->consumedOpcodeModifier)
1109 return;
1110
1111 insn->consumedOpcodeModifier = TRUE;
1112
1113 switch(insn->spec->modifierType) {
1114 default:
1115 unreachable("Unknown modifier type.");
1116 case MODIFIER_NONE:
1117 unreachable("No modifier but an operand expects one.");
1118 case MODIFIER_OPCODE:
1119 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
1120 break;
1121 case MODIFIER_MODRM:
1122 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
1123 break;
1124 }
1125 }
1126
1127 /*
1128 * readOpcodeRegister - Reads an operand from the opcode field of an
1129 * instruction and interprets it appropriately given the operand width.
1130 * Handles AddRegFrm instructions.
1131 *
1132 * @param insn - See readOpcodeModifier().
1133 * @param size - The width (in bytes) of the register being specified.
1134 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1135 * RAX.
1136 */
1137 static void readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
1138 dprintf(insn, "readOpcodeRegister()");
1139
1140 readOpcodeModifier(insn);
1141
1142 if (size == 0)
1143 size = insn->registerSize;
1144
1145 switch (size) {
1146 case 1:
1147 insn->opcodeRegister = (Reg)(REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1148 | insn->opcodeModifier));
1149 if(insn->rexPrefix &&
1150 insn->opcodeRegister >= REG_AL + 0x4 &&
1151 insn->opcodeRegister < REG_AL + 0x8) {
1152 insn->opcodeRegister = (Reg)(REG_SPL + (insn->opcodeRegister - REG_AL - 4));
1153 }
1154
1155 break;
1156 case 2:
1157 insn->opcodeRegister = (Reg)(REG_AX + ((bFromREX(insn->rexPrefix) << 3)
1158 | insn->opcodeModifier));
1159 break;
1160 case 4:
1161 insn->opcodeRegister = (Reg)(REG_EAX + ((bFromREX(insn->rexPrefix) << 3)
1162 | insn->opcodeModifier));
1163 break;
1164 case 8:
1165 insn->opcodeRegister = (Reg)(REG_RAX + ((bFromREX(insn->rexPrefix) << 3)
1166 |insn->opcodeModifier));
1167 break;
1168 }
1169 }
1170
1171 /*
1172 * readImmediate - Consumes an immediate operand from an instruction, given the
1173 * desired operand size.
1174 *
1175 * @param insn - The instruction whose operand is to be read.
1176 * @param size - The width (in bytes) of the operand.
1177 * @return - 0 if the immediate was successfully consumed; nonzero
1178 * otherwise.
1179 */
1180 static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1181 uint8_t imm8;
1182 uint16_t imm16;
1183 uint32_t imm32;
1184 uint64_t imm64;
1185
1186 dprintf(insn, "readImmediate()");
1187
1188 if (insn->numImmediatesConsumed == 2)
1189 unreachable("Already consumed two immediates");
1190
1191 if (size == 0)
1192 size = insn->immediateSize;
1193 else
1194 insn->immediateSize = size;
1195
1196 switch (size) {
1197 case 1:
1198 if (consumeByte(insn, &imm8))
1199 return -1;
1200 insn->immediates[insn->numImmediatesConsumed] = imm8;
1201 break;
1202 case 2:
1203 if (consumeUInt16(insn, &imm16))
1204 return -1;
1205 insn->immediates[insn->numImmediatesConsumed] = imm16;
1206 break;
1207 case 4:
1208 if (consumeUInt32(insn, &imm32))
1209 return -1;
1210 insn->immediates[insn->numImmediatesConsumed] = imm32;
1211 break;
1212 case 8:
1213 if (consumeUInt64(insn, &imm64))
1214 return -1;
1215 insn->immediates[insn->numImmediatesConsumed] = imm64;
1216 break;
1217 }
1218
1219 insn->numImmediatesConsumed++;
1220
1221 return 0;
1222 }
1223
1224 /*
1225 * readOperands - Consults the specifier for an instruction and consumes all
1226 * operands for that instruction, interpreting them as it goes.
1227 *
1228 * @param insn - The instruction whose operands are to be read and interpreted.
1229 * @return - 0 if all operands could be read; nonzero otherwise.
1230 */
1231 static int readOperands(struct InternalInstruction* insn) {
1232 int index;
1233
1234 dprintf(insn, "readOperands()");
1235
1236 for (index = 0; index < X86_MAX_OPERANDS; ++index) {
1237 switch (insn->spec->operands[index].encoding) {
1238 case ENCODING_NONE:
1239 break;
1240 case ENCODING_REG:
1241 case ENCODING_RM:
1242 if (readModRM(insn))
1243 return -1;
1244 if (fixupReg(insn, &insn->spec->operands[index]))
1245 return -1;
1246 break;
1247 case ENCODING_CB:
1248 case ENCODING_CW:
1249 case ENCODING_CD:
1250 case ENCODING_CP:
1251 case ENCODING_CO:
1252 case ENCODING_CT:
1253 dprintf(insn, "We currently don't hande code-offset encodings");
1254 return -1;
1255 case ENCODING_IB:
1256 if (readImmediate(insn, 1))
1257 return -1;
1258 break;
1259 case ENCODING_IW:
1260 if (readImmediate(insn, 2))
1261 return -1;
1262 break;
1263 case ENCODING_ID:
1264 if (readImmediate(insn, 4))
1265 return -1;
1266 break;
1267 case ENCODING_IO:
1268 if (readImmediate(insn, 8))
1269 return -1;
1270 break;
1271 case ENCODING_Iv:
1272 readImmediate(insn, insn->immediateSize);
1273 break;
1274 case ENCODING_Ia:
1275 readImmediate(insn, insn->addressSize);
1276 break;
1277 case ENCODING_RB:
1278 readOpcodeRegister(insn, 1);
1279 break;
1280 case ENCODING_RW:
1281 readOpcodeRegister(insn, 2);
1282 break;
1283 case ENCODING_RD:
1284 readOpcodeRegister(insn, 4);
1285 break;
1286 case ENCODING_RO:
1287 readOpcodeRegister(insn, 8);
1288 break;
1289 case ENCODING_Rv:
1290 readOpcodeRegister(insn, 0);
1291 break;
1292 case ENCODING_I:
1293 readOpcodeModifier(insn);
1294 break;
1295 case ENCODING_DUP:
1296 break;
1297 default:
1298 dprintf(insn, "Encountered an operand with an unknown encoding.");
1299 return -1;
1300 }
1301 }
1302
1303 return 0;
1304 }
1305
1306 /*
1307 * decodeInstruction - Reads and interprets a full instruction provided by the
1308 * user.
1309 *
1310 * @param insn - A pointer to the instruction to be populated. Must be
1311 * pre-allocated.
1312 * @param reader - The function to be used to read the instruction's bytes.
1313 * @param readerArg - A generic argument to be passed to the reader to store
1314 * any internal state.
1315 * @param logger - If non-NULL, the function to be used to write log messages
1316 * and warnings.
1317 * @param loggerArg - A generic argument to be passed to the logger to store
1318 * any internal state.
1319 * @param startLoc - The address (in the reader's address space) of the first
1320 * byte in the instruction.
1321 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1322 * decode the instruction in.
1323 * @return - 0 if the instruction's memory could be read; nonzero if
1324 * not.
1325 */
1326 int decodeInstruction(struct InternalInstruction* insn,
1327 byteReader_t reader,
1328 void* readerArg,
1329 dlog_t logger,
1330 void* loggerArg,
1331 uint64_t startLoc,
1332 DisassemblerMode mode) {
1333 bzero(insn, sizeof(struct InternalInstruction));
1334
1335 insn->reader = reader;
1336 insn->readerArg = readerArg;
1337 insn->dlog = logger;
1338 insn->dlogArg = loggerArg;
1339 insn->startLocation = startLoc;
1340 insn->readerCursor = startLoc;
1341 insn->mode = mode;
1342 insn->numImmediatesConsumed = 0;
1343
1344 if (readPrefixes(insn) ||
1345 readOpcode(insn) ||
1346 getID(insn) ||
1347 insn->instructionID == 0 ||
1348 readOperands(insn))
1349 return -1;
1350
1351 insn->length = insn->readerCursor - insn->startLocation;
1352
1353 dprintf(insn, "Read from 0x%llx to 0x%llx: length %llu",
1354 startLoc, insn->readerCursor, insn->length);
1355
1356 if (insn->length > 15)
1357 dprintf(insn, "Instruction exceeds 15-byte limit");
1358
1359 return 0;
1360 }
0 /*===- X86DisassemblerDecoder.h - Disassembler decoder -------------*- C -*-==*
1 *
2 * The LLVM Compiler Infrastructure
3 *
4 * This file is distributed under the University of Illinois Open Source
5 * License. See LICENSE.TXT for details.
6 *
7 *===----------------------------------------------------------------------===*
8 *
9 * This file is part of the X86 Disassembler.
10 * It contains the public interface of the instruction decoder.
11 * Documentation for the disassembler can be found in X86Disassembler.h.
12 *
13 *===----------------------------------------------------------------------===*/
14
15 #ifndef X86DISASSEMBLERDECODER_H
16 #define X86DISASSEMBLERDECODER_H
17
18 #ifdef __cplusplus
19 extern "C" {
20 #endif
21
/* X86DisassemblerDecoderCommon.h expands this macro inside
   struct InstructionSpecifier, so it must be defined before the #include. */
22 #define INSTRUCTION_SPECIFIER_FIELDS \
23 const char* name;
24
/* Likewise expanded inside struct ModRMDecision by the common header. */
25 #define INSTRUCTION_IDS \
26 InstrUID* instructionIDs;
27
28 #include "X86DisassemblerDecoderCommon.h"
29
/* Both macros exist only to parameterize the include above. */
30 #undef INSTRUCTION_SPECIFIER_FIELDS
31 #undef INSTRUCTION_IDS
32
33 /*
34 * Accessor functions for various fields of an Intel instruction
35 */
/* Returns the mod field: the top two bits (7:6) of the ModR/M byte. */
static inline uint8_t modFromModRM(uint8_t modRM) {
  return (uint8_t)((modRM >> 6) & 0x3);
}
/* Returns the reg field: bits 5:3 of the ModR/M byte. */
static inline uint8_t regFromModRM(uint8_t modRM) {
  return (uint8_t)((modRM >> 3) & 0x7);
}
/* Returns the r/m field: the low three bits (2:0) of the ModR/M byte. */
static inline uint8_t rmFromModRM(uint8_t modRM) {
  const uint8_t rmMask = 0x07;
  return (uint8_t)(modRM & rmMask);
}
/* Returns the scale field: the top two bits (7:6) of the SIB byte. */
static inline uint8_t scaleFromSIB(uint8_t sib) {
  return (uint8_t)((sib >> 6) & 0x3);
}
/* Returns the index field: bits 5:3 of the SIB byte. */
static inline uint8_t indexFromSIB(uint8_t sib) {
  return (uint8_t)((sib >> 3) & 0x7);
}
/* Returns the base field: the low three bits (2:0) of the SIB byte. */
static inline uint8_t baseFromSIB(uint8_t sib) {
  const uint8_t baseMask = 0x07;
  return (uint8_t)(sib & baseMask);
}
/* Returns the W bit (bit 3) of a REX prefix byte as 0 or 1. */
static inline uint8_t wFromREX(uint8_t rex) {
  return (uint8_t)((rex >> 3) & 0x1);
}
/* Returns the R bit (bit 2) of a REX prefix byte as 0 or 1. */
static inline uint8_t rFromREX(uint8_t rex) {
  return (uint8_t)((rex >> 2) & 0x1);
}
/* Returns the X bit (bit 1) of a REX prefix byte as 0 or 1. */
static inline uint8_t xFromREX(uint8_t rex) {
  return (uint8_t)((rex >> 1) & 0x1);
}
/* Returns the B bit (bit 0) of a REX prefix byte as 0 or 1. */
static inline uint8_t bFromREX(uint8_t rex) {
  const uint8_t bMask = 0x01;
  return (uint8_t)(rex & bMask);
}
46
47 /*
48 * These enums represent Intel registers for use by the decoder.
49 */
50
/* X-macro list of 8-bit register names.  It is expanded through ALL_REGS with
   ENTRY defined at the point of use (EA_REG_##x in EABase, REG_##x in Reg), so
   the order here fixes the resulting enumerator values. */
51 #define REGS_8BIT \
52 ENTRY(AL) \
53 ENTRY(CL) \
54 ENTRY(DL) \
55 ENTRY(BL) \
56 ENTRY(AH) \
57 ENTRY(CH) \
58 ENTRY(DH) \
59 ENTRY(BH) \
60 ENTRY(R8B) \
61 ENTRY(R9B) \
62 ENTRY(R10B) \
63 ENTRY(R11B) \
64 ENTRY(R12B) \
65 ENTRY(R13B) \
66 ENTRY(R14B) \
67 ENTRY(R15B) \
68 ENTRY(SPL) \
69 ENTRY(BPL) \
70 ENTRY(SIL) \
71 ENTRY(DIL)
72
/* X-macro list of 16-bit effective-address bases.  It is expanded through
   ALL_EA_BASES into EA_BASE_##x (EABase) and SIB_INDEX_##x (SIBIndex).  The
   first four entries name register pairs rather than single registers. */
73 #define EA_BASES_16BIT \
74 ENTRY(BX_SI) \
75 ENTRY(BX_DI) \
76 ENTRY(BP_SI) \
77 ENTRY(BP_DI) \
78 ENTRY(SI) \
79 ENTRY(DI) \
80 ENTRY(BP) \
81 ENTRY(BX) \
82 ENTRY(R8W) \
83 ENTRY(R9W) \
84 ENTRY(R10W) \
85 ENTRY(R11W) \
86 ENTRY(R12W) \
87 ENTRY(R13W) \
88 ENTRY(R14W) \
89 ENTRY(R15W)
90
/* X-macro list of 16-bit register names; part of ALL_REGS. */
91 #define REGS_16BIT \
92 ENTRY(AX) \
93 ENTRY(CX) \
94 ENTRY(DX) \
95 ENTRY(BX) \
96 ENTRY(SP) \
97 ENTRY(BP) \
98 ENTRY(SI) \
99 ENTRY(DI) \
100 ENTRY(R8W) \
101 ENTRY(R9W) \
102 ENTRY(R10W) \
103 ENTRY(R11W) \
104 ENTRY(R12W) \
105 ENTRY(R13W) \
106 ENTRY(R14W) \
107 ENTRY(R15W)
108
/* X-macro list of 32-bit effective-address bases; part of ALL_EA_BASES.  The
   `sib` entry sits in the slot that ESP occupies in REGS_32BIT.
   NOTE(review): presumably it marks that a SIB byte follows - confirm. */
109 #define EA_BASES_32BIT \
110 ENTRY(EAX) \
111 ENTRY(ECX) \
112 ENTRY(EDX) \
113 ENTRY(EBX) \
114 ENTRY(sib) \
115 ENTRY(EBP) \
116 ENTRY(ESI) \
117 ENTRY(EDI) \
118 ENTRY(R8D) \
119 ENTRY(R9D) \
120 ENTRY(R10D) \
121 ENTRY(R11D) \
122 ENTRY(R12D) \
123 ENTRY(R13D) \
124 ENTRY(R14D) \
125 ENTRY(R15D)
126
/* X-macro list of 32-bit register names; part of ALL_REGS and
   ALL_SIB_BASES. */
127 #define REGS_32BIT \
128 ENTRY(EAX) \
129 ENTRY(ECX) \
130 ENTRY(EDX) \
131 ENTRY(EBX) \
132 ENTRY(ESP) \
133 ENTRY(EBP) \
134 ENTRY(ESI) \
135 ENTRY(EDI) \
136 ENTRY(R8D) \
137 ENTRY(R9D) \
138 ENTRY(R10D) \
139 ENTRY(R11D) \
140 ENTRY(R12D) \
141 ENTRY(R13D) \
142 ENTRY(R14D) \
143 ENTRY(R15D)
144
/* X-macro list of 64-bit effective-address bases; part of ALL_EA_BASES.  The
   `sib64` entry sits in the slot that RSP occupies in REGS_64BIT. */
145 #define EA_BASES_64BIT \
146 ENTRY(RAX) \
147 ENTRY(RCX) \
148 ENTRY(RDX) \
149 ENTRY(RBX) \
150 ENTRY(sib64) \
151 ENTRY(RBP) \
152 ENTRY(RSI) \
153 ENTRY(RDI) \
154 ENTRY(R8) \
155 ENTRY(R9) \
156 ENTRY(R10) \
157 ENTRY(R11) \
158 ENTRY(R12) \
159 ENTRY(R13) \
160 ENTRY(R14) \
161 ENTRY(R15)
162
/* X-macro list of 64-bit register names; part of ALL_REGS and
   ALL_SIB_BASES. */
163 #define REGS_64BIT \
164 ENTRY(RAX) \
165 ENTRY(RCX) \
166 ENTRY(RDX) \
167 ENTRY(RBX) \
168 ENTRY(RSP) \
169 ENTRY(RBP) \
170 ENTRY(RSI) \
171 ENTRY(RDI) \
172 ENTRY(R8) \
173 ENTRY(R9) \
174 ENTRY(R10) \
175 ENTRY(R11) \
176 ENTRY(R12) \
177 ENTRY(R13) \
178 ENTRY(R14) \
179 ENTRY(R15)
180
/* X-macro list of the eight MMX register names; part of ALL_REGS. */
181 #define REGS_MMX \
182 ENTRY(MM0) \
183 ENTRY(MM1) \
184 ENTRY(MM2) \
185 ENTRY(MM3) \
186 ENTRY(MM4) \
187 ENTRY(MM5) \
188 ENTRY(MM6) \
189 ENTRY(MM7)
190
/* X-macro list of the sixteen XMM register names; part of ALL_REGS. */
191 #define REGS_XMM \
192 ENTRY(XMM0) \
193 ENTRY(XMM1) \
194 ENTRY(XMM2) \
195 ENTRY(XMM3) \
196 ENTRY(XMM4) \
197 ENTRY(XMM5) \
198 ENTRY(XMM6) \
199 ENTRY(XMM7) \
200 ENTRY(XMM8) \
201 ENTRY(XMM9) \
202 ENTRY(XMM10) \
203 ENTRY(XMM11) \
204 ENTRY(XMM12) \
205 ENTRY(XMM13) \
206 ENTRY(XMM14) \
207 ENTRY(XMM15)
208
/* X-macro list of segment register names; part of ALL_REGS.  The order here
   (ES, CS, SS, DS, FS, GS) differs from the SegmentOverride enum order. */
209 #define REGS_SEGMENT \
210 ENTRY(ES) \
211 ENTRY(CS) \
212 ENTRY(SS) \
213 ENTRY(DS) \
214 ENTRY(FS) \
215 ENTRY(GS)
216
/* X-macro list of the debug register names DR0..DR7; part of ALL_REGS. */
217 #define REGS_DEBUG \
218 ENTRY(DR0) \
219 ENTRY(DR1) \
220 ENTRY(DR2) \
221 ENTRY(DR3) \
222 ENTRY(DR4) \
223 ENTRY(DR5) \
224 ENTRY(DR6) \
225 ENTRY(DR7)
226
/* X-macro list of control register names ECR0..ECR7; part of ALL_REGS. */
227 #define REGS_CONTROL_32BIT \
228 ENTRY(ECR0) \
229 ENTRY(ECR1) \
230 ENTRY(ECR2) \
231 ENTRY(ECR3) \
232 ENTRY(ECR4) \
233 ENTRY(ECR5) \
234 ENTRY(ECR6) \
235 ENTRY(ECR7)
236
/* X-macro list of control register names RCR0..RCR8; part of ALL_REGS.  Note
   that this list has nine entries, one more than REGS_CONTROL_32BIT. */
237 #define REGS_CONTROL_64BIT \
238 ENTRY(RCR0) \
239 ENTRY(RCR1) \
240 ENTRY(RCR2) \
241 ENTRY(RCR3) \
242 ENTRY(RCR4) \
243 ENTRY(RCR5) \
244 ENTRY(RCR6) \
245 ENTRY(RCR7) \
246 ENTRY(RCR8)
247
/* Concatenation of the 16-, 32- and 64-bit effective-address base lists, in
   that order.  Used to generate the EABase and SIBIndex enumerators. */
248 #define ALL_EA_BASES \
249 EA_BASES_16BIT \
250 EA_BASES_32BIT \
251 EA_BASES_64BIT
252
/* Concatenation of the 32- and 64-bit register lists.  Used to generate the
   SIBBase enumerators. */
253 #define ALL_SIB_BASES \
254 REGS_32BIT \
255 REGS_64BIT
256
/* Concatenation of every register list above, followed by a single RIP entry.
   Used to generate the EA_REG_* enumerators of EABase and the Reg enum. */
257 #define ALL_REGS \
258 REGS_8BIT \
259 REGS_16BIT \
260 REGS_32BIT \
261 REGS_64BIT \
262 REGS_MMX \
263 REGS_XMM \
264 REGS_SEGMENT \
265 REGS_DEBUG \
266 REGS_CONTROL_32BIT \
267 REGS_CONTROL_64BIT \
268 ENTRY(RIP)
269
270 /*
271 * EABase - All possible values of the base field for effective-address
272 * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We
273 * distinguish between bases (EA_BASE_*) and registers that just happen to be
274 * referred to when Mod == 0b11 (EA_REG_*).
275 */
/* Enumerator values are positional: EA_BASE_NONE is 0, then one EA_BASE_* per
   ALL_EA_BASES entry, then one EA_REG_* per ALL_REGS entry. */
276 typedef enum {
277 EA_BASE_NONE,
278 #define ENTRY(x) EA_BASE_##x,
279 ALL_EA_BASES
280 #undef ENTRY
281 #define ENTRY(x) EA_REG_##x,
282 ALL_REGS
283 #undef ENTRY
/* One past the last real enumerator. */
284 EA_max
285 } EABase;
286
287 /*
288 * SIBIndex - All possible values of the SIB index field.
289 * Borrows entries from ALL_EA_BASES with the special case that
290 * sib is synonymous with NONE.
291 */
/* SIB_INDEX_NONE is 0; the remaining enumerators are generated from
   ALL_EA_BASES, so SIB_INDEX_sib and SIB_INDEX_sib64 exist as members too. */
292 typedef enum {
293 SIB_INDEX_NONE,
294 #define ENTRY(x) SIB_INDEX_##x,
295 ALL_EA_BASES
296 #undef ENTRY
/* One past the last real enumerator. */
297 SIB_INDEX_max
298 } SIBIndex;
299
300 /*
301 * SIBBase - All possible values of the SIB base field.
302 */
/* SIB_BASE_NONE is 0; the remaining enumerators are generated from
   ALL_SIB_BASES (the 32-bit registers followed by the 64-bit registers). */
303 typedef enum {
304 SIB_BASE_NONE,
305 #define ENTRY(x) SIB_BASE_##x,
306 ALL_SIB_BASES
307 #undef ENTRY
/* One past the last real enumerator. */
308 SIB_BASE_max
309 } SIBBase;
310
/*
 * EADisplacement - The kinds of displacement an effective-address computation
 *   can carry: none at all, or one of three widths.
 */
typedef enum {
  EA_DISP_NONE = 0,
  EA_DISP_8    = 1,
  EA_DISP_16   = 2,
  EA_DISP_32   = 3
} EADisplacement;
321
322 /*
323 * Reg - All possible values of the reg field in the ModR/M byte.
324 */
/* There is no NONE member: the first ALL_REGS entry (REG_AL) has value 0. */
325 typedef enum {
326 #define ENTRY(x) REG_##x,
327 ALL_REGS
328 #undef ENTRY
/* One past the last real enumerator, i.e. the number of ALL_REGS entries. */
329 REG_max
330 } Reg;
331
/*
 * SegmentOverride - Every segment override the decoder can record, with
 *   SEG_OVERRIDE_NONE first.  SEG_OVERRIDE_max is one past the last override.
 */
typedef enum {
  SEG_OVERRIDE_NONE = 0,
  SEG_OVERRIDE_CS   = 1,
  SEG_OVERRIDE_SS   = 2,
  SEG_OVERRIDE_DS   = 3,
  SEG_OVERRIDE_ES   = 4,
  SEG_OVERRIDE_FS   = 5,
  SEG_OVERRIDE_GS   = 6,
  SEG_OVERRIDE_max  = 7
} SegmentOverride;
345
/* One-byte flag type; used for the consumed___ fields of
   struct InternalInstruction. */
346 typedef uint8_t BOOL;
347
348 /*
349 * byteReader_t - Type for the byte reader that the consumer must provide to
350 * the decoder. Reads a single byte from the instruction's address space.
351 * @param arg - A baton that the consumer can associate with any internal
352 * state that it needs.
353 * @param byte - A pointer to a single byte in memory that should be set to
354 * contain the value at address.
355 * @param address - The address in the instruction's address space that should
356 * be read from.
357 * @return - -1 if the byte cannot be read for any reason; 0 otherwise.
358 */
359 typedef int (*byteReader_t)(void* arg, uint8_t* byte, uint64_t address);
360
361 /*
362 * dlog_t - Type for the logging function that the consumer can provide to
363 * get debugging output from the decoder.
364 * @param arg - A baton that the consumer can associate with any internal
365 * state that it needs.
366 * @param log - A string that contains the message. Will be reused after
367 * the logger returns.
368 */
369 typedef void (*dlog_t)(void* arg, const char *log);
370
371 /*
372 * The x86 internal instruction, which is produced by the decoder.
373 */
/* decodeInstruction() zeroes the whole structure before filling it in, so
   every field starts out as 0 / NULL / the first enumerator of its type. */
374 struct InternalInstruction {
375 /* Reader interface (C) */
376 byteReader_t reader;
377 /* Opaque value passed to the reader */
378 void* readerArg;
379 /* The address of the next byte to read via the reader */
380 uint64_t readerCursor;
381
382 /* Logger interface (C) */
383 dlog_t dlog;
384 /* Opaque value passed to the logger */
385 void* dlogArg;
386
387 /* General instruction information */
388
389 /* The mode to disassemble for (64-bit, protected, real) */
390 DisassemblerMode mode;
391 /* The start of the instruction, usable with the reader */
392 uint64_t startLocation;
393 /* The length of the instruction, in bytes */
/* (decodeInstruction sets this to readerCursor - startLocation once all
   operands have been read) */
394 size_t length;
395
396 /* Prefix state */
397
398 /* 1 if the prefix byte corresponding to the entry is present; 0 if not */
399 uint8_t prefixPresent[0x100];
400 /* contains the location (for use with the reader) of the prefix byte */
401 uint64_t prefixLocations[0x100];
402 /* The value of the REX prefix, if present */
403 uint8_t rexPrefix;
404 /* The location of the REX prefix */
405 uint64_t rexLocation;
406 /* The location where a mandatory prefix would have to be (i.e., right before
407 the opcode, or right before the REX prefix if one is present) */
408 uint64_t necessaryPrefixLocation;
409 /* The segment override type */
410 SegmentOverride segmentOverride;
411
412 /* Sizes of various critical pieces of data */
/* NOTE(review): these look like byte counts (immediateSize and addressSize are
   passed to readImmediate alongside the literal sizes 1, 2, 4, 8) - confirm */
413 uint8_t registerSize;
414 uint8_t addressSize;
415 uint8_t displacementSize;
416 uint8_t immediateSize;
417
418 /* opcode state */
419
420 /* The value of the two-byte escape prefix (usually 0x0f) */
421 uint8_t twoByteEscape;
422 /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
423 uint8_t threeByteEscape;
424 /* The last byte of the opcode, not counting any ModR/M extension */
425 uint8_t opcode;
426 /* The ModR/M byte of the instruction, if it is an opcode extension */
427 uint8_t modRMExtension;
428
429 /* decode state */
430
431 /* The type of opcode, used for indexing into the array of decode tables */
432 OpcodeType opcodeType;
433 /* The instruction ID, extracted from the decode table */
/* (decodeInstruction treats an ID of 0 as a decode failure) */
434 uint16_t instructionID;
435 /* The specifier for the instruction, from the instruction info table */
436 struct InstructionSpecifier* spec;
437
438 /* state for additional bytes, consumed during operand decode. Pattern:
439 consumed___ indicates that the byte was already consumed and does not
440 need to be consumed again */
441
442 /* The ModR/M byte, which contains most register operands and some portion of
443 all memory operands */
444 BOOL consumedModRM;
445 uint8_t modRM;
446
447 /* The SIB byte, used for more complex 32- or 64-bit memory operands */
448 BOOL consumedSIB;
449 uint8_t sib;
450
451 /* The displacement, used for memory operands */
452 BOOL consumedDisplacement;
453 int32_t displacement;
454
455 /* Immediates. There can be two in some cases */
/* (numImmediatesConsumed is explicitly reset to 0 by decodeInstruction) */
456 uint8_t numImmediatesConsumed;
457 uint8_t numImmediatesTranslated;
458 uint64_t immediates[2];
459
460 /* A register or immediate operand encoded into the opcode */
461 BOOL consumedOpcodeModifier;
462 uint8_t opcodeModifier;
463 Reg opcodeRegister;
464
465 /* Portions of the ModR/M byte */
466
467 /* These fields determine the allowable values for the ModR/M fields, which
468 depend on operand and address widths */
469 EABase eaBaseBase;
470 EABase eaRegBase;
471 Reg regBase;
472
473 /* The Mod and R/M fields can encode a base for an effective address, or a
474 register. These are separated into two fields here */
475 EABase eaBase;
476 EADisplacement eaDisplacement;
477 /* The reg field always encodes a register */
478 Reg reg;
479
480 /* SIB state */
481 SIBIndex sibIndex;
482 uint8_t sibScale;
483 SIBBase sibBase;
484 };
485
486 /* decodeInstruction - Decode one instruction and store the decoding results in
487 * a buffer provided by the consumer.
488 * @param insn - The buffer to store the instruction in. Allocated by the
489 * consumer.
490 * @param reader - The byteReader_t for the bytes to be read.
491 * @param readerArg - An argument to pass to the reader for storing context
492 * specific to the consumer. May be NULL.
493 * @param logger - The dlog_t to be used in printing status messages from the
494 * disassembler. May be NULL.
495 * @param loggerArg - An argument to pass to the logger for storing context
496 * specific to the logger. May be NULL.
497 * @param startLoc - The address (in the reader's address space) of the first
498 * byte in the instruction.
499 * @param mode - The mode (16-bit, 32-bit, 64-bit) to decode in.
500 * @return - Nonzero if there was an error during decode, 0 otherwise.
501 */
502 int decodeInstruction(struct InternalInstruction* insn,
503 byteReader_t reader,
504 void* readerArg,
505 dlog_t logger,
506 void* loggerArg,
507 uint64_t startLoc,
508 DisassemblerMode mode);
509
510 #ifdef __cplusplus
511 }
512 #endif
513
514 #endif
0 /*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==*
1 *
2 * The LLVM Compiler Infrastructure
3 *
4 * This file is distributed under the University of Illinois Open Source
5 * License. See LICENSE.TXT for details.
6 *
7 *===----------------------------------------------------------------------===*
8 *
9 * This file is part of the X86 Disassembler.
10 * It contains common definitions used by both the disassembler and the table
11 * generator.
12 * Documentation for the disassembler can be found in X86Disassembler.h.
13 *
14 *===----------------------------------------------------------------------===*/
15
16 /*
17 * This header file provides those definitions that need to be shared between
18 * the decoder and the table generator in a C-friendly manner.
19 */
20
21 #ifndef X86DISASSEMBLERDECODERCOMMON_H
22 #define X86DISASSEMBLERDECODERCOMMON_H
23
24 #include "llvm/System/DataTypes.h"
25
/* Names of the generated tables, as identifiers.  Each *_SYM macro has a
   matching *_STR macro below that spells the same name as a string literal;
   the two sets must be kept in sync by hand. */
26 #define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers
27 #define CONTEXTS_SYM x86DisassemblerContexts
28 #define ONEBYTE_SYM x86DisassemblerOneByteOpcodes
29 #define TWOBYTE_SYM x86DisassemblerTwoByteOpcodes
30 #define THREEBYTE38_SYM x86DisassemblerThreeByte38Opcodes
31 #define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes
32
/* The same table names as string literals. */
33 #define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers"
34 #define CONTEXTS_STR "x86DisassemblerContexts"
35 #define ONEBYTE_STR "x86DisassemblerOneByteOpcodes"
36 #define TWOBYTE_STR "x86DisassemblerTwoByteOpcodes"
37 #define THREEBYTE38_STR "x86DisassemblerThreeByte38Opcodes"
38 #define THREEBYTE3A_STR "x86DisassemblerThreeByte3AOpcodes"
39
40 /*
41 * Attributes of an instruction that must be known before the opcode can be
42 * processed correctly. Most of these indicate the presence of particular
43 * prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
44 */
/* X-macro list of (name, value) pairs.  Every non-zero value is a distinct
   single bit, so attributes can be combined into a mask. */
45 #define ATTRIBUTE_BITS \
46 ENUM_ENTRY(ATTR_NONE, 0x00) \
47 ENUM_ENTRY(ATTR_64BIT, 0x01) \
48 ENUM_ENTRY(ATTR_XS, 0x02) \
49 ENUM_ENTRY(ATTR_XD, 0x04) \
50 ENUM_ENTRY(ATTR_REXW, 0x08) \
51 ENUM_ENTRY(ATTR_OPSIZE, 0x10)
52
/* Expand the list into `name = value,` enumerators. */
53 #define ENUM_ENTRY(n, v) n = v,
54 enum attributeBits {
55 ATTRIBUTE_BITS
/* ATTR_max simply follows ATTR_OPSIZE, so its value is 0x11; it is neither
   the number of attributes nor a mask of all of them. */
56 ATTR_max
57 };
58 #undef ENUM_ENTRY
59
60 /*
61 * Combinations of the above attributes that are relevant to instruction
62 * decode. Although other combinations are possible, they can be reduced to
63 * these without affecting the ultimately decoded instruction.
64 */
65
66 /* Class name Rank Rationale for rank assignment */
67 #define INSTRUCTION_CONTEXTS \
68 ENUM_ENTRY(IC, 0, "says nothing about the instruction") \
69 ENUM_ENTRY(IC_64BIT, 1, "says the instruction applies in " \
70 "64-bit mode but no more") \
71 ENUM_ENTRY(IC_OPSIZE, 3, "requires an OPSIZE prefix, so " \
72 "operands change width") \
73 ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \
74 "but not the operands") \
75 ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \
76 "but not the operands") \
77 ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\
78 "change width; overrides IC_OPSIZE") \
79 ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \
80 ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \
81 "secondary") \
82 ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \
83 ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \
84 "opcode") \
85 ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \
86 "IC_64BIT_REXW_XS") \
87 ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! Prefer over all " \
88 "else because this changes most " \
89 "operands' meaning")
90
/* Only the name column is used here; the rank and rationale columns are
   dropped.  Enumerator values are therefore positional (IC = 0 through
   IC_64BIT_REXW_OPSIZE = 11) and are unrelated to the rank column. */
91 #define ENUM_ENTRY(n, r, d) n,
92 typedef enum {
93 INSTRUCTION_CONTEXTS
/* Number of contexts listed above (12). */
94 IC_max
95 } InstructionContext;
96 #undef ENUM_ENTRY
97
/*
 * OpcodeType - The kind of opcode an instruction has.  The kind selects which
 *   decode table applies, both in the Intel manual and in the decoder, so the
 *   numeric values are spelled out explicitly.
 */
typedef enum {
  ONEBYTE      = 0,
  TWOBYTE      = 1,
  THREEBYTE_38 = 2,
  THREEBYTE_3A = 3
} OpcodeType;
108
109 /*
110 * The following structs are used for the hierarchical decode table. After
111 * determining the instruction's class (i.e., which IC_* constant applies to
112 * it), the decoder reads the opcode. Some instructions require specific
113 * values of the ModR/M byte, so the ModR/M byte indexes into the final table.
114 *
115 * If a ModR/M byte is not required, modrm_type is MODRM_ONEENTRY, and the same
116 * instruction ID applies regardless of the value of the ModR/M byte.
117 */
118
/* Type of an instruction ID as stored in the decode tables (16 bits). */
119 typedef uint16_t InstrUID;
120
121 /*
122 * ModRMDecisionType - describes the type of ModR/M decision, allowing the
123 * consumer to determine the number of entries in it.
124 *
125 * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
126 * instruction is the same.
127 * MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode
128 * corresponds to one instruction; otherwise, it corresponds to
129 * a different instruction.
130 * MODRM_FULL - Potentially, each value of the ModR/M byte could correspond
131 * to a different instruction.
132 */
133
/* X-macro list of the ModR/M decision kinds documented above. */
134 #define MODRMTYPES \
135 ENUM_ENTRY(MODRM_ONEENTRY) \
136 ENUM_ENTRY(MODRM_SPLITRM) \
137 ENUM_ENTRY(MODRM_FULL)
138
/* Values are positional: MODRM_ONEENTRY = 0, MODRM_SPLITRM = 1,
   MODRM_FULL = 2, MODRM_max = 3. */
139 #define ENUM_ENTRY(n) n,
140 typedef enum {
141 MODRMTYPES
142 MODRM_max
143 } ModRMDecisionType;
144 #undef ENUM_ENTRY
145
146 /*
147 * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
148 * instruction each possible value of the ModR/M byte corresponds to. Once
149 * this information is known, we have narrowed down to a single instruction.
150 */
151 struct ModRMDecision {
/* NOTE(review): presumably holds a ModRMDecisionType value, stored in one
   byte - confirm against the table emitter. */
152 uint8_t modrm_type;
153
154 /* The macro below must be defined wherever this file is included. */
/* (the includer's INSTRUCTION_IDS definition supplies the member that holds
   the instruction IDs for this decision) */
155 INSTRUCTION_IDS
156 };
157
158 /*
159 * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at
160 * given a particular opcode.
161 */
162 struct OpcodeDecision {
/* 256 entries: one ModRMDecision for every possible value of a byte. */
163 struct ModRMDecision modRMDecisions[256];
164 };
165
166 /*
167 * ContextDecision - Specifies which opcode->instruction tables to look at given
168 * a particular context (set of attributes). Since there are many possible
169 * contexts, the decoder first uses CONTEXTS_SYM to determine which context
170 * applies given a specific set of attributes. Hence there are only IC_max
171 * entries in this table, rather than 2^(ATTR_max).
172 */
173 struct ContextDecision {
/* One OpcodeDecision per InstructionContext value. */
174 struct OpcodeDecision opcodeDecisions[IC_max];
175 };
176
177 /*
178 * Physical encodings of instruction operands.
179 */
180
/* X-macro list of (name, description) pairs, one per operand encoding. */
181 #define ENCODINGS \
182 ENUM_ENTRY(ENCODING_NONE, "") \
183 ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \
184 ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \
185 ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \
186 ENUM_ENTRY(ENCODING_CW, "2-byte") \
187 ENUM_ENTRY(ENCODING_CD, "4-byte") \
188 ENUM_ENTRY(ENCODING_CP, "6-byte") \
189 ENUM_ENTRY(ENCODING_CO, "8-byte") \
190 ENUM_ENTRY(ENCODING_CT, "10-byte") \
191 ENUM_ENTRY(ENCODING_IB, "1-byte immediate") \
192 ENUM_ENTRY(ENCODING_IW, "2-byte") \
193 ENUM_ENTRY(ENCODING_ID, "4-byte") \
194 ENUM_ENTRY(ENCODING_IO, "8-byte") \
195 ENUM_ENTRY(ENCODING_RB, "(AL..DIL, R8L..R15L) Register code added to " \
196 "the opcode byte") \
197 ENUM_ENTRY(ENCODING_RW, "(AX..DI, R8W..R15W)") \
198 ENUM_ENTRY(ENCODING_RD, "(EAX..EDI, R8D..R15D)") \
199 ENUM_ENTRY(ENCODING_RO, "(RAX..RDI, R8..R15)") \
200 ENUM_ENTRY(ENCODING_I, "Position on floating-point stack added to the " \
201 "opcode byte") \
202 \
203 ENUM_ENTRY(ENCODING_Iv, "Immediate of operand size") \
204 ENUM_ENTRY(ENCODING_Ia, "Immediate of address size") \
205 ENUM_ENTRY(ENCODING_Rv, "Register code of operand size added to the " \
206 "opcode byte") \
207 ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \
208 "in type")
209
/* Only the names are used here; the description strings are dropped.
   Enumerator values are positional, starting with ENCODING_NONE = 0. */
210 #define ENUM_ENTRY(n, d) n,
211 typedef enum {
212 ENCODINGS
213 ENCODING_max
214 } OperandEncoding;
215 #undef ENUM_ENTRY
216
217 /*
218 * Semantic interpretations of instruction operands.
219 */
220
/* X-macro list of (name, description) pairs, one per operand type.  A
   description that gives only a size continues the wording of the nearest
   full description above it. */
221 #define TYPES \
222 ENUM_ENTRY(TYPE_NONE, "") \
223 ENUM_ENTRY(TYPE_REL8, "1-byte immediate address") \
224 ENUM_ENTRY(TYPE_REL16, "2-byte") \
225 ENUM_ENTRY(TYPE_REL32, "4-byte") \
226 ENUM_ENTRY(TYPE_REL64, "8-byte") \
227 ENUM_ENTRY(TYPE_PTR1616, "2+2-byte segment+offset address") \
228 ENUM_ENTRY(TYPE_PTR1632, "2+4-byte") \
229 ENUM_ENTRY(TYPE_PTR1664, "2+8-byte") \
230 ENUM_ENTRY(TYPE_R8, "1-byte register operand") \
231 ENUM_ENTRY(TYPE_R16, "2-byte") \
232 ENUM_ENTRY(TYPE_R32, "4-byte") \
233 ENUM_ENTRY(TYPE_R64, "8-byte") \
234 ENUM_ENTRY(TYPE_IMM8, "1-byte immediate operand") \
235 ENUM_ENTRY(TYPE_IMM16, "2-byte") \
236 ENUM_ENTRY(TYPE_IMM32, "4-byte") \
237 ENUM_ENTRY(TYPE_IMM64, "8-byte") \
238 ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \
239 ENUM_ENTRY(TYPE_RM16, "2-byte") \
240 ENUM_ENTRY(TYPE_RM32, "4-byte") \
241 ENUM_ENTRY(TYPE_RM64, "8-byte") \
242 ENUM_ENTRY(TYPE_M, "Memory operand") \
243 ENUM_ENTRY(TYPE_M8, "1-byte") \
244 ENUM_ENTRY(TYPE_M16, "2-byte") \
245 ENUM_ENTRY(TYPE_M32, "4-byte") \
246 ENUM_ENTRY(TYPE_M64, "8-byte") \
247 ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \
248 ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \
249 ENUM_ENTRY(TYPE_M1632, "2+4-byte") \
250 ENUM_ENTRY(TYPE_M1664, "2+8-byte") \
251 ENUM_ENTRY(TYPE_M16_32, "2+4-byte two-part memory operand (LIDT, LGDT)") \
252 ENUM_ENTRY(TYPE_M16_16, "2+2-byte (BOUND)") \
253 ENUM_ENTRY(TYPE_M32_32, "4+4-byte (BOUND)") \
254 ENUM_ENTRY(TYPE_M16_64, "2+8-byte (LIDT, LGDT)") \
255 ENUM_ENTRY(TYPE_MOFFS8, "1-byte memory offset (relative to segment " \
256 "base)") \
257 ENUM_ENTRY(TYPE_MOFFS16, "2-byte") \
258 ENUM_ENTRY(TYPE_MOFFS32, "4-byte") \
259 ENUM_ENTRY(TYPE_MOFFS64, "8-byte") \
260 ENUM_ENTRY(TYPE_SREG, "Byte with single bit set: 0 = ES, 1 = CS, " \
261 "2 = SS, 3 = DS, 4 = FS, 5 = GS") \
262 ENUM_ENTRY(TYPE_M32FP, "32-bit IEE754 memory floating-point operand") \
263 ENUM_ENTRY(TYPE_M64FP, "64-bit") \
264 ENUM_ENTRY(TYPE_M80FP, "80-bit extended") \
265 ENUM_ENTRY(TYPE_M16INT, "2-byte memory integer operand for use in " \
266 "floating-point instructions") \
267 ENUM_ENTRY(TYPE_M32INT, "4-byte") \
268 ENUM_ENTRY(TYPE_M64INT, "8-byte") \
269 ENUM_ENTRY(TYPE_ST, "Position on the floating-point stack") \
270 ENUM_ENTRY(TYPE_MM, "MMX register operand") \
271 ENUM_ENTRY(TYPE_MM32, "4-byte MMX register or memory operand") \
272 ENUM_ENTRY(TYPE_MM64, "8-byte") \
273 ENUM_ENTRY(TYPE_XMM, "XMM register operand") \
274 ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \
275 ENUM_ENTRY(TYPE_XMM64, "8-byte") \
276 ENUM_ENTRY(TYPE_XMM128, "16-byte") \
277 ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
278 ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
279 ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
280 ENUM_ENTRY(TYPE_CR32, "4-byte control register operand") \
281 ENUM_ENTRY(TYPE_CR64, "8-byte") \
282 \
283 ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \
284 ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \
285 ENUM_ENTRY(TYPE_IMMv, "Immediate operand of operand size") \
286 ENUM_ENTRY(TYPE_RELv, "Immediate address of operand size") \
287 ENUM_ENTRY(TYPE_DUP0, "Duplicate of operand 0") \
288 ENUM_ENTRY(TYPE_DUP1, "operand 1") \
289 ENUM_ENTRY(TYPE_DUP2, "operand 2") \
290 ENUM_ENTRY(TYPE_DUP3, "operand 3") \
291 ENUM_ENTRY(TYPE_DUP4, "operand 4") \
292 ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state")
293
/* Only the names are used here; the description strings are dropped.
   Enumerator values are positional, starting with TYPE_NONE = 0. */
294 #define ENUM_ENTRY(n, d) n,
295 typedef enum {
296 TYPES
297 TYPE_max
298 } OperandType;
299 #undef ENUM_ENTRY
300
301 /*
302 * OperandSpecifier - The specification for how to extract and interpret one
303 * operand.
304 */
305 struct OperandSpecifier {
/* How the operand is physically encoded (see ENCODINGS). */
306 OperandEncoding encoding;
/* How the operand is to be interpreted (see TYPES). */
307 OperandType type;
308 };
309
310 /*
311 * Indicates where the opcode modifier (if any) is to be found. Extended
312 * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
313 */
314
/* X-macro list of the places an opcode modifier can live. */
315 #define MODIFIER_TYPES \
316 ENUM_ENTRY(MODIFIER_NONE) \
317 ENUM_ENTRY(MODIFIER_OPCODE) \
318 ENUM_ENTRY(MODIFIER_MODRM)
319
/* Values are positional: MODIFIER_NONE = 0, MODIFIER_OPCODE = 1,
   MODIFIER_MODRM = 2, MODIFIER_max = 3. */
320 #define ENUM_ENTRY(n) n,
321 typedef enum {
322 MODIFIER_TYPES
323 MODIFIER_max
324 } ModifierType;
325 #undef ENUM_ENTRY
326
/* Size of the operands array in struct InstructionSpecifier.  It matches the
   five TYPE_DUP0..TYPE_DUP4 operand references in TYPES. */
327 #define X86_MAX_OPERANDS 5
328
329 /*
330 * The specification for how to extract and interpret a full instruction and
331 * its operands.
332 */
333 struct InstructionSpecifier {
/* Where the opcode modifier, if any, is found (see MODIFIER_TYPES). */
334 ModifierType modifierType;
/* NOTE(review): presumably the base value the modifier is measured from -
   confirm against the decoder's opcode-modifier handling. */
335 uint8_t modifierBase;
/* One specifier per operand slot. */
336 struct OperandSpecifier operands[X86_MAX_OPERANDS];
337
338 /* The macro below must be defined wherever this file is included. */
/* (the includer's INSTRUCTION_SPECIFIER_FIELDS definition supplies any extra
   members it wants in this struct) */
339 INSTRUCTION_SPECIFIER_FIELDS
340 };
341
/*
 * DisassemblerMode - The decoding mode for the Intel disassembler.  The three
 *   supported modes are 16-bit, 32-bit, and 64-bit, which represent real mode,
 *   IA-32e, and IA-32e in 64-bit mode, respectively.
 */
typedef enum {
  MODE_16BIT = 0,
  MODE_32BIT = 1,
  MODE_64BIT = 2
} DisassemblerMode;
352
353 #endif
1414 X86GenRegisterInfo.inc X86GenInstrNames.inc \
1515 X86GenInstrInfo.inc X86GenAsmWriter.inc X86GenAsmMatcher.inc \
1616 X86GenAsmWriter1.inc X86GenDAGISel.inc \
17 X86GenFastISel.inc \
18 X86GenCallingConv.inc X86GenSubtarget.inc
17 X86GenDisassemblerTables.inc X86GenFastISel.inc \
18 X86GenCallingConv.inc X86GenSubtarget.inc \
1919
2020 DIRS = AsmPrinter AsmParser Disassembler TargetInfo
2121
3737 }
3838 }
3939
40 extern "C" void LLVMInitializeX86Disassembler();
41
4042 extern "C" void LLVMInitializeX86Target() {
4143 // Register the target.
4244 RegisterTargetMachine X(TheX86_32Target);
4547 // Register the target asm info.
4648 RegisterAsmInfoFn A(TheX86_32Target, createMCAsmInfo);
4749 RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo);
50
51 LLVMInitializeX86Disassembler();
4852
4953 // Register the code emitter.
5054 TargetRegistry::RegisterCodeEmitter(TheX86_32Target, createX86MCCodeEmitter);
2222 TGValueTypes.cpp
2323 TableGen.cpp
2424 TableGenBackend.cpp
25 X86DisassemblerTables.cpp
26 X86RecognizableInstr.cpp
2527 )
2628
2729 target_link_libraries(tblgen LLVMSupport LLVMSystem)
99 #include "DisassemblerEmitter.h"
1010 #include "CodeGenTarget.h"
1111 #include "Record.h"
12 #include "X86DisassemblerTables.h"
13 #include "X86RecognizableInstr.h"
1214 using namespace llvm;
15 using namespace llvm::X86Disassembler;
16
17 /// DisassemblerEmitter - Contains disassembler table emitters for various
18 /// architectures.
19
20 /// X86 Disassembler Emitter
21 ///
22 /// *** IF YOU'RE HERE TO RESOLVE A "Primary decode conflict", LOOK DOWN NEAR
23 /// THE END OF THIS COMMENT!
24 ///
25 /// The X86 disassembler emitter is part of the X86 Disassembler, which is
26 /// documented in lib/Target/X86/Disassembler/X86Disassembler.h.
27 ///
28 /// The emitter produces the tables that the disassembler uses to translate
29 /// instructions. The emitter generates the following tables:
30 ///
31 /// - One table (CONTEXTS_SYM) that contains a mapping of attribute masks to
32 /// instruction contexts. Although for each attribute there are cases where
33 /// that attribute determines decoding, in the majority of cases decoding is
34 /// the same whether or not an attribute is present. For example, a 64-bit
35 /// instruction with an OPSIZE prefix and an XS prefix decodes the same way in
36 /// all cases as a 64-bit instruction with only OPSIZE set. (The XS prefix
37 /// may have effects on its execution, but does not change the instruction
38 /// returned.) This allows considerable space savings in other tables.
39 /// - Four tables (ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, and
40 /// THREEBYTE3A_SYM) contain the hierarchy that the decoder traverses while
41 /// decoding an instruction. At the lowest level of this hierarchy are
42 /// instruction UIDs, 16-bit integers that can be used to uniquely identify
43 /// the instruction and correspond exactly to its position in the list of
44 /// CodeGenInstructions for the target.
45 /// - One table (INSTRUCTIONS_SYM) contains information about the operands of
46 /// each instruction and how to decode them.
47 ///
48 /// During table generation, there may be conflicts between instructions that
49 /// occupy the same space in the decode tables. These conflicts are resolved as
50 /// follows in setTableFields() (X86DisassemblerTables.cpp)
51 ///
52 /// - If the current context is the native context for one of the instructions
53 /// (that is, the attributes specified for it in the LLVM tables specify
54 /// precisely the current context), then it has priority.
55 /// - If the current context isn't native for either of the instructions, then
56 /// the higher-priority context wins (that is, the one that is more specific).
57 /// That hierarchy is determined by outranks() (X86DisassemblerTables.cpp)
58 /// - If the current context is native for both instructions, then the table
59 /// emitter reports a conflict and dies.
60 ///
61 /// *** RESOLUTION FOR "Primary decode conflict"S
62 ///
63 /// If two instructions collide, typically the solution is (in order of
64 /// likelihood):
65 ///
66 /// (1) to filter out one of the instructions by editing filter()
67 /// (X86RecognizableInstr.cpp). This is the most common resolution, but
68 /// check the Intel manuals first to make sure that (2) and (3) are not the
69 /// problem.
70 /// (2) to fix the tables (X86.td and its subsidiaries) so the opcodes are
71 /// accurate. Sometimes they are not.
72 /// (3) to fix the tables to reflect the actual context (for example, required
73 /// prefixes), and possibly to add a new context by editing
74 /// lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h. This is
75 /// unlikely to be the cause.
76 ///
77 /// DisassemblerEmitter.cpp contains the implementation for the emitter,
78 /// which simply pulls out instructions from the CodeGenTarget and pushes them
79 /// into X86DisassemblerTables.
80 /// X86DisassemblerTables.h contains the interface for the instruction tables,
81 /// which manage and emit the structures discussed above.
82 /// X86DisassemblerTables.cpp contains the implementation for the instruction
83 /// tables.
84 /// X86ModRMFilters.h contains filters that can be used to determine which
85 /// ModR/M values are valid for a particular instruction. These are used to
86 /// populate ModRMDecisions.
87 /// X86RecognizableInstr.h contains the interface for a single instruction,
88 /// which knows how to translate itself from a CodeGenInstruction and provide
89 /// the information necessary for integration into the tables.
90 /// X86RecognizableInstr.cpp contains the implementation for a single
91 /// instruction.
1392
1493 void DisassemblerEmitter::run(raw_ostream &OS) {
1594 CodeGenTarget Target;
24103 << " *===---------------------------------------------------------------"
25104 << "-------===*/\n";
26105
106 // X86 uses a custom disassembler.
107 if (Target.getName() == "X86") {
108 DisassemblerTables Tables;
109
110 std::vector numberedInstructions;
111 Target.getInstructionsByEnumValue(numberedInstructions);
112
113 for (unsigned i = 0, e = numberedInstructions.size(); i != e; ++i)
114 RecognizableInstr::processInstr(Tables, *numberedInstructions[i], i);
115
116 // FIXME: As long as we are using exceptions, might as well drop this to the
117 // actual conflict site.
118 if (Tables.hasConflicts())
119 throw TGError(Target.getTargetRecord()->getLoc(),
120 "Primary decode conflict");
121
122 Tables.emit(OS);
123 return;
124 }
125
27126 throw TGError(Target.getTargetRecord()->getLoc(),
28127 "Unable to generate disassembler for this target");
29128 }
0 //===- X86DisassemblerShared.h - Emitter shared header ----------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef X86DISASSEMBLERSHARED_H
10 #define X86DISASSEMBLERSHARED_H
11
12 #include
13
// INSTRUCTION_SPECIFIER_FIELDS - Emitter-only members for InstructionSpecifier.
//
// This macro is defined before X86DisassemblerDecoderCommon.h is included and
// is #undef'd right after it; presumably that header expands it inside
// struct InstructionSpecifier (the macro declares a constructor of that name).
//
// It contributes three fields (filtered, insnContext, name) plus a default
// constructor. The constructor also resets modifierType, modifierBase and
// operands, which this macro does not declare, so they must be declared by the
// enclosing struct.
//
// NOTE(review): bzero() is a legacy BSD call and no header included here is
// guaranteed to declare it; memset() from <cstring> would be the portable
// spelling -- confirm before building on non-POSIX hosts.
14 #define INSTRUCTION_SPECIFIER_FIELDS \
15 bool filtered; \
16 InstructionContext insnContext; \
17 std::string name; \
18 \
19 InstructionSpecifier() { \
20 filtered = false; \
21 insnContext = IC; \
22 name = ""; \
23 modifierType = MODIFIER_NONE; \
24 modifierBase = 0; \
25 bzero(operands, sizeof(operands)); \
26 }
27
// INSTRUCTION_IDS - Storage for the instruction UIDs of one ModR/M decision.
//
// Declares a 256-element InstrUID array named instructionIDs. The table
// emitter indexes it with values 0..255 through ModRMDecision objects, so it
// is presumably expanded inside struct ModRMDecision by the common header
// included below -- confirm against X86DisassemblerDecoderCommon.h.
28 #define INSTRUCTION_IDS \
29 InstrUID instructionIDs[256];
30
31 #include "../../lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h"
32
33 #undef INSTRUCTION_SPECIFIER_FIELDS
34 #undef INSTRUCTION_IDS
35
36 #endif
0 //===- X86DisassemblerTables.cpp - Disassembler tables ----------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is part of the X86 Disassembler Emitter.
10 // It contains the implementation of the disassembler tables.
11 // Documentation for the disassembler emitter in general can be found in
12 // DisassemblerEmitter.cpp.
13 //
14 //===----------------------------------------------------------------------===//
15
16 #include "X86DisassemblerShared.h"
17 #include "X86DisassemblerTables.h"
18
19 #include "TableGenBackend.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/Format.h"
22
23 #include
24
25 using namespace llvm;
26 using namespace X86Disassembler;
27
28 /// inheritsFrom - Indicates whether all instructions in one class also belong
29 /// to another class.
30 ///
31 /// @param child - The class that may be the subset
32 /// @param parent - The class that may be the superset
33 /// @return - True if child is a subset of parent, false otherwise.
34 static inline bool inheritsFrom(InstructionContext child,
35 InstructionContext parent) {
36 if (child == parent)
37 return true;
38
39 switch (parent) {
40 case IC:
41 return true;
42 case IC_64BIT:
43 return(inheritsFrom(child, IC_64BIT_REXW) ||
44 inheritsFrom(child, IC_64BIT_OPSIZE) ||
45 inheritsFrom(child, IC_64BIT_XD) ||
46 inheritsFrom(child, IC_64BIT_XS));
47 case IC_OPSIZE:
48 return(inheritsFrom(child, IC_64BIT_OPSIZE));
49 case IC_XD:
50 return(inheritsFrom(child, IC_64BIT_XD));
51 case IC_XS:
52 return(inheritsFrom(child, IC_64BIT_XS));
53 case IC_64BIT_REXW:
54 return(inheritsFrom(child, IC_64BIT_REXW_XS) ||
55 inheritsFrom(child, IC_64BIT_REXW_XD) ||
56 inheritsFrom(child, IC_64BIT_REXW_OPSIZE));
57 case IC_64BIT_OPSIZE:
58 return(inheritsFrom(child, IC_64BIT_REXW_OPSIZE));
59 case IC_64BIT_XD:
60 return(inheritsFrom(child, IC_64BIT_REXW_XD));
61 case IC_64BIT_XS:
62 return(inheritsFrom(child, IC_64BIT_REXW_XS));
63 case IC_64BIT_REXW_XD:
64 return false;
65 case IC_64BIT_REXW_XS:
66 return false;
67 case IC_64BIT_REXW_OPSIZE:
68 return false;
69 default:
70 return false;
71 }
72 }
73
74 /// outranks - Indicates whether, if an instruction has two different applicable
75 /// classes, which class should be preferred when performing decode. This
76 /// imposes a total ordering (ties are resolved toward "lower")
77 ///
78 /// @param upper - The class that may be preferable
79 /// @param lower - The class that may be less preferable
80 /// @return - True if upper is to be preferred, false otherwise.
81 static inline bool outranks(InstructionContext upper,
82 InstructionContext lower) {
83 assert(upper < IC_max);
84 assert(lower < IC_max);
85
86 #define ENUM_ENTRY(n, r, d) r,
87 static int ranks[IC_max] = {
88 INSTRUCTION_CONTEXTS
89 };
90 #undef ENUM_ENTRY
91
92 return (ranks[upper] > ranks[lower]);
93 }
94
95 /// stringForContext - Returns a string containing the name of a particular
96 /// InstructionContext, usually for diagnostic purposes.
97 ///
98 /// @param insnContext - The instruction class to transform to a string.
99 /// @return - A statically-allocated string constant that contains the
100 /// name of the instruction class.
101 static inline const char* stringForContext(InstructionContext insnContext) {
102 switch (insnContext) {
103 default:
104 llvm_unreachable("Unhandled instruction class");
105 #define ENUM_ENTRY(n, r, d) case n: return #n; break;
106 INSTRUCTION_CONTEXTS
107 #undef ENUM_ENTRY
108 }
109 }
110
111 /// stringForOperandType - Like stringForContext, but for OperandTypes.
112 static inline const char* stringForOperandType(OperandType type) {
113 switch (type) {
114 default:
115 llvm_unreachable("Unhandled type");
116 #define ENUM_ENTRY(i, d) case i: return #i;
117 TYPES
118 #undef ENUM_ENTRY
119 }
120 }
121
122 /// stringForOperandEncoding - like stringForContext, but for
123 /// OperandEncodings.
124 static inline const char* stringForOperandEncoding(OperandEncoding encoding) {
125 switch (encoding) {
126 default:
127 llvm_unreachable("Unhandled encoding");
128 #define ENUM_ENTRY(i, d) case i: return #i;
129 ENCODINGS
130 #undef ENUM_ENTRY
131 }
132 }
133
134 void DisassemblerTables::emitOneID(raw_ostream &o,
135 uint32_t &i,
136 InstrUID id,
137 bool addComma) const {
138 if (id)
139 o.indent(i * 2) << format("0x%hx", id);
140 else
141 o.indent(i * 2) << 0;
142
143 if (addComma)
144 o << ", ";
145 else
146 o << " ";
147
148 o << "/* ";
149 o << InstructionSpecifiers[id].name;
150 o << "*/";
151
152 o << "\n";
153 }
154
155 /// emitEmptyTable - Emits the modRMEmptyTable, which is used as a ID table by
156 /// all ModR/M decisions for instructions that are invalid for all possible
157 /// ModR/M byte values.
158 ///
159 /// @param o - The output stream on which to emit the table.
160 /// @param i - The indentation level for that output stream.
161 static void emitEmptyTable(raw_ostream &o, uint32_t &i)
162 {
163 o.indent(i * 2) << "InstrUID modRMEmptyTable[1] = { 0 };" << "\n";
164 o << "\n";
165 }
166
167 /// getDecisionType - Determines whether a ModRM decision with 255 entries can
168 /// be compacted by eliminating redundant information.
169 ///
170 /// @param decision - The decision to be compacted.
171 /// @return - The compactest available representation for the decision.
172 static ModRMDecisionType getDecisionType(ModRMDecision &decision)
173 {
174 bool satisfiesOneEntry = true;
175 bool satisfiesSplitRM = true;
176
177 uint16_t index;
178
179 for (index = 0; index < 256; ++index) {
180 if (decision.instructionIDs[index] != decision.instructionIDs[0])
181 satisfiesOneEntry = false;
182
183 if (((index & 0xc0) == 0xc0) &&
184 (decision.instructionIDs[index] != decision.instructionIDs[0xc0]))
185 satisfiesSplitRM = false;
186
187 if (((index & 0xc0) != 0xc0) &&
188 (decision.instructionIDs[index] != decision.instructionIDs[0x00]))
189 satisfiesSplitRM = false;
190 }
191
192 if (satisfiesOneEntry)
193 return MODRM_ONEENTRY;
194
195 if (satisfiesSplitRM)
196 return MODRM_SPLITRM;
197
198 return MODRM_FULL;
199 }
200
201 /// stringForDecisionType - Returns a statically-allocated string corresponding
202 /// to a particular decision type.
203 ///
204 /// @param dt - The decision type.
205 /// @return - A pointer to the statically-allocated string (e.g.,
206 /// "MODRM_ONEENTRY" for MODRM_ONEENTRY).
207 static const char* stringForDecisionType(ModRMDecisionType dt)
208 {
209 #define ENUM_ENTRY(n) case n: return #n;
210 switch (dt) {
211 default:
212 llvm_unreachable("Unknown decision type");
213 MODRMTYPES
214 };
215 #undef ENUM_ENTRY
216 }
217
218 /// stringForModifierType - Returns a statically-allocated string corresponding
219 /// to an opcode modifier type.
220 ///
221 /// @param mt - The modifier type.
222 /// @return - A pointer to the statically-allocated string (e.g.,
223 /// "MODIFIER_NONE" for MODIFIER_NONE).
224 static const char* stringForModifierType(ModifierType mt)
225 {
226 #define ENUM_ENTRY(n) case n: return #n;
227 switch(mt) {
228 default:
229 llvm_unreachable("Unknown modifier type");
230 MODIFIER_TYPES
231 };
232 #undef ENUM_ENTRY
233 }
234
235 DisassemblerTables::DisassemblerTables() {
236 unsigned i;
237
238 for (i = 0; i < 4; i++) {
239 Tables[i] = new ContextDecision;
240 bzero(Tables[i], sizeof(ContextDecision));
241 }
242
243 HasConflicts = false;
244 }
245
246 DisassemblerTables::~DisassemblerTables() {
247 unsigned i;
248
249 for (i = 0; i < 4; i++)
250 delete Tables[i];
251 }
252
253 void DisassemblerTables::emitModRMDecision(raw_ostream &o1,
254 raw_ostream &o2,
255 uint32_t &i1,
256 uint32_t &i2,
257 ModRMDecision &decision)
258 const {
259 static uint64_t sTableNumber = 0;
260 uint64_t thisTableNumber = sTableNumber;
261 ModRMDecisionType dt = getDecisionType(decision);
262 uint16_t index;
263
264 if (dt == MODRM_ONEENTRY && decision.instructionIDs[0] == 0)
265 {
266 o2.indent(i2) << "{ /* ModRMDecision */" << "\n";
267 i2++;
268
269 o2.indent(i2) << stringForDecisionType(dt) << "," << "\n";
270 o2.indent(i2) << "modRMEmptyTable";
271
272 i2--;
273 o2.indent(i2) << "}";
274 return;
275 }
276
277 o1.indent(i1) << "InstrUID modRMTable" << thisTableNumber;
278
279 switch (dt) {
280 default:
281 llvm_unreachable("Unknown decision type");
282 case MODRM_ONEENTRY:
283 o1 << "[1]";
284 break;
285 case MODRM_SPLITRM:
286 o1 << "[2]";
287 break;
288 case MODRM_FULL:
289 o1 << "[256]";
290 break;
291 }
292
293 o1 << " = {" << "\n";
294 i1++;
295
296 switch (dt) {
297 default:
298 llvm_unreachable("Unknown decision type");
299 case MODRM_ONEENTRY:
300 emitOneID(o1, i1, decision.instructionIDs[0], false);
301 break;
302 case MODRM_SPLITRM:
303 emitOneID(o1, i1, decision.instructionIDs[0x00], true); // mod = 0b00
304 emitOneID(o1, i1, decision.instructionIDs[0xc0], false); // mod = 0b11
305 break;
306 case MODRM_FULL:
307 for (index = 0; index < 256; ++index)
308 emitOneID(o1, i1, decision.instructionIDs[index], index < 255);
309 break;
310 }
311
312 i1--;
313 o1.indent(i1) << "};" << "\n";
314 o1 << "\n";
315
316 o2.indent(i2) << "{ /* struct ModRMDecision */" << "\n";
317 i2++;
318
319 o2.indent(i2) << stringForDecisionType(dt) << "," << "\n";
320 o2.indent(i2) << "modRMTable" << sTableNumber << "\n";
321
322 i2--;
323 o2.indent(i2) << "}";
324
325 ++sTableNumber;
326 }
327
328 void DisassemblerTables::emitOpcodeDecision(
329 raw_ostream &o1,
330 raw_ostream &o2,
331 uint32_t &i1,
332 uint32_t &i2,
333 OpcodeDecision &decision) const {
334 uint16_t index;
335
336 o2.indent(i2) << "{ /* struct OpcodeDecision */" << "\n";
337 i2++;
338 o2.indent(i2) << "{" << "\n";
339 i2++;
340
341 for (index = 0; index < 256; ++index) {
342 o2.indent(i2);
343
344 o2 << "/* 0x" << format("%02hhx", index) << " */" << "\n";
345
346 emitModRMDecision(o1, o2, i1, i2, decision.modRMDecisions[index]);
347
348 if (index < 255)
349 o2 << ",";
350
351 o2 << "\n";
352 }
353
354 i2--;
355 o2.indent(i2) << "}" << "\n";
356 i2--;
357 o2.indent(i2) << "}" << "\n";
358 }
359
360 void DisassemblerTables::emitContextDecision(
361 raw_ostream &o1,
362 raw_ostream &o2,
363 uint32_t &i1,
364 uint32_t &i2,
365 ContextDecision &decision,
366 const char* name) const {
367 o2.indent(i2) << "struct ContextDecision " << name << " = {" << "\n";
368 i2++;
369 o2.indent(i2) << "{ /* opcodeDecisions */" << "\n";
370 i2++;
371
372 unsigned index;
373
374 for (index = 0; index < IC_max; ++index) {
375 o2.indent(i2) << "/* ";
376 o2 << stringForContext((InstructionContext)index);
377 o2 << " */";
378 o2 << "\n";
379
380 emitOpcodeDecision(o1, o2, i1, i2, decision.opcodeDecisions[index]);
381
382 if (index + 1 < IC_max)
383 o2 << ", ";
384 }
385
386 i2--;
387 o2.indent(i2) << "}" << "\n";
388 i2--;
389 o2.indent(i2) << "};" << "\n";
390 }
391
392 void DisassemblerTables::emitInstructionInfo(raw_ostream &o, uint32_t &i)
393 const {
394 o.indent(i * 2) << "struct InstructionSpecifier ";
395 o << INSTRUCTIONS_STR << "[";
396 o << InstructionSpecifiers.size();
397 o << "] = {" << "\n";
398
399 i++;
400