llvm.org GIT mirror llvm / fe0e7bb
What year is it! This file has no reason to be written in C, and has doubly no reason to expose a global symbol 'decodeInstruction' nor to pollute the global scope with a bunch of external linkage entities (some of which conflict with others elsewhere in LLVM). This is just the initial transition to C++; more cleanups to follow. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206717 91177308-0d34-0410-b5e6-96231b3b80d8 Richard Smith 6 years ago
6 changed file(s) with 1889 addition(s) and 1895 deletion(s). Raw diff Collapse all Expand all
0 add_llvm_library(LLVMX86Disassembler
11 X86Disassembler.cpp
2 X86DisassemblerDecoder.c
2 X86DisassemblerDecoder.cpp
33 )
3636 using namespace llvm;
3737 using namespace llvm::X86Disassembler;
3838
39 void x86DisassemblerDebug(const char *file,
40 unsigned line,
41 const char *s) {
39 void llvm::X86Disassembler::Debug(const char *file, unsigned line,
40 const char *s) {
4241 dbgs() << file << ":" << line << ": " << s;
4342 }
4443
45 const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii) {
44 const char *llvm::X86Disassembler::GetInstrName(unsigned Opcode,
45 const void *mii) {
4646 const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii);
4747 return MII->getName(Opcode);
4848 }
4949
50 #define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s));
50 #define debug(s) DEBUG(Debug(__FILE__, __LINE__, s));
5151
5252 namespace llvm {
5353
+0
-1821
lib/Target/X86/Disassembler/X86DisassemblerDecoder.c less more
None /*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
1 *
2 * The LLVM Compiler Infrastructure
3 *
4 * This file is distributed under the University of Illinois Open Source
5 * License. See LICENSE.TXT for details.
6 *
7 *===----------------------------------------------------------------------===*
8 *
9 * This file is part of the X86 Disassembler.
10 * It contains the implementation of the instruction decoder.
11 * Documentation for the disassembler can be found in X86Disassembler.h.
12 *
13 *===----------------------------------------------------------------------===*/
14
15 #include <stdarg.h> /* for va_*() */
16 #include <stdio.h> /* for vsnprintf() */
17 #include <stdlib.h> /* for exit() */
18 #include <string.h> /* for memset() */
19
20 #include "X86DisassemblerDecoder.h"
21
22 #include "X86GenDisassemblerTables.inc"
23
/* Boolean constants used with the decoder's BOOL-typed values. */
#define TRUE  1
#define FALSE 0

/*
 * debug(s) - Reports the message s, together with the current file and line,
 *   through x86DisassemblerDebug().  Expands to an empty statement when NDEBUG
 *   is defined.
 */
#ifdef NDEBUG
#define debug(s) do { } while (0)
#else
#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#endif
32
33
34 /*
35 * contextForAttrs - Client for the instruction context table. Takes a set of
36 * attributes and returns the appropriate decode context.
37 *
38 * @param attrMask - Attributes, from the enumeration attributeBits.
39 * @return - The InstructionContext to use when looking up an
40 * an instruction with these attributes.
41 */
42 static InstructionContext contextForAttrs(uint16_t attrMask) {
43 return CONTEXTS_SYM[attrMask];
44 }
45
46 /*
47 * modRMRequired - Reads the appropriate instruction table to determine whether
48 * the ModR/M byte is required to decode a particular instruction.
49 *
50 * @param type - The opcode type (i.e., how many bytes it has).
51 * @param insnContext - The context for the instruction, as returned by
52 * contextForAttrs.
53 * @param opcode - The last byte of the instruction's opcode, not counting
54 * ModR/M extensions and escapes.
55 * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
56 */
57 static int modRMRequired(OpcodeType type,
58 InstructionContext insnContext,
59 uint16_t opcode) {
60 const struct ContextDecision* decision = 0;
61
62 switch (type) {
63 case ONEBYTE:
64 decision = &ONEBYTE_SYM;
65 break;
66 case TWOBYTE:
67 decision = &TWOBYTE_SYM;
68 break;
69 case THREEBYTE_38:
70 decision = &THREEBYTE38_SYM;
71 break;
72 case THREEBYTE_3A:
73 decision = &THREEBYTE3A_SYM;
74 break;
75 case XOP8_MAP:
76 decision = &XOP8_MAP_SYM;
77 break;
78 case XOP9_MAP:
79 decision = &XOP9_MAP_SYM;
80 break;
81 case XOPA_MAP:
82 decision = &XOPA_MAP_SYM;
83 break;
84 }
85
86 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
87 modrm_type != MODRM_ONEENTRY;
88 }
89
90 /*
91 * decode - Reads the appropriate instruction table to obtain the unique ID of
92 * an instruction.
93 *
94 * @param type - See modRMRequired().
95 * @param insnContext - See modRMRequired().
96 * @param opcode - See modRMRequired().
97 * @param modRM - The ModR/M byte if required, or any value if not.
98 * @return - The UID of the instruction, or 0 on failure.
99 */
100 static InstrUID decode(OpcodeType type,
101 InstructionContext insnContext,
102 uint8_t opcode,
103 uint8_t modRM) {
104 const struct ModRMDecision* dec = 0;
105
106 switch (type) {
107 case ONEBYTE:
108 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
109 break;
110 case TWOBYTE:
111 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
112 break;
113 case THREEBYTE_38:
114 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
115 break;
116 case THREEBYTE_3A:
117 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
118 break;
119 case XOP8_MAP:
120 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
121 break;
122 case XOP9_MAP:
123 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
124 break;
125 case XOPA_MAP:
126 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
127 break;
128 }
129
130 switch (dec->modrm_type) {
131 default:
132 debug("Corrupt table! Unknown modrm_type");
133 return 0;
134 case MODRM_ONEENTRY:
135 return modRMTable[dec->instructionIDs];
136 case MODRM_SPLITRM:
137 if (modFromModRM(modRM) == 0x3)
138 return modRMTable[dec->instructionIDs+1];
139 return modRMTable[dec->instructionIDs];
140 case MODRM_SPLITREG:
141 if (modFromModRM(modRM) == 0x3)
142 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
143 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
144 case MODRM_SPLITMISC:
145 if (modFromModRM(modRM) == 0x3)
146 return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
147 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
148 case MODRM_FULL:
149 return modRMTable[dec->instructionIDs+modRM];
150 }
151 }
152
153 /*
154 * specifierForUID - Given a UID, returns the name and operand specification for
155 * that instruction.
156 *
157 * @param uid - The unique ID for the instruction. This should be returned by
158 * decode(); specifierForUID will not check bounds.
159 * @return - A pointer to the specification for that instruction.
160 */
161 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
162 return &INSTRUCTIONS_SYM[uid];
163 }
164
165 /*
166 * consumeByte - Uses the reader function provided by the user to consume one
167 * byte from the instruction's memory and advance the cursor.
168 *
169 * @param insn - The instruction with the reader function to use. The cursor
170 * for this instruction is advanced.
171 * @param byte - A pointer to a pre-allocated memory buffer to be populated
172 * with the data read.
173 * @return - 0 if the read was successful; nonzero otherwise.
174 */
175 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
176 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
177
178 if (!ret)
179 ++(insn->readerCursor);
180
181 return ret;
182 }
183
184 /*
185 * lookAtByte - Like consumeByte, but does not advance the cursor.
186 *
187 * @param insn - See consumeByte().
188 * @param byte - See consumeByte().
189 * @return - See consumeByte().
190 */
191 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
192 return insn->reader(insn->readerArg, byte, insn->readerCursor);
193 }
194
195 static void unconsumeByte(struct InternalInstruction* insn) {
196 insn->readerCursor--;
197 }
198
199 #define CONSUME_FUNC(name, type) \
200 static int name(struct InternalInstruction* insn, type* ptr) { \
201 type combined = 0; \
202 unsigned offset; \
203 for (offset = 0; offset < sizeof(type); ++offset) { \
204 uint8_t byte; \
205 int ret = insn->reader(insn->readerArg, \
206 &byte, \
207 insn->readerCursor + offset); \
208 if (ret) \
209 return ret; \
210 combined = combined | ((uint64_t)byte << (offset * 8)); \
211 } \
212 *ptr = combined; \
213 insn->readerCursor += sizeof(type); \
214 return 0; \
215 }
216
217 /*
218 * consume* - Use the reader function provided by the user to consume data
219 * values of various sizes from the instruction's memory and advance the
220 * cursor appropriately. These readers perform endian conversion.
221 *
222 * @param insn - See consumeByte().
223 * @param ptr - A pointer to a pre-allocated memory of appropriate size to
224 * be populated with the data read.
225 * @return - See consumeByte().
226 */
227 CONSUME_FUNC(consumeInt8, int8_t)
228 CONSUME_FUNC(consumeInt16, int16_t)
229 CONSUME_FUNC(consumeInt32, int32_t)
230 CONSUME_FUNC(consumeUInt16, uint16_t)
231 CONSUME_FUNC(consumeUInt32, uint32_t)
232 CONSUME_FUNC(consumeUInt64, uint64_t)
233
234 /*
235 * dbgprintf - Uses the logging function provided by the user to log a single
236 * message, typically without a carriage-return.
237 *
238 * @param insn - The instruction containing the logging function.
239 * @param format - See printf().
240 * @param ... - See printf().
241 */
242 static void dbgprintf(struct InternalInstruction* insn,
243 const char* format,
244 ...) {
245 char buffer[256];
246 va_list ap;
247
248 if (!insn->dlog)
249 return;
250
251 va_start(ap, format);
252 (void)vsnprintf(buffer, sizeof(buffer), format, ap);
253 va_end(ap);
254
255 insn->dlog(insn->dlogArg, buffer);
256
257 return;
258 }
259
260 /*
261 * setPrefixPresent - Marks that a particular prefix is present at a particular
262 * location.
263 *
264 * @param insn - The instruction to be marked as having the prefix.
265 * @param prefix - The prefix that is present.
266 * @param location - The location where the prefix is located (in the address
267 * space of the instruction's reader).
268 */
269 static void setPrefixPresent(struct InternalInstruction* insn,
270 uint8_t prefix,
271 uint64_t location)
272 {
273 insn->prefixPresent[prefix] = 1;
274 insn->prefixLocations[prefix] = location;
275 }
276
277 /*
278 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
279 * present at a given location.
280 *
281 * @param insn - The instruction to be queried.
282 * @param prefix - The prefix.
283 * @param location - The location to query.
284 * @return - Whether the prefix is at that location.
285 */
286 static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
287 uint8_t prefix,
288 uint64_t location)
289 {
290 if (insn->prefixPresent[prefix] == 1 &&
291 insn->prefixLocations[prefix] == location)
292 return TRUE;
293 else
294 return FALSE;
295 }
296
297 /*
298 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
299 * instruction as having them. Also sets the instruction's default operand,
300 * address, and other relevant data sizes to report operands correctly.
301 *
302 * @param insn - The instruction whose prefixes are to be read.
303 * @return - 0 if the instruction could be read until the end of the prefix
304 * bytes, and no prefixes conflicted; nonzero otherwise.
305 */
306 static int readPrefixes(struct InternalInstruction* insn) {
307 BOOL isPrefix = TRUE;
308 BOOL prefixGroups[4] = { FALSE };
309 uint64_t prefixLocation;
310 uint8_t byte = 0;
311 uint8_t nextByte;
312
313 BOOL hasAdSize = FALSE;
314 BOOL hasOpSize = FALSE;
315
316 dbgprintf(insn, "readPrefixes()");
317
318 while (isPrefix) {
319 prefixLocation = insn->readerCursor;
320
321 /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
322 if (consumeByte(insn, &byte))
323 break;
324
325 /*
326 * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
327 * break and let it be disassembled as a normal "instruction".
328 */
329 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
330 break;
331
332 if (insn->readerCursor - 1 == insn->startLocation
333 && (byte == 0xf2 || byte == 0xf3)
334 && !lookAtByte(insn, &nextByte))
335 {
336 /*
337 * If the byte is 0xf2 or 0xf3, and any of the following conditions are
338 * met:
339 * - it is followed by a LOCK (0xf0) prefix
340 * - it is followed by an xchg instruction
341 * then it should be disassembled as a xacquire/xrelease not repne/rep.
342 */
343 if ((byte == 0xf2 || byte == 0xf3) &&
344 ((nextByte == 0xf0) |
345 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
346 insn->xAcquireRelease = TRUE;
347 /*
348 * Also if the byte is 0xf3, and the following condition is met:
349 * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
350 * "mov mem, imm" (opcode 0xc6/0xc7) instructions.
351 * then it should be disassembled as an xrelease not rep.
352 */
353 if (byte == 0xf3 &&
354 (nextByte == 0x88 || nextByte == 0x89 ||
355 nextByte == 0xc6 || nextByte == 0xc7))
356 insn->xAcquireRelease = TRUE;
357 if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
358 if (consumeByte(insn, &nextByte))
359 return -1;
360 if (lookAtByte(insn, &nextByte))
361 return -1;
362 unconsumeByte(insn);
363 }
364 if (nextByte != 0x0f && nextByte != 0x90)
365 break;
366 }
367
368 switch (byte) {
369 case 0xf0: /* LOCK */
370 case 0xf2: /* REPNE/REPNZ */
371 case 0xf3: /* REP or REPE/REPZ */
372 if (prefixGroups[0])
373 dbgprintf(insn, "Redundant Group 1 prefix");
374 prefixGroups[0] = TRUE;
375 setPrefixPresent(insn, byte, prefixLocation);
376 break;
377 case 0x2e: /* CS segment override -OR- Branch not taken */
378 case 0x36: /* SS segment override -OR- Branch taken */
379 case 0x3e: /* DS segment override */
380 case 0x26: /* ES segment override */
381 case 0x64: /* FS segment override */
382 case 0x65: /* GS segment override */
383 switch (byte) {
384 case 0x2e:
385 insn->segmentOverride = SEG_OVERRIDE_CS;
386 break;
387 case 0x36:
388 insn->segmentOverride = SEG_OVERRIDE_SS;
389 break;
390 case 0x3e:
391 insn->segmentOverride = SEG_OVERRIDE_DS;
392 break;
393 case 0x26:
394 insn->segmentOverride = SEG_OVERRIDE_ES;
395 break;
396 case 0x64:
397 insn->segmentOverride = SEG_OVERRIDE_FS;
398 break;
399 case 0x65:
400 insn->segmentOverride = SEG_OVERRIDE_GS;
401 break;
402 default:
403 debug("Unhandled override");
404 return -1;
405 }
406 if (prefixGroups[1])
407 dbgprintf(insn, "Redundant Group 2 prefix");
408 prefixGroups[1] = TRUE;
409 setPrefixPresent(insn, byte, prefixLocation);
410 break;
411 case 0x66: /* Operand-size override */
412 if (prefixGroups[2])
413 dbgprintf(insn, "Redundant Group 3 prefix");
414 prefixGroups[2] = TRUE;
415 hasOpSize = TRUE;
416 setPrefixPresent(insn, byte, prefixLocation);
417 break;
418 case 0x67: /* Address-size override */
419 if (prefixGroups[3])
420 dbgprintf(insn, "Redundant Group 4 prefix");
421 prefixGroups[3] = TRUE;
422 hasAdSize = TRUE;
423 setPrefixPresent(insn, byte, prefixLocation);
424 break;
425 default: /* Not a prefix byte */
426 isPrefix = FALSE;
427 break;
428 }
429
430 if (isPrefix)
431 dbgprintf(insn, "Found prefix 0x%hhx", byte);
432 }
433
434 insn->vectorExtensionType = TYPE_NO_VEX_XOP;
435
436 if (byte == 0x62) {
437 uint8_t byte1, byte2;
438
439 if (consumeByte(insn, &byte1)) {
440 dbgprintf(insn, "Couldn't read second byte of EVEX prefix");
441 return -1;
442 }
443
444 if (lookAtByte(insn, &byte2)) {
445 dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
446 return -1;
447 }
448
449 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
450 ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
451 insn->vectorExtensionType = TYPE_EVEX;
452 }
453 else {
454 unconsumeByte(insn); /* unconsume byte1 */
455 unconsumeByte(insn); /* unconsume byte */
456 insn->necessaryPrefixLocation = insn->readerCursor - 2;
457 }
458
459 if (insn->vectorExtensionType == TYPE_EVEX) {
460 insn->vectorExtensionPrefix[0] = byte;
461 insn->vectorExtensionPrefix[1] = byte1;
462 if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) {
463 dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
464 return -1;
465 }
466 if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) {
467 dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix");
468 return -1;
469 }
470
471 /* We simulate the REX prefix for simplicity's sake */
472 if (insn->mode == MODE_64BIT) {
473 insn->rexPrefix = 0x40
474 | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3)
475 | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2)
476 | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1)
477 | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
478 }
479
480 dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
481 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
482 insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);
483 }
484 }
485 else if (byte == 0xc4) {
486 uint8_t byte1;
487
488 if (lookAtByte(insn, &byte1)) {
489 dbgprintf(insn, "Couldn't read second byte of VEX");
490 return -1;
491 }
492
493 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
494 insn->vectorExtensionType = TYPE_VEX_3B;
495 insn->necessaryPrefixLocation = insn->readerCursor - 1;
496 }
497 else {
498 unconsumeByte(insn);
499 insn->necessaryPrefixLocation = insn->readerCursor - 1;
500 }
501
502 if (insn->vectorExtensionType == TYPE_VEX_3B) {
503 insn->vectorExtensionPrefix[0] = byte;
504 consumeByte(insn, &insn->vectorExtensionPrefix[1]);
505 consumeByte(insn, &insn->vectorExtensionPrefix[2]);
506
507 /* We simulate the REX prefix for simplicity's sake */
508
509 if (insn->mode == MODE_64BIT) {
510 insn->rexPrefix = 0x40
511 | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3)
512 | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2)
513 | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1)
514 | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
515 }
516
517 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
518 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
519 insn->vectorExtensionPrefix[2]);
520 }
521 }
522 else if (byte == 0xc5) {
523 uint8_t byte1;
524
525 if (lookAtByte(insn, &byte1)) {
526 dbgprintf(insn, "Couldn't read second byte of VEX");
527 return -1;
528 }
529
530 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
531 insn->vectorExtensionType = TYPE_VEX_2B;
532 }
533 else {
534 unconsumeByte(insn);
535 }
536
537 if (insn->vectorExtensionType == TYPE_VEX_2B) {
538 insn->vectorExtensionPrefix[0] = byte;
539 consumeByte(insn, &insn->vectorExtensionPrefix[1]);
540
541 if (insn->mode == MODE_64BIT) {
542 insn->rexPrefix = 0x40
543 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
544 }
545
546 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1]))
547 {
548 default:
549 break;
550 case VEX_PREFIX_66:
551 hasOpSize = TRUE;
552 break;
553 }
554
555 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx",
556 insn->vectorExtensionPrefix[0],
557 insn->vectorExtensionPrefix[1]);
558 }
559 }
560 else if (byte == 0x8f) {
561 uint8_t byte1;
562
563 if (lookAtByte(insn, &byte1)) {
564 dbgprintf(insn, "Couldn't read second byte of XOP");
565 return -1;
566 }
567
568 if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */
569 insn->vectorExtensionType = TYPE_XOP;
570 insn->necessaryPrefixLocation = insn->readerCursor - 1;
571 }
572 else {
573 unconsumeByte(insn);
574 insn->necessaryPrefixLocation = insn->readerCursor - 1;
575 }
576
577 if (insn->vectorExtensionType == TYPE_XOP) {
578 insn->vectorExtensionPrefix[0] = byte;
579 consumeByte(insn, &insn->vectorExtensionPrefix[1]);
580 consumeByte(insn, &insn->vectorExtensionPrefix[2]);
581
582 /* We simulate the REX prefix for simplicity's sake */
583
584 if (insn->mode == MODE_64BIT) {
585 insn->rexPrefix = 0x40
586 | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3)
587 | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2)
588 | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1)
589 | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
590 }
591
592 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2]))
593 {
594 default:
595 break;
596 case VEX_PREFIX_66:
597 hasOpSize = TRUE;
598 break;
599 }
600
601 dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
602 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
603 insn->vectorExtensionPrefix[2]);
604 }
605 }
606 else {
607 if (insn->mode == MODE_64BIT) {
608 if ((byte & 0xf0) == 0x40) {
609 uint8_t opcodeByte;
610
611 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
612 dbgprintf(insn, "Redundant REX prefix");
613 return -1;
614 }
615
616 insn->rexPrefix = byte;
617 insn->necessaryPrefixLocation = insn->readerCursor - 2;
618
619 dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
620 } else {
621 unconsumeByte(insn);
622 insn->necessaryPrefixLocation = insn->readerCursor - 1;
623 }
624 } else {
625 unconsumeByte(insn);
626 insn->necessaryPrefixLocation = insn->readerCursor - 1;
627 }
628 }
629
630 if (insn->mode == MODE_16BIT) {
631 insn->registerSize = (hasOpSize ? 4 : 2);
632 insn->addressSize = (hasAdSize ? 4 : 2);
633 insn->displacementSize = (hasAdSize ? 4 : 2);
634 insn->immediateSize = (hasOpSize ? 4 : 2);
635 } else if (insn->mode == MODE_32BIT) {
636 insn->registerSize = (hasOpSize ? 2 : 4);
637 insn->addressSize = (hasAdSize ? 2 : 4);
638 insn->displacementSize = (hasAdSize ? 2 : 4);
639 insn->immediateSize = (hasOpSize ? 2 : 4);
640 } else if (insn->mode == MODE_64BIT) {
641 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
642 insn->registerSize = 8;
643 insn->addressSize = (hasAdSize ? 4 : 8);
644 insn->displacementSize = 4;
645 insn->immediateSize = 4;
646 } else if (insn->rexPrefix) {
647 insn->registerSize = (hasOpSize ? 2 : 4);
648 insn->addressSize = (hasAdSize ? 4 : 8);
649 insn->displacementSize = (hasOpSize ? 2 : 4);
650 insn->immediateSize = (hasOpSize ? 2 : 4);
651 } else {
652 insn->registerSize = (hasOpSize ? 2 : 4);
653 insn->addressSize = (hasAdSize ? 4 : 8);
654 insn->displacementSize = (hasOpSize ? 2 : 4);
655 insn->immediateSize = (hasOpSize ? 2 : 4);
656 }
657 }
658
659 return 0;
660 }
661
662 /*
663 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
664 * extended or escape opcodes).
665 *
666 * @param insn - The instruction whose opcode is to be read.
667 * @return - 0 if the opcode could be read successfully; nonzero otherwise.
668 */
669 static int readOpcode(struct InternalInstruction* insn) {
670 /* Determine the length of the primary opcode */
671
672 uint8_t current;
673
674 dbgprintf(insn, "readOpcode()");
675
676 insn->opcodeType = ONEBYTE;
677
678 if (insn->vectorExtensionType == TYPE_EVEX)
679 {
680 switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
681 default:
682 dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
683 mmFromEVEX2of4(insn->vectorExtensionPrefix[1]));
684 return -1;
685 case VEX_LOB_0F:
686 insn->opcodeType = TWOBYTE;
687 return consumeByte(insn, &insn->opcode);
688 case VEX_LOB_0F38:
689 insn->opcodeType = THREEBYTE_38;
690 return consumeByte(insn, &insn->opcode);
691 case VEX_LOB_0F3A:
692 insn->opcodeType = THREEBYTE_3A;
693 return consumeByte(insn, &insn->opcode);
694 }
695 }
696 else if (insn->vectorExtensionType == TYPE_VEX_3B) {
697 switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
698 default:
699 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
700 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
701 return -1;
702 case VEX_LOB_0F:
703 insn->opcodeType = TWOBYTE;
704 return consumeByte(insn, &insn->opcode);
705 case VEX_LOB_0F38:
706 insn->opcodeType = THREEBYTE_38;
707 return consumeByte(insn, &insn->opcode);
708 case VEX_LOB_0F3A:
709 insn->opcodeType = THREEBYTE_3A;
710 return consumeByte(insn, &insn->opcode);
711 }
712 }
713 else if (insn->vectorExtensionType == TYPE_VEX_2B) {
714 insn->opcodeType = TWOBYTE;
715 return consumeByte(insn, &insn->opcode);
716 }
717 else if (insn->vectorExtensionType == TYPE_XOP) {
718 switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
719 default:
720 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
721 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
722 return -1;
723 case XOP_MAP_SELECT_8:
724 insn->opcodeType = XOP8_MAP;
725 return consumeByte(insn, &insn->opcode);
726 case XOP_MAP_SELECT_9:
727 insn->opcodeType = XOP9_MAP;
728 return consumeByte(insn, &insn->opcode);
729 case XOP_MAP_SELECT_A:
730 insn->opcodeType = XOPA_MAP;
731 return consumeByte(insn, &insn->opcode);
732 }
733 }
734
735 if (consumeByte(insn, ¤t))
736 return -1;
737
738 if (current == 0x0f) {
739 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
740
741 if (consumeByte(insn, ¤t))
742 return -1;
743
744 if (current == 0x38) {
745 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
746
747 if (consumeByte(insn, ¤t))
748 return -1;
749
750 insn->opcodeType = THREEBYTE_38;
751 } else if (current == 0x3a) {
752 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
753
754 if (consumeByte(insn, ¤t))
755 return -1;
756
757 insn->opcodeType = THREEBYTE_3A;
758 } else {
759 dbgprintf(insn, "Didn't find a three-byte escape prefix");
760
761 insn->opcodeType = TWOBYTE;
762 }
763 }
764
765 /*
766 * At this point we have consumed the full opcode.
767 * Anything we consume from here on must be unconsumed.
768 */
769
770 insn->opcode = current;
771
772 return 0;
773 }
774
775 static int readModRM(struct InternalInstruction* insn);
776
777 /*
778 * getIDWithAttrMask - Determines the ID of an instruction, consuming
779 * the ModR/M byte as appropriate for extended and escape opcodes,
780 * and using a supplied attribute mask.
781 *
782 * @param instructionID - A pointer whose target is filled in with the ID of the
783 * instruction.
784 * @param insn - The instruction whose ID is to be determined.
785 * @param attrMask - The attribute mask to search.
786 * @return - 0 if the ModR/M could be read when needed or was not
787 * needed; nonzero otherwise.
788 */
789 static int getIDWithAttrMask(uint16_t* instructionID,
790 struct InternalInstruction* insn,
791 uint16_t attrMask) {
792 BOOL hasModRMExtension;
793
794 uint16_t instructionClass;
795
796 instructionClass = contextForAttrs(attrMask);
797
798 hasModRMExtension = modRMRequired(insn->opcodeType,
799 instructionClass,
800 insn->opcode);
801
802 if (hasModRMExtension) {
803 if (readModRM(insn))
804 return -1;
805
806 *instructionID = decode(insn->opcodeType,
807 instructionClass,
808 insn->opcode,
809 insn->modRM);
810 } else {
811 *instructionID = decode(insn->opcodeType,
812 instructionClass,
813 insn->opcode,
814 0);
815 }
816
817 return 0;
818 }
819
820 /*
821 * is16BitEquivalent - Determines whether two instruction names refer to
822 * equivalent instructions but one is 16-bit whereas the other is not.
823 *
824 * @param orig - The instruction that is not 16-bit
825 * @param equiv - The instruction that is 16-bit
826 */
827 static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
828 off_t i;
829
830 for (i = 0;; i++) {
831 if (orig[i] == '\0' && equiv[i] == '\0')
832 return TRUE;
833 if (orig[i] == '\0' || equiv[i] == '\0')
834 return FALSE;
835 if (orig[i] != equiv[i]) {
836 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
837 continue;
838 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
839 continue;
840 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
841 continue;
842 return FALSE;
843 }
844 }
845 }
846
847 /*
848 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
849 * appropriate for extended and escape opcodes. Determines the attributes and
850 * context for the instruction before doing so.
851 *
852 * @param insn - The instruction whose ID is to be determined.
853 * @return - 0 if the ModR/M could be read when needed or was not needed;
854 * nonzero otherwise.
855 */
856 static int getID(struct InternalInstruction* insn, const void *miiArg) {
857 uint16_t attrMask;
858 uint16_t instructionID;
859
860 dbgprintf(insn, "getID()");
861
862 attrMask = ATTR_NONE;
863
864 if (insn->mode == MODE_64BIT)
865 attrMask |= ATTR_64BIT;
866
867 if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
868 attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
869
870 if (insn->vectorExtensionType == TYPE_EVEX) {
871 switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
872 case VEX_PREFIX_66:
873 attrMask |= ATTR_OPSIZE;
874 break;
875 case VEX_PREFIX_F3:
876 attrMask |= ATTR_XS;
877 break;
878 case VEX_PREFIX_F2:
879 attrMask |= ATTR_XD;
880 break;
881 }
882
883 if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
884 attrMask |= ATTR_EVEXKZ;
885 if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
886 attrMask |= ATTR_EVEXB;
887 if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
888 attrMask |= ATTR_EVEXK;
889 if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
890 attrMask |= ATTR_EVEXL;
891 if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
892 attrMask |= ATTR_EVEXL2;
893 }
894 else if (insn->vectorExtensionType == TYPE_VEX_3B) {
895 switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
896 case VEX_PREFIX_66:
897 attrMask |= ATTR_OPSIZE;
898 break;
899 case VEX_PREFIX_F3:
900 attrMask |= ATTR_XS;
901 break;
902 case VEX_PREFIX_F2:
903 attrMask |= ATTR_XD;
904 break;
905 }
906
907 if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
908 attrMask |= ATTR_VEXL;
909 }
910 else if (insn->vectorExtensionType == TYPE_VEX_2B) {
911 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
912 case VEX_PREFIX_66:
913 attrMask |= ATTR_OPSIZE;
914 break;
915 case VEX_PREFIX_F3:
916 attrMask |= ATTR_XS;
917 break;
918 case VEX_PREFIX_F2:
919 attrMask |= ATTR_XD;
920 break;
921 }
922
923 if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
924 attrMask |= ATTR_VEXL;
925 }
926 else if (insn->vectorExtensionType == TYPE_XOP) {
927 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
928 case VEX_PREFIX_66:
929 attrMask |= ATTR_OPSIZE;
930 break;
931 case VEX_PREFIX_F3:
932 attrMask |= ATTR_XS;
933 break;
934 case VEX_PREFIX_F2:
935 attrMask |= ATTR_XD;
936 break;
937 }
938
939 if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
940 attrMask |= ATTR_VEXL;
941 }
942 else {
943 return -1;
944 }
945 }
946 else {
947 if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
948 attrMask |= ATTR_OPSIZE;
949 else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
950 attrMask |= ATTR_ADSIZE;
951 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
952 attrMask |= ATTR_XS;
953 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
954 attrMask |= ATTR_XD;
955 }
956
957 if (insn->rexPrefix & 0x08)
958 attrMask |= ATTR_REXW;
959
960 if (getIDWithAttrMask(&instructionID, insn, attrMask))
961 return -1;
962
963 /*
964 * JCXZ/JECXZ need special handling for 16-bit mode because the meaning
965 * of the AdSize prefix is inverted w.r.t. 32-bit mode.
966 */
967 if (insn->mode == MODE_16BIT && insn->opcode == 0xE3) {
968 const struct InstructionSpecifier *spec;
969 spec = specifierForUID(instructionID);
970
971 /*
972 * Check for Ii8PCRel instructions. We could alternatively do a
973 * string-compare on the names, but this is probably cheaper.
974 */
975 if (x86OperandSets[spec->operands][0].type == TYPE_REL8) {
976 attrMask ^= ATTR_ADSIZE;
977 if (getIDWithAttrMask(&instructionID, insn, attrMask))
978 return -1;
979 }
980 }
981
982 /* The following clauses compensate for limitations of the tables. */
983
984 if ((insn->mode == MODE_16BIT || insn->prefixPresent[0x66]) &&
985 !(attrMask & ATTR_OPSIZE)) {
986 /*
987 * The instruction tables make no distinction between instructions that
988 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
989 * particular spot (i.e., many MMX operations). In general we're
990 * conservative, but in the specific case where OpSize is present but not
991 * in the right place we check if there's a 16-bit operation.
992 */
993
994 const struct InstructionSpecifier *spec;
995 uint16_t instructionIDWithOpsize;
996 const char *specName, *specWithOpSizeName;
997
998 spec = specifierForUID(instructionID);
999
1000 if (getIDWithAttrMask(&instructionIDWithOpsize,
1001 insn,
1002 attrMask | ATTR_OPSIZE)) {
1003 /*
1004 * ModRM required with OpSize but not present; give up and return version
1005 * without OpSize set
1006 */
1007
1008 insn->instructionID = instructionID;
1009 insn->spec = spec;
1010 return 0;
1011 }
1012
1013 specName = x86DisassemblerGetInstrName(instructionID, miiArg);
1014 specWithOpSizeName =
1015 x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
1016
1017 if (is16BitEquivalent(specName, specWithOpSizeName) &&
1018 (insn->mode == MODE_16BIT) ^ insn->prefixPresent[0x66]) {
1019 insn->instructionID = instructionIDWithOpsize;
1020 insn->spec = specifierForUID(instructionIDWithOpsize);
1021 } else {
1022 insn->instructionID = instructionID;
1023 insn->spec = spec;
1024 }
1025 return 0;
1026 }
1027
1028 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
1029 insn->rexPrefix & 0x01) {
1030 /*
1031 * NOOP shouldn't decode as NOOP if REX.b is set. Instead
1032 * it should decode as XCHG %r8, %eax.
1033 */
1034
1035 const struct InstructionSpecifier *spec;
1036 uint16_t instructionIDWithNewOpcode;
1037 const struct InstructionSpecifier *specWithNewOpcode;
1038
1039 spec = specifierForUID(instructionID);
1040
1041 /* Borrow opcode from one of the other XCHGar opcodes */
1042 insn->opcode = 0x91;
1043
1044 if (getIDWithAttrMask(&instructionIDWithNewOpcode,
1045 insn,
1046 attrMask)) {
1047 insn->opcode = 0x90;
1048
1049 insn->instructionID = instructionID;
1050 insn->spec = spec;
1051 return 0;
1052 }
1053
1054 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
1055
1056 /* Change back */
1057 insn->opcode = 0x90;
1058
1059 insn->instructionID = instructionIDWithNewOpcode;
1060 insn->spec = specWithNewOpcode;
1061
1062 return 0;
1063 }
1064
1065 insn->instructionID = instructionID;
1066 insn->spec = specifierForUID(insn->instructionID);
1067
1068 return 0;
1069 }
1070
1071 /*
1072 * readSIB - Consumes the SIB byte to determine addressing information for an
1073 * instruction.
1074 *
1075 * @param insn - The instruction whose SIB byte is to be read.
1076 * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
1077 */
1078 static int readSIB(struct InternalInstruction* insn) {
1079 SIBIndex sibIndexBase = 0;
1080 SIBBase sibBaseBase = 0;
1081 uint8_t index, base;
1082
1083 dbgprintf(insn, "readSIB()");
1084
1085 if (insn->consumedSIB)
1086 return 0;
1087
1088 insn->consumedSIB = TRUE;
1089
1090 switch (insn->addressSize) {
1091 case 2:
1092 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
1093 return -1;
1094 case 4:
1095 sibIndexBase = SIB_INDEX_EAX;
1096 sibBaseBase = SIB_BASE_EAX;
1097 break;
1098 case 8:
1099 sibIndexBase = SIB_INDEX_RAX;
1100 sibBaseBase = SIB_BASE_RAX;
1101 break;
1102 }
1103
1104 if (consumeByte(insn, &insn->sib))
1105 return -1;
1106
1107 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
1108 if (insn->vectorExtensionType == TYPE_EVEX)
1109 index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4;
1110
1111 switch (index) {
1112 case 0x4:
1113 insn->sibIndex = SIB_INDEX_NONE;
1114 break;
1115 default:
1116 insn->sibIndex = (SIBIndex)(sibIndexBase + index);
1117 if (insn->sibIndex == SIB_INDEX_sib ||
1118 insn->sibIndex == SIB_INDEX_sib64)
1119 insn->sibIndex = SIB_INDEX_NONE;
1120 break;
1121 }
1122
1123 switch (scaleFromSIB(insn->sib)) {
1124 case 0:
1125 insn->sibScale = 1;
1126 break;
1127 case 1:
1128 insn->sibScale = 2;
1129 break;
1130 case 2:
1131 insn->sibScale = 4;
1132 break;
1133 case 3:
1134 insn->sibScale = 8;
1135 break;
1136 }
1137
1138 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
1139
1140 switch (base) {
1141 case 0x5:
1142 case 0xd:
1143 switch (modFromModRM(insn->modRM)) {
1144 case 0x0:
1145 insn->eaDisplacement = EA_DISP_32;
1146 insn->sibBase = SIB_BASE_NONE;
1147 break;
1148 case 0x1:
1149 insn->eaDisplacement = EA_DISP_8;
1150 insn->sibBase = (SIBBase)(sibBaseBase + base);
1151 break;
1152 case 0x2:
1153 insn->eaDisplacement = EA_DISP_32;
1154 insn->sibBase = (SIBBase)(sibBaseBase + base);
1155 break;
1156 case 0x3:
1157 debug("Cannot have Mod = 0b11 and a SIB byte");
1158 return -1;
1159 }
1160 break;
1161 default:
1162 insn->sibBase = (SIBBase)(sibBaseBase + base);
1163 break;
1164 }
1165
1166 return 0;
1167 }
1168
1169 /*
1170 * readDisplacement - Consumes the displacement of an instruction.
1171 *
1172 * @param insn - The instruction whose displacement is to be read.
1173 * @return - 0 if the displacement byte was successfully read; nonzero
1174 * otherwise.
1175 */
1176 static int readDisplacement(struct InternalInstruction* insn) {
1177 int8_t d8;
1178 int16_t d16;
1179 int32_t d32;
1180
1181 dbgprintf(insn, "readDisplacement()");
1182
1183 if (insn->consumedDisplacement)
1184 return 0;
1185
1186 insn->consumedDisplacement = TRUE;
1187 insn->displacementOffset = insn->readerCursor - insn->startLocation;
1188
1189 switch (insn->eaDisplacement) {
1190 case EA_DISP_NONE:
1191 insn->consumedDisplacement = FALSE;
1192 break;
1193 case EA_DISP_8:
1194 if (consumeInt8(insn, &d8))
1195 return -1;
1196 insn->displacement = d8;
1197 break;
1198 case EA_DISP_16:
1199 if (consumeInt16(insn, &d16))
1200 return -1;
1201 insn->displacement = d16;
1202 break;
1203 case EA_DISP_32:
1204 if (consumeInt32(insn, &d32))
1205 return -1;
1206 insn->displacement = d32;
1207 break;
1208 }
1209
1210 insn->consumedDisplacement = TRUE;
1211 return 0;
1212 }
1213
1214 /*
1215 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
1216 * displacement) for an instruction and interprets it.
1217 *
1218 * @param insn - The instruction whose addressing information is to be read.
1219 * @return - 0 if the information was successfully read; nonzero otherwise.
1220 */
1221 static int readModRM(struct InternalInstruction* insn) {
1222 uint8_t mod, rm, reg;
1223
1224 dbgprintf(insn, "readModRM()");
1225
1226 if (insn->consumedModRM)
1227 return 0;
1228
1229 if (consumeByte(insn, &insn->modRM))
1230 return -1;
1231 insn->consumedModRM = TRUE;
1232
1233 mod = modFromModRM(insn->modRM);
1234 rm = rmFromModRM(insn->modRM);
1235 reg = regFromModRM(insn->modRM);
1236
1237 /*
1238 * This goes by insn->registerSize to pick the correct register, which messes
1239 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
1240 * fixupReg().
1241 */
1242 switch (insn->registerSize) {
1243 case 2:
1244 insn->regBase = MODRM_REG_AX;
1245 insn->eaRegBase = EA_REG_AX;
1246 break;
1247 case 4:
1248 insn->regBase = MODRM_REG_EAX;
1249 insn->eaRegBase = EA_REG_EAX;
1250 break;
1251 case 8:
1252 insn->regBase = MODRM_REG_RAX;
1253 insn->eaRegBase = EA_REG_RAX;
1254 break;
1255 }
1256
1257 reg |= rFromREX(insn->rexPrefix) << 3;
1258 rm |= bFromREX(insn->rexPrefix) << 3;
1259 if (insn->vectorExtensionType == TYPE_EVEX) {
1260 reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1261 rm |= xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1262 }
1263
1264 insn->reg = (Reg)(insn->regBase + reg);
1265
1266 switch (insn->addressSize) {
1267 case 2:
1268 insn->eaBaseBase = EA_BASE_BX_SI;
1269
1270 switch (mod) {
1271 case 0x0:
1272 if (rm == 0x6) {
1273 insn->eaBase = EA_BASE_NONE;
1274 insn->eaDisplacement = EA_DISP_16;
1275 if (readDisplacement(insn))
1276 return -1;
1277 } else {
1278 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1279 insn->eaDisplacement = EA_DISP_NONE;
1280 }
1281 break;
1282 case 0x1:
1283 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1284 insn->eaDisplacement = EA_DISP_8;
1285 insn->displacementSize = 1;
1286 if (readDisplacement(insn))
1287 return -1;
1288 break;
1289 case 0x2:
1290 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1291 insn->eaDisplacement = EA_DISP_16;
1292 if (readDisplacement(insn))
1293 return -1;
1294 break;
1295 case 0x3:
1296 insn->eaBase = (EABase)(insn->eaRegBase + rm);
1297 if (readDisplacement(insn))
1298 return -1;
1299 break;
1300 }
1301 break;
1302 case 4:
1303 case 8:
1304 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
1305
1306 switch (mod) {
1307 case 0x0:
1308 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
1309 switch (rm) {
1310 case 0x14:
1311 case 0x4:
1312 case 0xc: /* in case REXW.b is set */
1313 insn->eaBase = (insn->addressSize == 4 ?
1314 EA_BASE_sib : EA_BASE_sib64);
1315 if (readSIB(insn) || readDisplacement(insn))
1316 return -1;
1317 break;
1318 case 0x5:
1319 insn->eaBase = EA_BASE_NONE;
1320 insn->eaDisplacement = EA_DISP_32;
1321 if (readDisplacement(insn))
1322 return -1;
1323 break;
1324 default:
1325 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1326 break;
1327 }
1328 break;
1329 case 0x1:
1330 insn->displacementSize = 1;
1331 /* FALLTHROUGH */
1332 case 0x2:
1333 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
1334 switch (rm) {
1335 case 0x14:
1336 case 0x4:
1337 case 0xc: /* in case REXW.b is set */
1338 insn->eaBase = EA_BASE_sib;
1339 if (readSIB(insn) || readDisplacement(insn))
1340 return -1;
1341 break;
1342 default:
1343 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1344 if (readDisplacement(insn))
1345 return -1;
1346 break;
1347 }
1348 break;
1349 case 0x3:
1350 insn->eaDisplacement = EA_DISP_NONE;
1351 insn->eaBase = (EABase)(insn->eaRegBase + rm);
1352 break;
1353 }
1354 break;
1355 } /* switch (insn->addressSize) */
1356
1357 return 0;
1358 }
1359
1360 #define GENERIC_FIXUP_FUNC(name, base, prefix) \
1361 static uint8_t name(struct InternalInstruction *insn, \
1362 OperandType type, \
1363 uint8_t index, \
1364 uint8_t *valid) { \
1365 *valid = 1; \
1366 switch (type) { \
1367 default: \
1368 debug("Unhandled register type"); \
1369 *valid = 0; \
1370 return 0; \
1371 case TYPE_Rv: \
1372 return base + index; \
1373 case TYPE_R8: \
1374 if (insn->rexPrefix && \
1375 index >= 4 && index <= 7) { \
1376 return prefix##_SPL + (index - 4); \
1377 } else { \
1378 return prefix##_AL + index; \
1379 } \
1380 case TYPE_R16: \
1381 return prefix##_AX + index; \
1382 case TYPE_R32: \
1383 return prefix##_EAX + index; \
1384 case TYPE_R64: \
1385 return prefix##_RAX + index; \
1386 case TYPE_XMM512: \
1387 return prefix##_ZMM0 + index; \
1388 case TYPE_XMM256: \
1389 return prefix##_YMM0 + index; \
1390 case TYPE_XMM128: \
1391 case TYPE_XMM64: \
1392 case TYPE_XMM32: \
1393 case TYPE_XMM: \
1394 return prefix##_XMM0 + index; \
1395 case TYPE_VK1: \
1396 case TYPE_VK8: \
1397 case TYPE_VK16: \
1398 return prefix##_K0 + index; \
1399 case TYPE_MM64: \
1400 case TYPE_MM32: \
1401 case TYPE_MM: \
1402 if (index > 7) \
1403 *valid = 0; \
1404 return prefix##_MM0 + index; \
1405 case TYPE_SEGMENTREG: \
1406 if (index > 5) \
1407 *valid = 0; \
1408 return prefix##_ES + index; \
1409 case TYPE_DEBUGREG: \
1410 if (index > 7) \
1411 *valid = 0; \
1412 return prefix##_DR0 + index; \
1413 case TYPE_CONTROLREG: \
1414 if (index > 8) \
1415 *valid = 0; \
1416 return prefix##_CR0 + index; \
1417 } \
1418 }
1419
1420 /*
1421 * fixup*Value - Consults an operand type to determine the meaning of the
1422 * reg or R/M field. If the operand is an XMM operand, for example, an
1423 * operand would be XMM0 instead of AX, which readModRM() would otherwise
1424 * misinterpret it as.
1425 *
1426 * @param insn - The instruction containing the operand.
1427 * @param type - The operand type.
1428 * @param index - The existing value of the field as reported by readModRM().
1429 * @param valid - The address of a uint8_t. The target is set to 1 if the
1430 * field is valid for the register class; 0 if not.
1431 * @return - The proper value.
1432 */
1433 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
1434 GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
1435
1436 /*
1437 * fixupReg - Consults an operand specifier to determine which of the
1438 * fixup*Value functions to use in correcting readModRM()'ss interpretation.
1439 *
1440 * @param insn - See fixup*Value().
1441 * @param op - The operand specifier.
1442 * @return - 0 if fixup was successful; -1 if the register returned was
1443 * invalid for its class.
1444 */
1445 static int fixupReg(struct InternalInstruction *insn,
1446 const struct OperandSpecifier *op) {
1447 uint8_t valid;
1448
1449 dbgprintf(insn, "fixupReg()");
1450
1451 switch ((OperandEncoding)op->encoding) {
1452 default:
1453 debug("Expected a REG or R/M encoding in fixupReg");
1454 return -1;
1455 case ENCODING_VVVV:
1456 insn->vvvv = (Reg)fixupRegValue(insn,
1457 (OperandType)op->type,
1458 insn->vvvv,
1459 &valid);
1460 if (!valid)
1461 return -1;
1462 break;
1463 case ENCODING_REG:
1464 insn->reg = (Reg)fixupRegValue(insn,
1465 (OperandType)op->type,
1466 insn->reg - insn->regBase,
1467 &valid);
1468 if (!valid)
1469 return -1;
1470 break;
1471 case ENCODING_RM:
1472 if (insn->eaBase >= insn->eaRegBase) {
1473 insn->eaBase = (EABase)fixupRMValue(insn,
1474 (OperandType)op->type,
1475 insn->eaBase - insn->eaRegBase,
1476 &valid);
1477 if (!valid)
1478 return -1;
1479 }
1480 break;
1481 }
1482
1483 return 0;
1484 }
1485
1486 /*
1487 * readOpcodeRegister - Reads an operand from the opcode field of an
1488 * instruction and interprets it appropriately given the operand width.
1489 * Handles AddRegFrm instructions.
1490 *
1491 * @param insn - the instruction whose opcode field is to be read.
1492 * @param size - The width (in bytes) of the register being specified.
1493 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1494 * RAX.
1495 * @return - 0 on success; nonzero otherwise.
1496 */
1497 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
1498 dbgprintf(insn, "readOpcodeRegister()");
1499
1500 if (size == 0)
1501 size = insn->registerSize;
1502
1503 switch (size) {
1504 case 1:
1505 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1506 | (insn->opcode & 7)));
1507 if (insn->rexPrefix &&
1508 insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1509 insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1510 insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1511 + (insn->opcodeRegister - MODRM_REG_AL - 4));
1512 }
1513
1514 break;
1515 case 2:
1516 insn->opcodeRegister = (Reg)(MODRM_REG_AX
1517 + ((bFromREX(insn->rexPrefix) << 3)
1518 | (insn->opcode & 7)));
1519 break;
1520 case 4:
1521 insn->opcodeRegister = (Reg)(MODRM_REG_EAX
1522 + ((bFromREX(insn->rexPrefix) << 3)
1523 | (insn->opcode & 7)));
1524 break;
1525 case 8:
1526 insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1527 + ((bFromREX(insn->rexPrefix) << 3)
1528 | (insn->opcode & 7)));
1529 break;
1530 }
1531
1532 return 0;
1533 }
1534
1535 /*
1536 * readImmediate - Consumes an immediate operand from an instruction, given the
1537 * desired operand size.
1538 *
1539 * @param insn - The instruction whose operand is to be read.
1540 * @param size - The width (in bytes) of the operand.
1541 * @return - 0 if the immediate was successfully consumed; nonzero
1542 * otherwise.
1543 */
1544 static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1545 uint8_t imm8;
1546 uint16_t imm16;
1547 uint32_t imm32;
1548 uint64_t imm64;
1549
1550 dbgprintf(insn, "readImmediate()");
1551
1552 if (insn->numImmediatesConsumed == 2) {
1553 debug("Already consumed two immediates");
1554 return -1;
1555 }
1556
1557 if (size == 0)
1558 size = insn->immediateSize;
1559 else
1560 insn->immediateSize = size;
1561 insn->immediateOffset = insn->readerCursor - insn->startLocation;
1562
1563 switch (size) {
1564 case 1:
1565 if (consumeByte(insn, &imm8))
1566 return -1;
1567 insn->immediates[insn->numImmediatesConsumed] = imm8;
1568 break;
1569 case 2:
1570 if (consumeUInt16(insn, &imm16))
1571 return -1;
1572 insn->immediates[insn->numImmediatesConsumed] = imm16;
1573 break;
1574 case 4:
1575 if (consumeUInt32(insn, &imm32))
1576 return -1;
1577 insn->immediates[insn->numImmediatesConsumed] = imm32;
1578 break;
1579 case 8:
1580 if (consumeUInt64(insn, &imm64))
1581 return -1;
1582 insn->immediates[insn->numImmediatesConsumed] = imm64;
1583 break;
1584 }
1585
1586 insn->numImmediatesConsumed++;
1587
1588 return 0;
1589 }
1590
1591 /*
1592 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
1593 *
1594 * @param insn - The instruction whose operand is to be read.
1595 * @return - 0 if the vvvv was successfully consumed; nonzero
1596 * otherwise.
1597 */
1598 static int readVVVV(struct InternalInstruction* insn) {
1599 dbgprintf(insn, "readVVVV()");
1600
1601 if (insn->vectorExtensionType == TYPE_EVEX)
1602 insn->vvvv = vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]);
1603 else if (insn->vectorExtensionType == TYPE_VEX_3B)
1604 insn->vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
1605 else if (insn->vectorExtensionType == TYPE_VEX_2B)
1606 insn->vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
1607 else if (insn->vectorExtensionType == TYPE_XOP)
1608 insn->vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
1609 else
1610 return -1;
1611
1612 if (insn->mode != MODE_64BIT)
1613 insn->vvvv &= 0x7;
1614
1615 return 0;
1616 }
1617
1618 /*
1619 * readMaskRegister - Reads an mask register from the opcode field of an
1620 * instruction.
1621 *
1622 * @param insn - The instruction whose opcode field is to be read.
1623 * @return - 0 on success; nonzero otherwise.
1624 */
1625 static int readMaskRegister(struct InternalInstruction* insn) {
1626 dbgprintf(insn, "readMaskRegister()");
1627
1628 if (insn->vectorExtensionType != TYPE_EVEX)
1629 return -1;
1630
1631 insn->writemask = aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]);
1632 return 0;
1633 }
1634
1635 /*
1636 * readOperands - Consults the specifier for an instruction and consumes all
1637 * operands for that instruction, interpreting them as it goes.
1638 *
1639 * @param insn - The instruction whose operands are to be read and interpreted.
1640 * @return - 0 if all operands could be read; nonzero otherwise.
1641 */
1642 static int readOperands(struct InternalInstruction* insn) {
1643 int index;
1644 int hasVVVV, needVVVV;
1645 int sawRegImm = 0;
1646
1647 dbgprintf(insn, "readOperands()");
1648
1649 /* If non-zero vvvv specified, need to make sure one of the operands
1650 uses it. */
1651 hasVVVV = !readVVVV(insn);
1652 needVVVV = hasVVVV && (insn->vvvv != 0);
1653
1654 for (index = 0; index < X86_MAX_OPERANDS; ++index) {
1655 switch (x86OperandSets[insn->spec->operands][index].encoding) {
1656 case ENCODING_NONE:
1657 case ENCODING_SI:
1658 case ENCODING_DI:
1659 break;
1660 case ENCODING_REG:
1661 case ENCODING_RM:
1662 if (readModRM(insn))
1663 return -1;
1664 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
1665 return -1;
1666 break;
1667 case ENCODING_CB:
1668 case ENCODING_CW:
1669 case ENCODING_CD:
1670 case ENCODING_CP:
1671 case ENCODING_CO:
1672 case ENCODING_CT:
1673 dbgprintf(insn, "We currently don't hande code-offset encodings");
1674 return -1;
1675 case ENCODING_IB:
1676 if (sawRegImm) {
1677 /* Saw a register immediate so don't read again and instead split the
1678 previous immediate. FIXME: This is a hack. */
1679 insn->immediates[insn->numImmediatesConsumed] =
1680 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1681 ++insn->numImmediatesConsumed;
1682 break;
1683 }
1684 if (readImmediate(insn, 1))
1685 return -1;
1686 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 &&
1687 insn->immediates[insn->numImmediatesConsumed - 1] > 7)
1688 return -1;
1689 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 &&
1690 insn->immediates[insn->numImmediatesConsumed - 1] > 31)
1691 return -1;
1692 if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
1693 x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
1694 sawRegImm = 1;
1695 break;
1696 case ENCODING_IW:
1697 if (readImmediate(insn, 2))
1698 return -1;
1699 break;
1700 case ENCODING_ID:
1701 if (readImmediate(insn, 4))
1702 return -1;
1703 break;
1704 case ENCODING_IO:
1705 if (readImmediate(insn, 8))
1706 return -1;
1707 break;
1708 case ENCODING_Iv:
1709 if (readImmediate(insn, insn->immediateSize))
1710 return -1;
1711 break;
1712 case ENCODING_Ia:
1713 if (readImmediate(insn, insn->addressSize))
1714 return -1;
1715 break;
1716 case ENCODING_RB:
1717 if (readOpcodeRegister(insn, 1))
1718 return -1;
1719 break;
1720 case ENCODING_RW:
1721 if (readOpcodeRegister(insn, 2))
1722 return -1;
1723 break;
1724 case ENCODING_RD:
1725 if (readOpcodeRegister(insn, 4))
1726 return -1;
1727 break;
1728 case ENCODING_RO:
1729 if (readOpcodeRegister(insn, 8))
1730 return -1;
1731 break;
1732 case ENCODING_Rv:
1733 if (readOpcodeRegister(insn, 0))
1734 return -1;
1735 break;
1736 case ENCODING_FP:
1737 break;
1738 case ENCODING_VVVV:
1739 needVVVV = 0; /* Mark that we have found a VVVV operand. */
1740 if (!hasVVVV)
1741 return -1;
1742 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
1743 return -1;
1744 break;
1745 case ENCODING_WRITEMASK:
1746 if (readMaskRegister(insn))
1747 return -1;
1748 break;
1749 case ENCODING_DUP:
1750 break;
1751 default:
1752 dbgprintf(insn, "Encountered an operand with an unknown encoding.");
1753 return -1;
1754 }
1755 }
1756
1757 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
1758 if (needVVVV) return -1;
1759
1760 return 0;
1761 }
1762
1763 /*
1764 * decodeInstruction - Reads and interprets a full instruction provided by the
1765 * user.
1766 *
1767 * @param insn - A pointer to the instruction to be populated. Must be
1768 * pre-allocated.
1769 * @param reader - The function to be used to read the instruction's bytes.
1770 * @param readerArg - A generic argument to be passed to the reader to store
1771 * any internal state.
1772 * @param logger - If non-NULL, the function to be used to write log messages
1773 * and warnings.
1774 * @param loggerArg - A generic argument to be passed to the logger to store
1775 * any internal state.
1776 * @param startLoc - The address (in the reader's address space) of the first
1777 * byte in the instruction.
1778 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1779 * decode the instruction in.
1780 * @return - 0 if the instruction's memory could be read; nonzero if
1781 * not.
1782 */
1783 int decodeInstruction(struct InternalInstruction* insn,
1784 byteReader_t reader,
1785 const void* readerArg,
1786 dlog_t logger,
1787 void* loggerArg,
1788 const void* miiArg,
1789 uint64_t startLoc,
1790 DisassemblerMode mode) {
1791 memset(insn, 0, sizeof(struct InternalInstruction));
1792
1793 insn->reader = reader;
1794 insn->readerArg = readerArg;
1795 insn->dlog = logger;
1796 insn->dlogArg = loggerArg;
1797 insn->startLocation = startLoc;
1798 insn->readerCursor = startLoc;
1799 insn->mode = mode;
1800 insn->numImmediatesConsumed = 0;
1801
1802 if (readPrefixes(insn) ||
1803 readOpcode(insn) ||
1804 getID(insn, miiArg) ||
1805 insn->instructionID == 0 ||
1806 readOperands(insn))
1807 return -1;
1808
1809 insn->operands = &x86OperandSets[insn->spec->operands][0];
1810
1811 insn->length = insn->readerCursor - insn->startLocation;
1812
1813 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
1814 startLoc, insn->readerCursor, insn->length);
1815
1816 if (insn->length > 15)
1817 dbgprintf(insn, "Instruction exceeds 15-byte limit");
1818
1819 return 0;
1820 }
0 //===-- X86DisassemblerDecoder.c - Disassembler decoder -------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is part of the X86 Disassembler.
10 // It contains the implementation of the instruction decoder.
11 // Documentation for the disassembler can be found in X86Disassembler.h.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include /* for va_*() */
16 #include /* for vsnprintf() */
17 #include /* for exit() */
18 #include /* for memset() */
19
20 #include "X86DisassemblerDecoder.h"
21
22 using namespace llvm::X86Disassembler;
23
24 #include "X86GenDisassemblerTables.inc"
25
26 #define TRUE 1
27 #define FALSE 0
28
29 #ifndef NDEBUG
30 #define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0)
31 #else
32 #define debug(s) do { } while (0)
33 #endif
34
35
36 /*
37 * contextForAttrs - Client for the instruction context table. Takes a set of
38 * attributes and returns the appropriate decode context.
39 *
40 * @param attrMask - Attributes, from the enumeration attributeBits.
41 * @return - The InstructionContext to use when looking up an
42 * an instruction with these attributes.
43 */
44 static InstructionContext contextForAttrs(uint16_t attrMask) {
45 return static_cast(CONTEXTS_SYM[attrMask]);
46 }
47
48 /*
49 * modRMRequired - Reads the appropriate instruction table to determine whether
50 * the ModR/M byte is required to decode a particular instruction.
51 *
52 * @param type - The opcode type (i.e., how many bytes it has).
53 * @param insnContext - The context for the instruction, as returned by
54 * contextForAttrs.
55 * @param opcode - The last byte of the instruction's opcode, not counting
56 * ModR/M extensions and escapes.
57 * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
58 */
59 static int modRMRequired(OpcodeType type,
60 InstructionContext insnContext,
61 uint16_t opcode) {
62 const struct ContextDecision* decision = 0;
63
64 switch (type) {
65 case ONEBYTE:
66 decision = &ONEBYTE_SYM;
67 break;
68 case TWOBYTE:
69 decision = &TWOBYTE_SYM;
70 break;
71 case THREEBYTE_38:
72 decision = &THREEBYTE38_SYM;
73 break;
74 case THREEBYTE_3A:
75 decision = &THREEBYTE3A_SYM;
76 break;
77 case XOP8_MAP:
78 decision = &XOP8_MAP_SYM;
79 break;
80 case XOP9_MAP:
81 decision = &XOP9_MAP_SYM;
82 break;
83 case XOPA_MAP:
84 decision = &XOPA_MAP_SYM;
85 break;
86 }
87
88 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
89 modrm_type != MODRM_ONEENTRY;
90 }
91
92 /*
93 * decode - Reads the appropriate instruction table to obtain the unique ID of
94 * an instruction.
95 *
96 * @param type - See modRMRequired().
97 * @param insnContext - See modRMRequired().
98 * @param opcode - See modRMRequired().
99 * @param modRM - The ModR/M byte if required, or any value if not.
100 * @return - The UID of the instruction, or 0 on failure.
101 */
102 static InstrUID decode(OpcodeType type,
103 InstructionContext insnContext,
104 uint8_t opcode,
105 uint8_t modRM) {
106 const struct ModRMDecision* dec = 0;
107
108 switch (type) {
109 case ONEBYTE:
110 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
111 break;
112 case TWOBYTE:
113 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
114 break;
115 case THREEBYTE_38:
116 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
117 break;
118 case THREEBYTE_3A:
119 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
120 break;
121 case XOP8_MAP:
122 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
123 break;
124 case XOP9_MAP:
125 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
126 break;
127 case XOPA_MAP:
128 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
129 break;
130 }
131
132 switch (dec->modrm_type) {
133 default:
134 debug("Corrupt table! Unknown modrm_type");
135 return 0;
136 case MODRM_ONEENTRY:
137 return modRMTable[dec->instructionIDs];
138 case MODRM_SPLITRM:
139 if (modFromModRM(modRM) == 0x3)
140 return modRMTable[dec->instructionIDs+1];
141 return modRMTable[dec->instructionIDs];
142 case MODRM_SPLITREG:
143 if (modFromModRM(modRM) == 0x3)
144 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
145 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
146 case MODRM_SPLITMISC:
147 if (modFromModRM(modRM) == 0x3)
148 return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
149 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
150 case MODRM_FULL:
151 return modRMTable[dec->instructionIDs+modRM];
152 }
153 }
154
155 /*
156 * specifierForUID - Given a UID, returns the name and operand specification for
157 * that instruction.
158 *
159 * @param uid - The unique ID for the instruction. This should be returned by
160 * decode(); specifierForUID will not check bounds.
161 * @return - A pointer to the specification for that instruction.
162 */
163 static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
164 return &INSTRUCTIONS_SYM[uid];
165 }
166
167 /*
168 * consumeByte - Uses the reader function provided by the user to consume one
169 * byte from the instruction's memory and advance the cursor.
170 *
171 * @param insn - The instruction with the reader function to use. The cursor
172 * for this instruction is advanced.
173 * @param byte - A pointer to a pre-allocated memory buffer to be populated
174 * with the data read.
175 * @return - 0 if the read was successful; nonzero otherwise.
176 */
177 static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
178 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
179
180 if (!ret)
181 ++(insn->readerCursor);
182
183 return ret;
184 }
185
186 /*
187 * lookAtByte - Like consumeByte, but does not advance the cursor.
188 *
189 * @param insn - See consumeByte().
190 * @param byte - See consumeByte().
191 * @return - See consumeByte().
192 */
193 static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
194 return insn->reader(insn->readerArg, byte, insn->readerCursor);
195 }
196
197 static void unconsumeByte(struct InternalInstruction* insn) {
198 insn->readerCursor--;
199 }
200
201 #define CONSUME_FUNC(name, type) \
202 static int name(struct InternalInstruction* insn, type* ptr) { \
203 type combined = 0; \
204 unsigned offset; \
205 for (offset = 0; offset < sizeof(type); ++offset) { \
206 uint8_t byte; \
207 int ret = insn->reader(insn->readerArg, \
208 &byte, \
209 insn->readerCursor + offset); \
210 if (ret) \
211 return ret; \
212 combined = combined | ((uint64_t)byte << (offset * 8)); \
213 } \
214 *ptr = combined; \
215 insn->readerCursor += sizeof(type); \
216 return 0; \
217 }
218
219 /*
220 * consume* - Use the reader function provided by the user to consume data
221 * values of various sizes from the instruction's memory and advance the
222 * cursor appropriately. These readers perform endian conversion.
223 *
224 * @param insn - See consumeByte().
225 * @param ptr - A pointer to a pre-allocated memory of appropriate size to
226 * be populated with the data read.
227 * @return - See consumeByte().
228 */
229 CONSUME_FUNC(consumeInt8, int8_t)
230 CONSUME_FUNC(consumeInt16, int16_t)
231 CONSUME_FUNC(consumeInt32, int32_t)
232 CONSUME_FUNC(consumeUInt16, uint16_t)
233 CONSUME_FUNC(consumeUInt32, uint32_t)
234 CONSUME_FUNC(consumeUInt64, uint64_t)
235
236 /*
237 * dbgprintf - Uses the logging function provided by the user to log a single
238 * message, typically without a carriage-return.
239 *
240 * @param insn - The instruction containing the logging function.
241 * @param format - See printf().
242 * @param ... - See printf().
243 */
244 static void dbgprintf(struct InternalInstruction* insn,
245 const char* format,
246 ...) {
247 char buffer[256];
248 va_list ap;
249
250 if (!insn->dlog)
251 return;
252
253 va_start(ap, format);
254 (void)vsnprintf(buffer, sizeof(buffer), format, ap);
255 va_end(ap);
256
257 insn->dlog(insn->dlogArg, buffer);
258
259 return;
260 }
261
262 /*
263 * setPrefixPresent - Marks that a particular prefix is present at a particular
264 * location.
265 *
266 * @param insn - The instruction to be marked as having the prefix.
267 * @param prefix - The prefix that is present.
268 * @param location - The location where the prefix is located (in the address
269 * space of the instruction's reader).
270 */
271 static void setPrefixPresent(struct InternalInstruction* insn,
272 uint8_t prefix,
273 uint64_t location)
274 {
275 insn->prefixPresent[prefix] = 1;
276 insn->prefixLocations[prefix] = location;
277 }
278
279 /*
280 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
281 * present at a given location.
282 *
283 * @param insn - The instruction to be queried.
284 * @param prefix - The prefix.
285 * @param location - The location to query.
286 * @return - Whether the prefix is at that location.
287 */
288 static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
289 uint8_t prefix,
290 uint64_t location)
291 {
292 if (insn->prefixPresent[prefix] == 1 &&
293 insn->prefixLocations[prefix] == location)
294 return TRUE;
295 else
296 return FALSE;
297 }
298
299 /*
300 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
301 * instruction as having them. Also sets the instruction's default operand,
302 * address, and other relevant data sizes to report operands correctly.
303 *
304 * @param insn - The instruction whose prefixes are to be read.
305 * @return - 0 if the instruction could be read until the end of the prefix
306 * bytes, and no prefixes conflicted; nonzero otherwise.
307 */
308 static int readPrefixes(struct InternalInstruction* insn) {
309 BOOL isPrefix = TRUE;
310 BOOL prefixGroups[4] = { FALSE };
311 uint64_t prefixLocation;
312 uint8_t byte = 0;
313 uint8_t nextByte;
314
315 BOOL hasAdSize = FALSE;
316 BOOL hasOpSize = FALSE;
317
318 dbgprintf(insn, "readPrefixes()");
319
320 while (isPrefix) {
321 prefixLocation = insn->readerCursor;
322
323 /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
324 if (consumeByte(insn, &byte))
325 break;
326
327 /*
328 * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
329 * break and let it be disassembled as a normal "instruction".
330 */
331 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
332 break;
333
334 if (insn->readerCursor - 1 == insn->startLocation
335 && (byte == 0xf2 || byte == 0xf3)
336 && !lookAtByte(insn, &nextByte))
337 {
338 /*
339 * If the byte is 0xf2 or 0xf3, and any of the following conditions are
340 * met:
341 * - it is followed by a LOCK (0xf0) prefix
342 * - it is followed by an xchg instruction
343 * then it should be disassembled as a xacquire/xrelease not repne/rep.
344 */
345 if ((byte == 0xf2 || byte == 0xf3) &&
346 ((nextByte == 0xf0) |
347 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
348 insn->xAcquireRelease = TRUE;
349 /*
350 * Also if the byte is 0xf3, and the following condition is met:
351 * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
352 * "mov mem, imm" (opcode 0xc6/0xc7) instructions.
353 * then it should be disassembled as an xrelease not rep.
354 */
355 if (byte == 0xf3 &&
356 (nextByte == 0x88 || nextByte == 0x89 ||
357 nextByte == 0xc6 || nextByte == 0xc7))
358 insn->xAcquireRelease = TRUE;
359 if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
360 if (consumeByte(insn, &nextByte))
361 return -1;
362 if (lookAtByte(insn, &nextByte))
363 return -1;
364 unconsumeByte(insn);
365 }
366 if (nextByte != 0x0f && nextByte != 0x90)
367 break;
368 }
369
370 switch (byte) {
371 case 0xf0: /* LOCK */
372 case 0xf2: /* REPNE/REPNZ */
373 case 0xf3: /* REP or REPE/REPZ */
374 if (prefixGroups[0])
375 dbgprintf(insn, "Redundant Group 1 prefix");
376 prefixGroups[0] = TRUE;
377 setPrefixPresent(insn, byte, prefixLocation);
378 break;
379 case 0x2e: /* CS segment override -OR- Branch not taken */
380 case 0x36: /* SS segment override -OR- Branch taken */
381 case 0x3e: /* DS segment override */
382 case 0x26: /* ES segment override */
383 case 0x64: /* FS segment override */
384 case 0x65: /* GS segment override */
385 switch (byte) {
386 case 0x2e:
387 insn->segmentOverride = SEG_OVERRIDE_CS;
388 break;
389 case 0x36:
390 insn->segmentOverride = SEG_OVERRIDE_SS;
391 break;
392 case 0x3e:
393 insn->segmentOverride = SEG_OVERRIDE_DS;
394 break;
395 case 0x26:
396 insn->segmentOverride = SEG_OVERRIDE_ES;
397 break;
398 case 0x64:
399 insn->segmentOverride = SEG_OVERRIDE_FS;
400 break;
401 case 0x65:
402 insn->segmentOverride = SEG_OVERRIDE_GS;
403 break;
404 default:
405 debug("Unhandled override");
406 return -1;
407 }
408 if (prefixGroups[1])
409 dbgprintf(insn, "Redundant Group 2 prefix");
410 prefixGroups[1] = TRUE;
411 setPrefixPresent(insn, byte, prefixLocation);
412 break;
413 case 0x66: /* Operand-size override */
414 if (prefixGroups[2])
415 dbgprintf(insn, "Redundant Group 3 prefix");
416 prefixGroups[2] = TRUE;
417 hasOpSize = TRUE;
418 setPrefixPresent(insn, byte, prefixLocation);
419 break;
420 case 0x67: /* Address-size override */
421 if (prefixGroups[3])
422 dbgprintf(insn, "Redundant Group 4 prefix");
423 prefixGroups[3] = TRUE;
424 hasAdSize = TRUE;
425 setPrefixPresent(insn, byte, prefixLocation);
426 break;
427 default: /* Not a prefix byte */
428 isPrefix = FALSE;
429 break;
430 }
431
432 if (isPrefix)
433 dbgprintf(insn, "Found prefix 0x%hhx", byte);
434 }
435
436 insn->vectorExtensionType = TYPE_NO_VEX_XOP;
437
438 if (byte == 0x62) {
439 uint8_t byte1, byte2;
440
441 if (consumeByte(insn, &byte1)) {
442 dbgprintf(insn, "Couldn't read second byte of EVEX prefix");
443 return -1;
444 }
445
446 if (lookAtByte(insn, &byte2)) {
447 dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
448 return -1;
449 }
450
451 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
452 ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
453 insn->vectorExtensionType = TYPE_EVEX;
454 }
455 else {
456 unconsumeByte(insn); /* unconsume byte1 */
457 unconsumeByte(insn); /* unconsume byte */
458 insn->necessaryPrefixLocation = insn->readerCursor - 2;
459 }
460
461 if (insn->vectorExtensionType == TYPE_EVEX) {
462 insn->vectorExtensionPrefix[0] = byte;
463 insn->vectorExtensionPrefix[1] = byte1;
464 if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) {
465 dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
466 return -1;
467 }
468 if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) {
469 dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix");
470 return -1;
471 }
472
473 /* We simulate the REX prefix for simplicity's sake */
474 if (insn->mode == MODE_64BIT) {
475 insn->rexPrefix = 0x40
476 | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3)
477 | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2)
478 | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1)
479 | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
480 }
481
482 dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
483 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
484 insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);
485 }
486 }
487 else if (byte == 0xc4) {
488 uint8_t byte1;
489
490 if (lookAtByte(insn, &byte1)) {
491 dbgprintf(insn, "Couldn't read second byte of VEX");
492 return -1;
493 }
494
495 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
496 insn->vectorExtensionType = TYPE_VEX_3B;
497 insn->necessaryPrefixLocation = insn->readerCursor - 1;
498 }
499 else {
500 unconsumeByte(insn);
501 insn->necessaryPrefixLocation = insn->readerCursor - 1;
502 }
503
504 if (insn->vectorExtensionType == TYPE_VEX_3B) {
505 insn->vectorExtensionPrefix[0] = byte;
506 consumeByte(insn, &insn->vectorExtensionPrefix[1]);
507 consumeByte(insn, &insn->vectorExtensionPrefix[2]);
508
509 /* We simulate the REX prefix for simplicity's sake */
510
511 if (insn->mode == MODE_64BIT) {
512 insn->rexPrefix = 0x40
513 | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3)
514 | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2)
515 | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1)
516 | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
517 }
518
519 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
520 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
521 insn->vectorExtensionPrefix[2]);
522 }
523 }
524 else if (byte == 0xc5) {
525 uint8_t byte1;
526
527 if (lookAtByte(insn, &byte1)) {
528 dbgprintf(insn, "Couldn't read second byte of VEX");
529 return -1;
530 }
531
532 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
533 insn->vectorExtensionType = TYPE_VEX_2B;
534 }
535 else {
536 unconsumeByte(insn);
537 }
538
539 if (insn->vectorExtensionType == TYPE_VEX_2B) {
540 insn->vectorExtensionPrefix[0] = byte;
541 consumeByte(insn, &insn->vectorExtensionPrefix[1]);
542
543 if (insn->mode == MODE_64BIT) {
544 insn->rexPrefix = 0x40
545 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
546 }
547
548 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1]))
549 {
550 default:
551 break;
552 case VEX_PREFIX_66:
553 hasOpSize = TRUE;
554 break;
555 }
556
557 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx",
558 insn->vectorExtensionPrefix[0],
559 insn->vectorExtensionPrefix[1]);
560 }
561 }
562 else if (byte == 0x8f) {
563 uint8_t byte1;
564
565 if (lookAtByte(insn, &byte1)) {
566 dbgprintf(insn, "Couldn't read second byte of XOP");
567 return -1;
568 }
569
570 if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */
571 insn->vectorExtensionType = TYPE_XOP;
572 insn->necessaryPrefixLocation = insn->readerCursor - 1;
573 }
574 else {
575 unconsumeByte(insn);
576 insn->necessaryPrefixLocation = insn->readerCursor - 1;
577 }
578
579 if (insn->vectorExtensionType == TYPE_XOP) {
580 insn->vectorExtensionPrefix[0] = byte;
581 consumeByte(insn, &insn->vectorExtensionPrefix[1]);
582 consumeByte(insn, &insn->vectorExtensionPrefix[2]);
583
584 /* We simulate the REX prefix for simplicity's sake */
585
586 if (insn->mode == MODE_64BIT) {
587 insn->rexPrefix = 0x40
588 | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3)
589 | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2)
590 | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1)
591 | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
592 }
593
594 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2]))
595 {
596 default:
597 break;
598 case VEX_PREFIX_66:
599 hasOpSize = TRUE;
600 break;
601 }
602
603 dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
604 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
605 insn->vectorExtensionPrefix[2]);
606 }
607 }
608 else {
609 if (insn->mode == MODE_64BIT) {
610 if ((byte & 0xf0) == 0x40) {
611 uint8_t opcodeByte;
612
613 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
614 dbgprintf(insn, "Redundant REX prefix");
615 return -1;
616 }
617
618 insn->rexPrefix = byte;
619 insn->necessaryPrefixLocation = insn->readerCursor - 2;
620
621 dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
622 } else {
623 unconsumeByte(insn);
624 insn->necessaryPrefixLocation = insn->readerCursor - 1;
625 }
626 } else {
627 unconsumeByte(insn);
628 insn->necessaryPrefixLocation = insn->readerCursor - 1;
629 }
630 }
631
632 if (insn->mode == MODE_16BIT) {
633 insn->registerSize = (hasOpSize ? 4 : 2);
634 insn->addressSize = (hasAdSize ? 4 : 2);
635 insn->displacementSize = (hasAdSize ? 4 : 2);
636 insn->immediateSize = (hasOpSize ? 4 : 2);
637 } else if (insn->mode == MODE_32BIT) {
638 insn->registerSize = (hasOpSize ? 2 : 4);
639 insn->addressSize = (hasAdSize ? 2 : 4);
640 insn->displacementSize = (hasAdSize ? 2 : 4);
641 insn->immediateSize = (hasOpSize ? 2 : 4);
642 } else if (insn->mode == MODE_64BIT) {
643 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
644 insn->registerSize = 8;
645 insn->addressSize = (hasAdSize ? 4 : 8);
646 insn->displacementSize = 4;
647 insn->immediateSize = 4;
648 } else if (insn->rexPrefix) {
649 insn->registerSize = (hasOpSize ? 2 : 4);
650 insn->addressSize = (hasAdSize ? 4 : 8);
651 insn->displacementSize = (hasOpSize ? 2 : 4);
652 insn->immediateSize = (hasOpSize ? 2 : 4);
653 } else {
654 insn->registerSize = (hasOpSize ? 2 : 4);
655 insn->addressSize = (hasAdSize ? 4 : 8);
656 insn->displacementSize = (hasOpSize ? 2 : 4);
657 insn->immediateSize = (hasOpSize ? 2 : 4);
658 }
659 }
660
661 return 0;
662 }
663
664 /*
665 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
666 * extended or escape opcodes).
667 *
668 * @param insn - The instruction whose opcode is to be read.
669 * @return - 0 if the opcode could be read successfully; nonzero otherwise.
670 */
671 static int readOpcode(struct InternalInstruction* insn) {
672 /* Determine the length of the primary opcode */
673
674 uint8_t current;
675
676 dbgprintf(insn, "readOpcode()");
677
678 insn->opcodeType = ONEBYTE;
679
680 if (insn->vectorExtensionType == TYPE_EVEX)
681 {
682 switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
683 default:
684 dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
685 mmFromEVEX2of4(insn->vectorExtensionPrefix[1]));
686 return -1;
687 case VEX_LOB_0F:
688 insn->opcodeType = TWOBYTE;
689 return consumeByte(insn, &insn->opcode);
690 case VEX_LOB_0F38:
691 insn->opcodeType = THREEBYTE_38;
692 return consumeByte(insn, &insn->opcode);
693 case VEX_LOB_0F3A:
694 insn->opcodeType = THREEBYTE_3A;
695 return consumeByte(insn, &insn->opcode);
696 }
697 }
698 else if (insn->vectorExtensionType == TYPE_VEX_3B) {
699 switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
700 default:
701 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
702 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
703 return -1;
704 case VEX_LOB_0F:
705 insn->opcodeType = TWOBYTE;
706 return consumeByte(insn, &insn->opcode);
707 case VEX_LOB_0F38:
708 insn->opcodeType = THREEBYTE_38;
709 return consumeByte(insn, &insn->opcode);
710 case VEX_LOB_0F3A:
711 insn->opcodeType = THREEBYTE_3A;
712 return consumeByte(insn, &insn->opcode);
713 }
714 }
715 else if (insn->vectorExtensionType == TYPE_VEX_2B) {
716 insn->opcodeType = TWOBYTE;
717 return consumeByte(insn, &insn->opcode);
718 }
719 else if (insn->vectorExtensionType == TYPE_XOP) {
720 switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
721 default:
722 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
723 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
724 return -1;
725 case XOP_MAP_SELECT_8:
726 insn->opcodeType = XOP8_MAP;
727 return consumeByte(insn, &insn->opcode);
728 case XOP_MAP_SELECT_9:
729 insn->opcodeType = XOP9_MAP;
730 return consumeByte(insn, &insn->opcode);
731 case XOP_MAP_SELECT_A:
732 insn->opcodeType = XOPA_MAP;
733 return consumeByte(insn, &insn->opcode);
734 }
735 }
736
737 if (consumeByte(insn, ¤t))
738 return -1;
739
740 if (current == 0x0f) {
741 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
742
743 if (consumeByte(insn, ¤t))
744 return -1;
745
746 if (current == 0x38) {
747 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
748
749 if (consumeByte(insn, ¤t))
750 return -1;
751
752 insn->opcodeType = THREEBYTE_38;
753 } else if (current == 0x3a) {
754 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
755
756 if (consumeByte(insn, ¤t))
757 return -1;
758
759 insn->opcodeType = THREEBYTE_3A;
760 } else {
761 dbgprintf(insn, "Didn't find a three-byte escape prefix");
762
763 insn->opcodeType = TWOBYTE;
764 }
765 }
766
767 /*
768 * At this point we have consumed the full opcode.
769 * Anything we consume from here on must be unconsumed.
770 */
771
772 insn->opcode = current;
773
774 return 0;
775 }
776
777 static int readModRM(struct InternalInstruction* insn);
778
779 /*
780 * getIDWithAttrMask - Determines the ID of an instruction, consuming
781 * the ModR/M byte as appropriate for extended and escape opcodes,
782 * and using a supplied attribute mask.
783 *
784 * @param instructionID - A pointer whose target is filled in with the ID of the
785 * instruction.
786 * @param insn - The instruction whose ID is to be determined.
787 * @param attrMask - The attribute mask to search.
788 * @return - 0 if the ModR/M could be read when needed or was not
789 * needed; nonzero otherwise.
790 */
791 static int getIDWithAttrMask(uint16_t* instructionID,
792 struct InternalInstruction* insn,
793 uint16_t attrMask) {
794 BOOL hasModRMExtension;
795
796 InstructionContext instructionClass = contextForAttrs(attrMask);
797
798 hasModRMExtension = modRMRequired(insn->opcodeType,
799 instructionClass,
800 insn->opcode);
801
802 if (hasModRMExtension) {
803 if (readModRM(insn))
804 return -1;
805
806 *instructionID = decode(insn->opcodeType,
807 instructionClass,
808 insn->opcode,
809 insn->modRM);
810 } else {
811 *instructionID = decode(insn->opcodeType,
812 instructionClass,
813 insn->opcode,
814 0);
815 }
816
817 return 0;
818 }
819
820 /*
821 * is16BitEquivalent - Determines whether two instruction names refer to
822 * equivalent instructions but one is 16-bit whereas the other is not.
823 *
824 * @param orig - The instruction that is not 16-bit
825 * @param equiv - The instruction that is 16-bit
826 */
827 static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
828 off_t i;
829
830 for (i = 0;; i++) {
831 if (orig[i] == '\0' && equiv[i] == '\0')
832 return TRUE;
833 if (orig[i] == '\0' || equiv[i] == '\0')
834 return FALSE;
835 if (orig[i] != equiv[i]) {
836 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
837 continue;
838 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
839 continue;
840 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
841 continue;
842 return FALSE;
843 }
844 }
845 }
846
847 /*
848 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
849 * appropriate for extended and escape opcodes. Determines the attributes and
850 * context for the instruction before doing so.
851 *
852 * @param insn - The instruction whose ID is to be determined.
853 * @return - 0 if the ModR/M could be read when needed or was not needed;
854 * nonzero otherwise.
855 */
856 static int getID(struct InternalInstruction* insn, const void *miiArg) {
857 uint16_t attrMask;
858 uint16_t instructionID;
859
860 dbgprintf(insn, "getID()");
861
862 attrMask = ATTR_NONE;
863
864 if (insn->mode == MODE_64BIT)
865 attrMask |= ATTR_64BIT;
866
867 if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
868 attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
869
870 if (insn->vectorExtensionType == TYPE_EVEX) {
871 switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
872 case VEX_PREFIX_66:
873 attrMask |= ATTR_OPSIZE;
874 break;
875 case VEX_PREFIX_F3:
876 attrMask |= ATTR_XS;
877 break;
878 case VEX_PREFIX_F2:
879 attrMask |= ATTR_XD;
880 break;
881 }
882
883 if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
884 attrMask |= ATTR_EVEXKZ;
885 if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
886 attrMask |= ATTR_EVEXB;
887 if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
888 attrMask |= ATTR_EVEXK;
889 if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
890 attrMask |= ATTR_EVEXL;
891 if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
892 attrMask |= ATTR_EVEXL2;
893 }
894 else if (insn->vectorExtensionType == TYPE_VEX_3B) {
895 switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
896 case VEX_PREFIX_66:
897 attrMask |= ATTR_OPSIZE;
898 break;
899 case VEX_PREFIX_F3:
900 attrMask |= ATTR_XS;
901 break;
902 case VEX_PREFIX_F2:
903 attrMask |= ATTR_XD;
904 break;
905 }
906
907 if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
908 attrMask |= ATTR_VEXL;
909 }
910 else if (insn->vectorExtensionType == TYPE_VEX_2B) {
911 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
912 case VEX_PREFIX_66:
913 attrMask |= ATTR_OPSIZE;
914 break;
915 case VEX_PREFIX_F3:
916 attrMask |= ATTR_XS;
917 break;
918 case VEX_PREFIX_F2:
919 attrMask |= ATTR_XD;
920 break;
921 }
922
923 if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
924 attrMask |= ATTR_VEXL;
925 }
926 else if (insn->vectorExtensionType == TYPE_XOP) {
927 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
928 case VEX_PREFIX_66:
929 attrMask |= ATTR_OPSIZE;
930 break;
931 case VEX_PREFIX_F3:
932 attrMask |= ATTR_XS;
933 break;
934 case VEX_PREFIX_F2:
935 attrMask |= ATTR_XD;
936 break;
937 }
938
939 if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
940 attrMask |= ATTR_VEXL;
941 }
942 else {
943 return -1;
944 }
945 }
946 else {
947 if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
948 attrMask |= ATTR_OPSIZE;
949 else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
950 attrMask |= ATTR_ADSIZE;
951 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
952 attrMask |= ATTR_XS;
953 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
954 attrMask |= ATTR_XD;
955 }
956
957 if (insn->rexPrefix & 0x08)
958 attrMask |= ATTR_REXW;
959
960 if (getIDWithAttrMask(&instructionID, insn, attrMask))
961 return -1;
962
963 /*
964 * JCXZ/JECXZ need special handling for 16-bit mode because the meaning
965 * of the AdSize prefix is inverted w.r.t. 32-bit mode.
966 */
967 if (insn->mode == MODE_16BIT && insn->opcode == 0xE3) {
968 const struct InstructionSpecifier *spec;
969 spec = specifierForUID(instructionID);
970
971 /*
972 * Check for Ii8PCRel instructions. We could alternatively do a
973 * string-compare on the names, but this is probably cheaper.
974 */
975 if (x86OperandSets[spec->operands][0].type == TYPE_REL8) {
976 attrMask ^= ATTR_ADSIZE;
977 if (getIDWithAttrMask(&instructionID, insn, attrMask))
978 return -1;
979 }
980 }
981
982 /* The following clauses compensate for limitations of the tables. */
983
984 if ((insn->mode == MODE_16BIT || insn->prefixPresent[0x66]) &&
985 !(attrMask & ATTR_OPSIZE)) {
986 /*
987 * The instruction tables make no distinction between instructions that
988 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
989 * particular spot (i.e., many MMX operations). In general we're
990 * conservative, but in the specific case where OpSize is present but not
991 * in the right place we check if there's a 16-bit operation.
992 */
993
994 const struct InstructionSpecifier *spec;
995 uint16_t instructionIDWithOpsize;
996 const char *specName, *specWithOpSizeName;
997
998 spec = specifierForUID(instructionID);
999
1000 if (getIDWithAttrMask(&instructionIDWithOpsize,
1001 insn,
1002 attrMask | ATTR_OPSIZE)) {
1003 /*
1004 * ModRM required with OpSize but not present; give up and return version
1005 * without OpSize set
1006 */
1007
1008 insn->instructionID = instructionID;
1009 insn->spec = spec;
1010 return 0;
1011 }
1012
1013 specName = GetInstrName(instructionID, miiArg);
1014 specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg);
1015
1016 if (is16BitEquivalent(specName, specWithOpSizeName) &&
1017 (insn->mode == MODE_16BIT) ^ insn->prefixPresent[0x66]) {
1018 insn->instructionID = instructionIDWithOpsize;
1019 insn->spec = specifierForUID(instructionIDWithOpsize);
1020 } else {
1021 insn->instructionID = instructionID;
1022 insn->spec = spec;
1023 }
1024 return 0;
1025 }
1026
1027 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
1028 insn->rexPrefix & 0x01) {
1029 /*
1030 * NOOP shouldn't decode as NOOP if REX.b is set. Instead
1031 * it should decode as XCHG %r8, %eax.
1032 */
1033
1034 const struct InstructionSpecifier *spec;
1035 uint16_t instructionIDWithNewOpcode;
1036 const struct InstructionSpecifier *specWithNewOpcode;
1037
1038 spec = specifierForUID(instructionID);
1039
1040 /* Borrow opcode from one of the other XCHGar opcodes */
1041 insn->opcode = 0x91;
1042
1043 if (getIDWithAttrMask(&instructionIDWithNewOpcode,
1044 insn,
1045 attrMask)) {
1046 insn->opcode = 0x90;
1047
1048 insn->instructionID = instructionID;
1049 insn->spec = spec;
1050 return 0;
1051 }
1052
1053 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
1054
1055 /* Change back */
1056 insn->opcode = 0x90;
1057
1058 insn->instructionID = instructionIDWithNewOpcode;
1059 insn->spec = specWithNewOpcode;
1060
1061 return 0;
1062 }
1063
1064 insn->instructionID = instructionID;
1065 insn->spec = specifierForUID(insn->instructionID);
1066
1067 return 0;
1068 }
1069
1070 /*
1071 * readSIB - Consumes the SIB byte to determine addressing information for an
1072 * instruction.
1073 *
1074 * @param insn - The instruction whose SIB byte is to be read.
1075 * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
1076 */
1077 static int readSIB(struct InternalInstruction* insn) {
1078 SIBIndex sibIndexBase = SIB_INDEX_NONE;
1079 SIBBase sibBaseBase = SIB_BASE_NONE;
1080 uint8_t index, base;
1081
1082 dbgprintf(insn, "readSIB()");
1083
1084 if (insn->consumedSIB)
1085 return 0;
1086
1087 insn->consumedSIB = TRUE;
1088
1089 switch (insn->addressSize) {
1090 case 2:
1091 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
1092 return -1;
1093 case 4:
1094 sibIndexBase = SIB_INDEX_EAX;
1095 sibBaseBase = SIB_BASE_EAX;
1096 break;
1097 case 8:
1098 sibIndexBase = SIB_INDEX_RAX;
1099 sibBaseBase = SIB_BASE_RAX;
1100 break;
1101 }
1102
1103 if (consumeByte(insn, &insn->sib))
1104 return -1;
1105
1106 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
1107 if (insn->vectorExtensionType == TYPE_EVEX)
1108 index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4;
1109
1110 switch (index) {
1111 case 0x4:
1112 insn->sibIndex = SIB_INDEX_NONE;
1113 break;
1114 default:
1115 insn->sibIndex = (SIBIndex)(sibIndexBase + index);
1116 if (insn->sibIndex == SIB_INDEX_sib ||
1117 insn->sibIndex == SIB_INDEX_sib64)
1118 insn->sibIndex = SIB_INDEX_NONE;
1119 break;
1120 }
1121
1122 switch (scaleFromSIB(insn->sib)) {
1123 case 0:
1124 insn->sibScale = 1;
1125 break;
1126 case 1:
1127 insn->sibScale = 2;
1128 break;
1129 case 2:
1130 insn->sibScale = 4;
1131 break;
1132 case 3:
1133 insn->sibScale = 8;
1134 break;
1135 }
1136
1137 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
1138
1139 switch (base) {
1140 case 0x5:
1141 case 0xd:
1142 switch (modFromModRM(insn->modRM)) {
1143 case 0x0:
1144 insn->eaDisplacement = EA_DISP_32;
1145 insn->sibBase = SIB_BASE_NONE;
1146 break;
1147 case 0x1:
1148 insn->eaDisplacement = EA_DISP_8;
1149 insn->sibBase = (SIBBase)(sibBaseBase + base);
1150 break;
1151 case 0x2:
1152 insn->eaDisplacement = EA_DISP_32;
1153 insn->sibBase = (SIBBase)(sibBaseBase + base);
1154 break;
1155 case 0x3:
1156 debug("Cannot have Mod = 0b11 and a SIB byte");
1157 return -1;
1158 }
1159 break;
1160 default:
1161 insn->sibBase = (SIBBase)(sibBaseBase + base);
1162 break;
1163 }
1164
1165 return 0;
1166 }
1167
1168 /*
1169 * readDisplacement - Consumes the displacement of an instruction.
1170 *
1171 * @param insn - The instruction whose displacement is to be read.
1172 * @return - 0 if the displacement byte was successfully read; nonzero
1173 * otherwise.
1174 */
1175 static int readDisplacement(struct InternalInstruction* insn) {
1176 int8_t d8;
1177 int16_t d16;
1178 int32_t d32;
1179
1180 dbgprintf(insn, "readDisplacement()");
1181
1182 if (insn->consumedDisplacement)
1183 return 0;
1184
1185 insn->consumedDisplacement = TRUE;
1186 insn->displacementOffset = insn->readerCursor - insn->startLocation;
1187
1188 switch (insn->eaDisplacement) {
1189 case EA_DISP_NONE:
1190 insn->consumedDisplacement = FALSE;
1191 break;
1192 case EA_DISP_8:
1193 if (consumeInt8(insn, &d8))
1194 return -1;
1195 insn->displacement = d8;
1196 break;
1197 case EA_DISP_16:
1198 if (consumeInt16(insn, &d16))
1199 return -1;
1200 insn->displacement = d16;
1201 break;
1202 case EA_DISP_32:
1203 if (consumeInt32(insn, &d32))
1204 return -1;
1205 insn->displacement = d32;
1206 break;
1207 }
1208
1209 insn->consumedDisplacement = TRUE;
1210 return 0;
1211 }
1212
1213 /*
1214 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
1215 * displacement) for an instruction and interprets it.
1216 *
1217 * @param insn - The instruction whose addressing information is to be read.
1218 * @return - 0 if the information was successfully read; nonzero otherwise.
1219 */
1220 static int readModRM(struct InternalInstruction* insn) {
1221 uint8_t mod, rm, reg;
1222
1223 dbgprintf(insn, "readModRM()");
1224
1225 if (insn->consumedModRM)
1226 return 0;
1227
1228 if (consumeByte(insn, &insn->modRM))
1229 return -1;
1230 insn->consumedModRM = TRUE;
1231
1232 mod = modFromModRM(insn->modRM);
1233 rm = rmFromModRM(insn->modRM);
1234 reg = regFromModRM(insn->modRM);
1235
1236 /*
1237 * This goes by insn->registerSize to pick the correct register, which messes
1238 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
1239 * fixupReg().
1240 */
1241 switch (insn->registerSize) {
1242 case 2:
1243 insn->regBase = MODRM_REG_AX;
1244 insn->eaRegBase = EA_REG_AX;
1245 break;
1246 case 4:
1247 insn->regBase = MODRM_REG_EAX;
1248 insn->eaRegBase = EA_REG_EAX;
1249 break;
1250 case 8:
1251 insn->regBase = MODRM_REG_RAX;
1252 insn->eaRegBase = EA_REG_RAX;
1253 break;
1254 }
1255
1256 reg |= rFromREX(insn->rexPrefix) << 3;
1257 rm |= bFromREX(insn->rexPrefix) << 3;
1258 if (insn->vectorExtensionType == TYPE_EVEX) {
1259 reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1260 rm |= xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1261 }
1262
1263 insn->reg = (Reg)(insn->regBase + reg);
1264
1265 switch (insn->addressSize) {
1266 case 2:
1267 insn->eaBaseBase = EA_BASE_BX_SI;
1268
1269 switch (mod) {
1270 case 0x0:
1271 if (rm == 0x6) {
1272 insn->eaBase = EA_BASE_NONE;
1273 insn->eaDisplacement = EA_DISP_16;
1274 if (readDisplacement(insn))
1275 return -1;
1276 } else {
1277 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1278 insn->eaDisplacement = EA_DISP_NONE;
1279 }
1280 break;
1281 case 0x1:
1282 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1283 insn->eaDisplacement = EA_DISP_8;
1284 insn->displacementSize = 1;
1285 if (readDisplacement(insn))
1286 return -1;
1287 break;
1288 case 0x2:
1289 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1290 insn->eaDisplacement = EA_DISP_16;
1291 if (readDisplacement(insn))
1292 return -1;
1293 break;
1294 case 0x3:
1295 insn->eaBase = (EABase)(insn->eaRegBase + rm);
1296 if (readDisplacement(insn))
1297 return -1;
1298 break;
1299 }
1300 break;
1301 case 4:
1302 case 8:
1303 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
1304
1305 switch (mod) {
1306 case 0x0:
1307 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
1308 switch (rm) {
1309 case 0x14:
1310 case 0x4:
1311 case 0xc: /* in case REXW.b is set */
1312 insn->eaBase = (insn->addressSize == 4 ?
1313 EA_BASE_sib : EA_BASE_sib64);
1314 if (readSIB(insn) || readDisplacement(insn))
1315 return -1;
1316 break;
1317 case 0x5:
1318 insn->eaBase = EA_BASE_NONE;
1319 insn->eaDisplacement = EA_DISP_32;
1320 if (readDisplacement(insn))
1321 return -1;
1322 break;
1323 default:
1324 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1325 break;
1326 }
1327 break;
1328 case 0x1:
1329 insn->displacementSize = 1;
1330 /* FALLTHROUGH */
1331 case 0x2:
1332 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
1333 switch (rm) {
1334 case 0x14:
1335 case 0x4:
1336 case 0xc: /* in case REXW.b is set */
1337 insn->eaBase = EA_BASE_sib;
1338 if (readSIB(insn) || readDisplacement(insn))
1339 return -1;
1340 break;
1341 default:
1342 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1343 if (readDisplacement(insn))
1344 return -1;
1345 break;
1346 }
1347 break;
1348 case 0x3:
1349 insn->eaDisplacement = EA_DISP_NONE;
1350 insn->eaBase = (EABase)(insn->eaRegBase + rm);
1351 break;
1352 }
1353 break;
1354 } /* switch (insn->addressSize) */
1355
1356 return 0;
1357 }
1358
1359 #define GENERIC_FIXUP_FUNC(name, base, prefix) \
1360 static uint8_t name(struct InternalInstruction *insn, \
1361 OperandType type, \
1362 uint8_t index, \
1363 uint8_t *valid) { \
1364 *valid = 1; \
1365 switch (type) { \
1366 default: \
1367 debug("Unhandled register type"); \
1368 *valid = 0; \
1369 return 0; \
1370 case TYPE_Rv: \
1371 return base + index; \
1372 case TYPE_R8: \
1373 if (insn->rexPrefix && \
1374 index >= 4 && index <= 7) { \
1375 return prefix##_SPL + (index - 4); \
1376 } else { \
1377 return prefix##_AL + index; \
1378 } \
1379 case TYPE_R16: \
1380 return prefix##_AX + index; \
1381 case TYPE_R32: \
1382 return prefix##_EAX + index; \
1383 case TYPE_R64: \
1384 return prefix##_RAX + index; \
1385 case TYPE_XMM512: \
1386 return prefix##_ZMM0 + index; \
1387 case TYPE_XMM256: \
1388 return prefix##_YMM0 + index; \
1389 case TYPE_XMM128: \
1390 case TYPE_XMM64: \
1391 case TYPE_XMM32: \
1392 case TYPE_XMM: \
1393 return prefix##_XMM0 + index; \
1394 case TYPE_VK1: \
1395 case TYPE_VK8: \
1396 case TYPE_VK16: \
1397 return prefix##_K0 + index; \
1398 case TYPE_MM64: \
1399 case TYPE_MM32: \
1400 case TYPE_MM: \
1401 if (index > 7) \
1402 *valid = 0; \
1403 return prefix##_MM0 + index; \
1404 case TYPE_SEGMENTREG: \
1405 if (index > 5) \
1406 *valid = 0; \
1407 return prefix##_ES + index; \
1408 case TYPE_DEBUGREG: \
1409 if (index > 7) \
1410 *valid = 0; \
1411 return prefix##_DR0 + index; \
1412 case TYPE_CONTROLREG: \
1413 if (index > 8) \
1414 *valid = 0; \
1415 return prefix##_CR0 + index; \
1416 } \
1417 }
1418
1419 /*
1420 * fixup*Value - Consults an operand type to determine the meaning of the
1421 * reg or R/M field. If the operand is an XMM operand, for example, an
1422 * operand would be XMM0 instead of AX, which readModRM() would otherwise
1423 * misinterpret it as.
1424 *
1425 * @param insn - The instruction containing the operand.
1426 * @param type - The operand type.
1427 * @param index - The existing value of the field as reported by readModRM().
1428 * @param valid - The address of a uint8_t. The target is set to 1 if the
1429 * field is valid for the register class; 0 if not.
1430 * @return - The proper value.
1431 */
1432 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
1433 GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
1434
1435 /*
1436 * fixupReg - Consults an operand specifier to determine which of the
1437 * fixup*Value functions to use in correcting readModRM()'ss interpretation.
1438 *
1439 * @param insn - See fixup*Value().
1440 * @param op - The operand specifier.
1441 * @return - 0 if fixup was successful; -1 if the register returned was
1442 * invalid for its class.
1443 */
1444 static int fixupReg(struct InternalInstruction *insn,
1445 const struct OperandSpecifier *op) {
1446 uint8_t valid;
1447
1448 dbgprintf(insn, "fixupReg()");
1449
1450 switch ((OperandEncoding)op->encoding) {
1451 default:
1452 debug("Expected a REG or R/M encoding in fixupReg");
1453 return -1;
1454 case ENCODING_VVVV:
1455 insn->vvvv = (Reg)fixupRegValue(insn,
1456 (OperandType)op->type,
1457 insn->vvvv,
1458 &valid);
1459 if (!valid)
1460 return -1;
1461 break;
1462 case ENCODING_REG:
1463 insn->reg = (Reg)fixupRegValue(insn,
1464 (OperandType)op->type,
1465 insn->reg - insn->regBase,
1466 &valid);
1467 if (!valid)
1468 return -1;
1469 break;
1470 case ENCODING_RM:
1471 if (insn->eaBase >= insn->eaRegBase) {
1472 insn->eaBase = (EABase)fixupRMValue(insn,
1473 (OperandType)op->type,
1474 insn->eaBase - insn->eaRegBase,
1475 &valid);
1476 if (!valid)
1477 return -1;
1478 }
1479 break;
1480 }
1481
1482 return 0;
1483 }
1484
1485 /*
1486 * readOpcodeRegister - Reads an operand from the opcode field of an
1487 * instruction and interprets it appropriately given the operand width.
1488 * Handles AddRegFrm instructions.
1489 *
1490 * @param insn - the instruction whose opcode field is to be read.
1491 * @param size - The width (in bytes) of the register being specified.
1492 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1493 * RAX.
1494 * @return - 0 on success; nonzero otherwise.
1495 */
1496 static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
1497 dbgprintf(insn, "readOpcodeRegister()");
1498
1499 if (size == 0)
1500 size = insn->registerSize;
1501
1502 switch (size) {
1503 case 1:
1504 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1505 | (insn->opcode & 7)));
1506 if (insn->rexPrefix &&
1507 insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1508 insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1509 insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1510 + (insn->opcodeRegister - MODRM_REG_AL - 4));
1511 }
1512
1513 break;
1514 case 2:
1515 insn->opcodeRegister = (Reg)(MODRM_REG_AX
1516 + ((bFromREX(insn->rexPrefix) << 3)
1517 | (insn->opcode & 7)));
1518 break;
1519 case 4:
1520 insn->opcodeRegister = (Reg)(MODRM_REG_EAX
1521 + ((bFromREX(insn->rexPrefix) << 3)
1522 | (insn->opcode & 7)));
1523 break;
1524 case 8:
1525 insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1526 + ((bFromREX(insn->rexPrefix) << 3)
1527 | (insn->opcode & 7)));
1528 break;
1529 }
1530
1531 return 0;
1532 }
1533
1534 /*
1535 * readImmediate - Consumes an immediate operand from an instruction, given the
1536 * desired operand size.
1537 *
1538 * @param insn - The instruction whose operand is to be read.
1539 * @param size - The width (in bytes) of the operand.
1540 * @return - 0 if the immediate was successfully consumed; nonzero
1541 * otherwise.
1542 */
1543 static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1544 uint8_t imm8;
1545 uint16_t imm16;
1546 uint32_t imm32;
1547 uint64_t imm64;
1548
1549 dbgprintf(insn, "readImmediate()");
1550
1551 if (insn->numImmediatesConsumed == 2) {
1552 debug("Already consumed two immediates");
1553 return -1;
1554 }
1555
1556 if (size == 0)
1557 size = insn->immediateSize;
1558 else
1559 insn->immediateSize = size;
1560 insn->immediateOffset = insn->readerCursor - insn->startLocation;
1561
1562 switch (size) {
1563 case 1:
1564 if (consumeByte(insn, &imm8))
1565 return -1;
1566 insn->immediates[insn->numImmediatesConsumed] = imm8;
1567 break;
1568 case 2:
1569 if (consumeUInt16(insn, &imm16))
1570 return -1;
1571 insn->immediates[insn->numImmediatesConsumed] = imm16;
1572 break;
1573 case 4:
1574 if (consumeUInt32(insn, &imm32))
1575 return -1;
1576 insn->immediates[insn->numImmediatesConsumed] = imm32;
1577 break;
1578 case 8:
1579 if (consumeUInt64(insn, &imm64))
1580 return -1;
1581 insn->immediates[insn->numImmediatesConsumed] = imm64;
1582 break;
1583 }
1584
1585 insn->numImmediatesConsumed++;
1586
1587 return 0;
1588 }
1589
1590 /*
1591 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
1592 *
1593 * @param insn - The instruction whose operand is to be read.
1594 * @return - 0 if the vvvv was successfully consumed; nonzero
1595 * otherwise.
1596 */
1597 static int readVVVV(struct InternalInstruction* insn) {
1598 dbgprintf(insn, "readVVVV()");
1599
1600 int vvvv;
1601 if (insn->vectorExtensionType == TYPE_EVEX)
1602 vvvv = vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]);
1603 else if (insn->vectorExtensionType == TYPE_VEX_3B)
1604 vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
1605 else if (insn->vectorExtensionType == TYPE_VEX_2B)
1606 vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
1607 else if (insn->vectorExtensionType == TYPE_XOP)
1608 vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
1609 else
1610 return -1;
1611
1612 if (insn->mode != MODE_64BIT)
1613 vvvv &= 0x7;
1614
1615 insn->vvvv = static_cast(vvvv);
1616 return 0;
1617 }
1618
1619 /*
1620 * readMaskRegister - Reads an mask register from the opcode field of an
1621 * instruction.
1622 *
1623 * @param insn - The instruction whose opcode field is to be read.
1624 * @return - 0 on success; nonzero otherwise.
1625 */
1626 static int readMaskRegister(struct InternalInstruction* insn) {
1627 dbgprintf(insn, "readMaskRegister()");
1628
1629 if (insn->vectorExtensionType != TYPE_EVEX)
1630 return -1;
1631
1632 insn->writemask =
1633 static_cast(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
1634 return 0;
1635 }
1636
1637 /*
1638 * readOperands - Consults the specifier for an instruction and consumes all
1639 * operands for that instruction, interpreting them as it goes.
1640 *
1641 * @param insn - The instruction whose operands are to be read and interpreted.
1642 * @return - 0 if all operands could be read; nonzero otherwise.
1643 */
1644 static int readOperands(struct InternalInstruction* insn) {
1645 int index;
1646 int hasVVVV, needVVVV;
1647 int sawRegImm = 0;
1648
1649 dbgprintf(insn, "readOperands()");
1650
1651 /* If non-zero vvvv specified, need to make sure one of the operands
1652 uses it. */
1653 hasVVVV = !readVVVV(insn);
1654 needVVVV = hasVVVV && (insn->vvvv != 0);
1655
1656 for (index = 0; index < X86_MAX_OPERANDS; ++index) {
1657 switch (x86OperandSets[insn->spec->operands][index].encoding) {
1658 case ENCODING_NONE:
1659 case ENCODING_SI:
1660 case ENCODING_DI:
1661 break;
1662 case ENCODING_REG:
1663 case ENCODING_RM:
1664 if (readModRM(insn))
1665 return -1;
1666 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
1667 return -1;
1668 break;
1669 case ENCODING_CB:
1670 case ENCODING_CW:
1671 case ENCODING_CD:
1672 case ENCODING_CP:
1673 case ENCODING_CO:
1674 case ENCODING_CT:
1675 dbgprintf(insn, "We currently don't hande code-offset encodings");
1676 return -1;
1677 case ENCODING_IB:
1678 if (sawRegImm) {
1679 /* Saw a register immediate so don't read again and instead split the
1680 previous immediate. FIXME: This is a hack. */
1681 insn->immediates[insn->numImmediatesConsumed] =
1682 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1683 ++insn->numImmediatesConsumed;
1684 break;
1685 }
1686 if (readImmediate(insn, 1))
1687 return -1;
1688 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 &&
1689 insn->immediates[insn->numImmediatesConsumed - 1] > 7)
1690 return -1;
1691 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 &&
1692 insn->immediates[insn->numImmediatesConsumed - 1] > 31)
1693 return -1;
1694 if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
1695 x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
1696 sawRegImm = 1;
1697 break;
1698 case ENCODING_IW:
1699 if (readImmediate(insn, 2))
1700 return -1;
1701 break;
1702 case ENCODING_ID:
1703 if (readImmediate(insn, 4))
1704 return -1;
1705 break;
1706 case ENCODING_IO:
1707 if (readImmediate(insn, 8))
1708 return -1;
1709 break;
1710 case ENCODING_Iv:
1711 if (readImmediate(insn, insn->immediateSize))
1712 return -1;
1713 break;
1714 case ENCODING_Ia:
1715 if (readImmediate(insn, insn->addressSize))
1716 return -1;
1717 break;
1718 case ENCODING_RB:
1719 if (readOpcodeRegister(insn, 1))
1720 return -1;
1721 break;
1722 case ENCODING_RW:
1723 if (readOpcodeRegister(insn, 2))
1724 return -1;
1725 break;
1726 case ENCODING_RD:
1727 if (readOpcodeRegister(insn, 4))
1728 return -1;
1729 break;
1730 case ENCODING_RO:
1731 if (readOpcodeRegister(insn, 8))
1732 return -1;
1733 break;
1734 case ENCODING_Rv:
1735 if (readOpcodeRegister(insn, 0))
1736 return -1;
1737 break;
1738 case ENCODING_FP:
1739 break;
1740 case ENCODING_VVVV:
1741 needVVVV = 0; /* Mark that we have found a VVVV operand. */
1742 if (!hasVVVV)
1743 return -1;
1744 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
1745 return -1;
1746 break;
1747 case ENCODING_WRITEMASK:
1748 if (readMaskRegister(insn))
1749 return -1;
1750 break;
1751 case ENCODING_DUP:
1752 break;
1753 default:
1754 dbgprintf(insn, "Encountered an operand with an unknown encoding.");
1755 return -1;
1756 }
1757 }
1758
1759 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
1760 if (needVVVV) return -1;
1761
1762 return 0;
1763 }
1764
1765 /*
1766 * decodeInstruction - Reads and interprets a full instruction provided by the
1767 * user.
1768 *
1769 * @param insn - A pointer to the instruction to be populated. Must be
1770 * pre-allocated.
1771 * @param reader - The function to be used to read the instruction's bytes.
1772 * @param readerArg - A generic argument to be passed to the reader to store
1773 * any internal state.
1774 * @param logger - If non-NULL, the function to be used to write log messages
1775 * and warnings.
1776 * @param loggerArg - A generic argument to be passed to the logger to store
1777 * any internal state.
1778 * @param startLoc - The address (in the reader's address space) of the first
1779 * byte in the instruction.
1780 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1781 * decode the instruction in.
1782 * @return - 0 if the instruction's memory could be read; nonzero if
1783 * not.