llvm.org GIT mirror llvm / b80d571
Updated the llvm-mc disassembler C API to support the X86 target. rdar://10873652 As part of this I updated the llvm-mc disassembler C API to always call the SymbolLookUp call back even if there is no getOpInfo call back. If there is a getOpInfo call back, that is tried first, and if that gets no information then the SymbolLookUp call back is called. I also made the code more robust by memset(3)'ing the LLVMOpInfo1 struct to zero and then setting SymbolicOp.Value before the call to getOpInfo. And also don't use any values from the LLVMOpInfo1 struct if getOpInfo returns 0. And also don't use any of the ReferenceType or ReferenceName values from SymbolLookUp if it returns NULL. rdar://10873563 and rdar://10873683 For the X86 target also fixed bugs so the annotations get printed. Also fixed a few places in the ARM target that were not producing symbolic operands for some instructions. rdar://10878166 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151267 91177308-0d34-0410-b5e6-96231b3b80d8 Kevin Enderby 8 years ago
8 changed file(s) with 243 addition(s) and 66 deletion(s). Raw diff Collapse all Expand all
439439 MCInst &MI, const void *Decoder) {
440440 const MCDisassembler *Dis = static_cast(Decoder);
441441 LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
442 if (!getOpInfo)
443 return false;
444
445442 struct LLVMOpInfo1 SymbolicOp;
443 memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
446444 SymbolicOp.Value = Value;
447445 void *DisInfo = Dis->getDisInfoBlock();
448 if (!getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
449 if (isBranch) {
450 LLVMSymbolLookupCallback SymbolLookUp =
451 Dis->getLLVMSymbolLookupCallback();
452 if (SymbolLookUp) {
453 uint64_t ReferenceType;
454 ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
455 const char *ReferenceName;
456 const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
457 &ReferenceName);
458 if (Name) {
459 SymbolicOp.AddSymbol.Name = Name;
460 SymbolicOp.AddSymbol.Present = true;
461 SymbolicOp.Value = 0;
462 }
463 else {
464 SymbolicOp.Value = Value;
465 }
466 if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
467 (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
468 }
469 else {
470 return false;
471 }
446
447 if (!getOpInfo ||
448 !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
449 // Clear SymbolicOp.Value from above and also all other fields.
450 memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
451 LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
452 if (!SymbolLookUp)
453 return false;
454 uint64_t ReferenceType;
455 if (isBranch)
456 ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
457 else
458 ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
459 const char *ReferenceName;
460 const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
461 &ReferenceName);
462 if (Name) {
463 SymbolicOp.AddSymbol.Name = Name;
464 SymbolicOp.AddSymbol.Present = true;
472465 }
473 else {
466 // For branches always create an MCExpr so it gets printed as hex address.
467 else if (isBranch) {
468 SymbolicOp.Value = Value;
469 }
470 if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
471 (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
472 if (!Name && !isBranch)
474473 return false;
475 }
476474 }
477475
478476 MCContext *Ctx = Dis->getMCContext();
547545 /// a literal 'C' string if the referenced address of the literal pool's entry
548546 /// is an address into a section with 'C' string literals.
549547 static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value,
550 const void *Decoder) {
548 const void *Decoder) {
551549 const MCDisassembler *Dis = static_cast(Decoder);
552550 LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
553551 if (SymbolLookUp) {
19091907 if (pred == 0xF) {
19101908 Inst.setOpcode(ARM::BLXi);
19111909 imm |= fieldFromInstruction32(Insn, 24, 1) << 1;
1910 if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
1911 true, 4, Inst, Decoder))
19121912 Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
19131913 return S;
19141914 }
19151915
1916 if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, true,
1917 4, Inst, Decoder))
1916 if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
1917 true, 4, Inst, Decoder))
19181918 Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
19191919 if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
19201920 return MCDisassembler::Fail;
31263126
31273127 static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val,
31283128 uint64_t Address, const void *Decoder){
3129 Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
3129 if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 8,
3130 true, 4, Inst, Decoder))
3131 Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
31303132 return MCDisassembler::Success;
31313133 }
31323134
389389
390390 void addAbsMemOperands(MCInst &Inst, unsigned N) const {
391391 assert((N == 1) && "Invalid number of operands!");
392 Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
392 // Add as immediates when possible.
393 if (const MCConstantExpr *CE = dyn_cast(getMemDisp()))
394 Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
395 else
396 Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
393397 }
394398
395399 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
1717 #include "X86DisassemblerDecoder.h"
1818
1919 #include "llvm/MC/EDInstInfo.h"
20 #include "llvm/MC/MCDisassembler.h"
20 #include "llvm/MC/MCExpr.h"
21 #include "llvm/MC/MCContext.h"
2122 #include "llvm/MC/MCDisassembler.h"
2223 #include "llvm/MC/MCInst.h"
2324 #include "llvm/MC/MCInstrInfo.h"
7071 }
7172
7273 static bool translateInstruction(MCInst &target,
73 InternalInstruction &source);
74 InternalInstruction &source,
75 const MCDisassembler *Dis);
7476
7577 X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI,
7678 DisassemblerMode mode,
122124 uint64_t address,
123125 raw_ostream &vStream,
124126 raw_ostream &cStream) const {
127 CommentStream = &cStream;
128
125129 InternalInstruction internalInstr;
126130
127131 dlog_t loggerFn = logger;
143147 }
144148 else {
145149 size = internalInstr.length;
146 return (!translateInstruction(instr, internalInstr)) ? Success : Fail;
150 return (!translateInstruction(instr, internalInstr, this)) ?
151 Success : Fail;
147152 }
148153 }
149154
168173 mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
169174 }
170175
176 /// tryAddingSymbolicOperand - tries to add a symbolic operand in place of the
177 /// immediate Value in the MCInst.
178 ///
179 /// @param Value - The immediate Value, has had any PC adjustment made by
180 /// the caller.
181 /// @param isBranch - If the instruction is a branch instruction
182 /// @param Address - The starting address of the instruction
183 /// @param Offset - The byte offset to this immediate in the instruction
184 /// @param Width - The byte width of this immediate in the instruction
185 ///
186 /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
187 /// called then that function is called to get any symbolic information for the
188 /// immediate in the instruction using the Address, Offset and Width. If that
189 /// returns non-zero then the symbolic information it returns is used to create
190 /// an MCExpr and that is added as an operand to the MCInst. If getOpInfo() is
191 /// not set or returns zero then a symbol look up for immediate Value is done;
192 /// if a symbol is found an MCExpr is created with that, else for branches only
193 /// an MCExpr with the immediate Value is created. This function returns true
194 /// if it adds an operand to the MCInst and false otherwise.
195 static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
196 uint64_t Address, uint64_t Offset,
197 uint64_t Width, MCInst &MI,
198 const MCDisassembler *Dis) {
199 LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
200 struct LLVMOpInfo1 SymbolicOp;
201 memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
202 SymbolicOp.Value = Value;
203 void *DisInfo = Dis->getDisInfoBlock();
204
205 if (!getOpInfo ||
206 !getOpInfo(DisInfo, Address, Offset, Width, 1, &SymbolicOp)) {
207 // Clear SymbolicOp.Value from above and also all other fields.
208 memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
209 LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
210 if (!SymbolLookUp)
211 return false;
212 uint64_t ReferenceType;
213 if (isBranch)
214 ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
215 else
216 ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
217 const char *ReferenceName;
218 const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
219 &ReferenceName);
220 if (Name) {
221 SymbolicOp.AddSymbol.Name = Name;
222 SymbolicOp.AddSymbol.Present = true;
223 }
224 // For branches always create an MCExpr so it gets printed as hex address.
225 else if (isBranch) {
226 SymbolicOp.Value = Value;
227 }
228 if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
229 (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
230 if (!Name && !isBranch)
231 return false;
232 }
233
234 MCContext *Ctx = Dis->getMCContext();
235 const MCExpr *Add = NULL;
236 if (SymbolicOp.AddSymbol.Present) {
237 if (SymbolicOp.AddSymbol.Name) {
238 StringRef Name(SymbolicOp.AddSymbol.Name);
239 MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
240 Add = MCSymbolRefExpr::Create(Sym, *Ctx);
241 } else {
242 Add = MCConstantExpr::Create((int)SymbolicOp.AddSymbol.Value, *Ctx);
243 }
244 }
245
246 const MCExpr *Sub = NULL;
247 if (SymbolicOp.SubtractSymbol.Present) {
248 if (SymbolicOp.SubtractSymbol.Name) {
249 StringRef Name(SymbolicOp.SubtractSymbol.Name);
250 MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
251 Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
252 } else {
253 Sub = MCConstantExpr::Create((int)SymbolicOp.SubtractSymbol.Value, *Ctx);
254 }
255 }
256
257 const MCExpr *Off = NULL;
258 if (SymbolicOp.Value != 0)
259 Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
260
261 const MCExpr *Expr;
262 if (Sub) {
263 const MCExpr *LHS;
264 if (Add)
265 LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
266 else
267 LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
268 if (Off != 0)
269 Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
270 else
271 Expr = LHS;
272 } else if (Add) {
273 if (Off != 0)
274 Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
275 else
276 Expr = Add;
277 } else {
278 if (Off != 0)
279 Expr = Off;
280 else
281 Expr = MCConstantExpr::Create(0, *Ctx);
282 }
283
284 MI.addOperand(MCOperand::CreateExpr(Expr));
285
286 return true;
287 }
288
171289 /// translateImmediate - Appends an immediate operand to an MCInst.
172290 ///
173291 /// @param mcInst - The MCInst to append to.
176294 /// @param insn - The internal instruction.
177295 static void translateImmediate(MCInst &mcInst, uint64_t immediate,
178296 const OperandSpecifier &operand,
179 InternalInstruction &insn) {
297 InternalInstruction &insn,
298 const MCDisassembler *Dis) {
180299 // Sign-extend the immediate if necessary.
181300
182301 OperandType type = operand.type;
232351 }
233352 }
234353
354 bool isBranch = false;
355 uint64_t pcrel = 0;
235356 switch (type) {
236357 case TYPE_XMM128:
237358 mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
239360 case TYPE_XMM256:
240361 mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4)));
241362 return;
363 case TYPE_REL8:
364 isBranch = true;
365 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
366 // fall through to sign extend the immediate if needed.
242367 case TYPE_MOFFS8:
243 case TYPE_REL8:
244368 if(immediate & 0x80)
245369 immediate |= ~(0xffull);
246370 break;
248372 if(immediate & 0x8000)
249373 immediate |= ~(0xffffull);
250374 break;
251 case TYPE_MOFFS32:
252375 case TYPE_REL32:
253376 case TYPE_REL64:
377 isBranch = true;
378 pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
379 // fall through to sign extend the immediate if needed.
380 case TYPE_MOFFS32:
254381 if(immediate & 0x80000000)
255382 immediate |= ~(0xffffffffull);
256383 break;
260387 break;
261388 }
262389
263 mcInst.addOperand(MCOperand::CreateImm(immediate));
390 if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
391 insn.immediateOffset, insn.immediateSize,
392 mcInst, Dis))
393 mcInst.addOperand(MCOperand::CreateImm(immediate));
264394 }
265395
266396 /// translateRMRegister - Translates a register stored in the R/M field of the
307437 /// @param insn - The instruction to extract Mod, R/M, and SIB fields
308438 /// from.
309439 /// @return - 0 on success; nonzero otherwise
310 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
440 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
441 const MCDisassembler *Dis) {
311442 // Addresses in an MCInst are represented as five operands:
312443 // 1. basereg (register) The R/M base, or (if there is a SIB) the
313444 // SIB base
325456 MCOperand indexReg;
326457 MCOperand displacement;
327458 MCOperand segmentReg;
459 uint64_t pcrel = 0;
328460
329461 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
330462 if (insn.sibBase != SIB_BASE_NONE) {
366498 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
367499 return true;
368500 }
369 if (insn.mode == MODE_64BIT)
501 if (insn.mode == MODE_64BIT){
502 pcrel = insn.startLocation +
503 insn.displacementOffset + insn.displacementSize;
370504 baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
505 }
371506 else
372507 baseReg = MCOperand::CreateReg(0);
373508
433568 mcInst.addOperand(baseReg);
434569 mcInst.addOperand(scaleAmount);
435570 mcInst.addOperand(indexReg);
436 mcInst.addOperand(displacement);
571 if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
572 insn.startLocation, insn.displacementOffset,
573 insn.displacementSize, mcInst, Dis))
574 mcInst.addOperand(displacement);
437575 mcInst.addOperand(segmentReg);
438576 return false;
439577 }
447585 /// from.
448586 /// @return - 0 on success; nonzero otherwise
449587 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
450 InternalInstruction &insn) {
588 InternalInstruction &insn, const MCDisassembler *Dis) {
451589 switch (operand.type) {
452590 default:
453591 debug("Unexpected type for a R/M operand");
487625 case TYPE_M1632:
488626 case TYPE_M1664:
489627 case TYPE_LEA:
490 return translateRMMemory(mcInst, insn);
628 return translateRMMemory(mcInst, insn, Dis);
491629 }
492630 }
493631
517655 /// @param insn - The internal instruction.
518656 /// @return - false on success; true otherwise.
519657 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
520 InternalInstruction &insn) {
658 InternalInstruction &insn,
659 const MCDisassembler *Dis) {
521660 switch (operand.encoding) {
522661 default:
523662 debug("Unhandled operand encoding during translation");
526665 translateRegister(mcInst, insn.reg);
527666 return false;
528667 case ENCODING_RM:
529 return translateRM(mcInst, operand, insn);
668 return translateRM(mcInst, operand, insn, Dis);
530669 case ENCODING_CB:
531670 case ENCODING_CW:
532671 case ENCODING_CD:
544683 translateImmediate(mcInst,
545684 insn.immediates[insn.numImmediatesTranslated++],
546685 operand,
547 insn);
686 insn,
687 Dis);
548688 return false;
549689 case ENCODING_RB:
550690 case ENCODING_RW:
563703 case ENCODING_DUP:
564704 return translateOperand(mcInst,
565705 insn.spec->operands[operand.type - TYPE_DUP0],
566 insn);
706 insn, Dis);
567707 }
568708 }
569709
574714 /// @param insn - The internal instruction.
575715 /// @return - false on success; true otherwise.
576716 static bool translateInstruction(MCInst &mcInst,
577 InternalInstruction &insn) {
717 InternalInstruction &insn,
718 const MCDisassembler *Dis) {
578719 if (!insn.spec) {
579720 debug("Instruction has no specification");
580721 return true;
588729
589730 for (index = 0; index < X86_MAX_OPERANDS; ++index) {
590731 if (insn.spec->operands[index].encoding != ENCODING_NONE) {
591 if (translateOperand(mcInst, insn.spec->operands[index], insn)) {
732 if (translateOperand(mcInst, insn.spec->operands[index], insn, Dis)) {
592733 return true;
593734 }
594735 }
10131013 return 0;
10141014
10151015 insn->consumedDisplacement = TRUE;
1016 insn->displacementOffset = insn->readerCursor - insn->startLocation;
10161017
10171018 switch (insn->eaDisplacement) {
10181019 case EA_DISP_NONE:
14091410 size = insn->immediateSize;
14101411 else
14111412 insn->immediateSize = size;
1413 insn->immediateOffset = insn->readerCursor - insn->startLocation;
14121414
14131415 switch (size) {
14141416 case 1:
458458 uint8_t addressSize;
459459 uint8_t displacementSize;
460460 uint8_t immediateSize;
461
462 /* Offsets from the start of the instruction to the pieces of data, which is
463 needed to find relocation entries for adding symbolic operands */
464 uint8_t displacementOffset;
465 uint8_t immediateOffset;
461466
462467 /* opcode state */
463468
4444 if (!printAliasInstr(MI, OS))
4545 printInstruction(MI, OS);
4646
47 // Next always print the annotation.
48 printAnnotation(OS, Annot);
49
4750 // If verbose assembly is enabled, we can print some informative comments.
48 if (CommentStream) {
49 printAnnotation(OS, Annot);
51 if (CommentStream)
5052 EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
51 }
5253 }
5354
5455 StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
102103 raw_ostream &O) {
103104 const MCOperand &Op = MI->getOperand(OpNo);
104105 if (Op.isImm())
105 // Print this as a signed 32-bit value.
106 O << (int)Op.getImm();
106 O << Op.getImm();
107107 else {
108108 assert(Op.isExpr() && "unknown pcrel immediate operand");
109 O << *Op.getExpr();
109 // If a symbolic branch target was added as a constant expression then print
110 // that address in hex.
111 const MCConstantExpr *BranchTarget = dyn_cast(Op.getExpr());
112 int64_t Address;
113 if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
114 O << "0x";
115 O.write_hex(Address);
116 }
117 else {
118 // Otherwise, just print the expression.
119 O << *Op.getExpr();
120 }
110121 }
111122 }
112123
3434 void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
3535 StringRef Annot) {
3636 printInstruction(MI, OS);
37
37
38 // Next always print the annotation.
39 printAnnotation(OS, Annot);
40
3841 // If verbose assembly is enabled, we can print some informative comments.
39 if (CommentStream) {
40 printAnnotation(OS, Annot);
42 if (CommentStream)
4143 EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
42 }
4344 }
4445 StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const {
4546 return getInstructionName(Opcode);
9495 O << Op.getImm();
9596 else {
9697 assert(Op.isExpr() && "unknown pcrel immediate operand");
97 O << *Op.getExpr();
98 // If a symbolic branch target was added as a constant expression then print
99 // that address in hex.
100 const MCConstantExpr *BranchTarget = dyn_cast(Op.getExpr());
101 int64_t Address;
102 if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
103 O << "0x";
104 O.write_hex(Address);
105 }
106 else {
107 // Otherwise, just print the expression.
108 O << *Op.getExpr();
109 }
98110 }
99111 }
100112
9696 sal $1, %eax
9797
9898 // moffset forms of moves, rdar://7947184
99 movb 0, %al // CHECK: movb 0, %al # encoding: [0xa0,A,A,A,A]
100 movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0xa1,A,A,A,A]
101 movl 0, %eax // CHECK: movl 0, %eax # encoding: [0xa1,A,A,A,A]
99 movb 0, %al // CHECK: movb 0, %al # encoding: [0xa0,0x00,0x00,0x00,0x00]
100 movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0xa1,0x00,0x00,0x00,0x00]
101 movl 0, %eax // CHECK: movl 0, %eax # encoding: [0xa1,0x00,0x00,0x00,0x00]
102102
103103 // rdar://7973775
104104 into