llvm.org GIT mirror llvm / c37e7b9
[MC] Fix Intel Operand assembly parsing for .set ids Recommitting after fixing overaggressive fastpath return in parsing. Fix the Intel syntax special case so that identifier operands that refer to a constant (e.g. .set <ID> n) are interpreted as immediates, not memory operands, in parsing. Associated commit to fix clang test committed shortly. Reviewers: rnk Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D22585 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@277489 91177308-0d34-0410-b5e6-96231b3b80d8 Nirav Dave 4 years ago
3 changed file(s) with 98 addition(s) and 116 deletion(s). Raw diff Collapse all Expand all
697697 std::unique_ptr ParseIntelOperator(unsigned OpKind);
698698 std::unique_ptr
699699 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
700 std::unique_ptr
701 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
702700 std::unique_ptr ParseRoundingModeOp(SMLoc Start, SMLoc End);
703701 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
704 std::unique_ptr ParseIntelBracExpression(unsigned SegReg,
705 SMLoc Start,
706 int64_t ImmDisp,
707 unsigned Size);
702 std::unique_ptr
703 ParseIntelBracExpression(unsigned SegReg, SMLoc Start, int64_t ImmDisp,
704 bool isSymbol, unsigned Size);
708705 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
709706 InlineAsmIdentifierInfo &Info,
710707 bool IsUnevaluatedOperand, SMLoc &End);
12701267
12711268 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
12721269 // identifier. Don't try an parse it as a register.
1273 if (Tok.getString().startswith("."))
1270 if (PrevTK != AsmToken::Error && Tok.getString().startswith("."))
12741271 break;
12751272
12761273 // If we're parsing an immediate expression, we don't expect a '['.
13851382
13861383 std::unique_ptr
13871384 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1388 int64_t ImmDisp, unsigned Size) {
1385 int64_t ImmDisp, bool isSymbol,
1386 unsigned Size) {
13891387 MCAsmParser &Parser = getParser();
13901388 const AsmToken &Tok = Parser.getTok();
13911389 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
14351433 Disp = NewDisp;
14361434 }
14371435
1436 if (isSymbol) {
1437 if (SM.getSym()) {
1438 Error(Start, "cannot use more than one symbol in memory operand");
1439 return nullptr;
1440 }
1441 if (SM.getBaseReg()) {
1442 Error(Start, "cannot use base register with variable reference");
1443 return nullptr;
1444 }
1445 if (SM.getIndexReg()) {
1446 Error(Start, "cannot use index register with variable reference");
1447 return nullptr;
1448 }
1449 }
1450
14381451 int BaseReg = SM.getBaseReg();
14391452 int IndexReg = SM.getIndexReg();
14401453 int Scale = SM.getScale();
15401553 }
15411554
15421555 if (getLexer().is(AsmToken::LBrac))
1543 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1556 return ParseIntelBracExpression(SegReg, Start, ImmDisp, false, Size);
15441557
15451558 const MCExpr *Val;
15461559 SMLoc End;
15971610 }
15981611 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
15991612 }
1600 /// ParseIntelMemOperand - Parse intel style memory operand.
1601 std::unique_ptr X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1602 SMLoc Start,
1603 unsigned Size) {
1604 MCAsmParser &Parser = getParser();
1605 const AsmToken &Tok = Parser.getTok();
1606 SMLoc End;
1607
1608 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1609 if (getLexer().is(AsmToken::LBrac))
1610 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
1611 assert(ImmDisp == 0);
1612
1613 const MCExpr *Val;
1614 if (!isParsingInlineAsm()) {
1615 if (getParser().parsePrimaryExpr(Val, End))
1616 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1617
1618 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1619 }
1620
1621 InlineAsmIdentifierInfo Info;
1622 StringRef Identifier = Tok.getString();
1623 if (ParseIntelIdentifier(Val, Identifier, Info,
1624 /*Unevaluated=*/false, End))
1625 return nullptr;
1626
1627 if (!getLexer().is(AsmToken::LBrac))
1628 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1629 /*Scale=*/1, Start, End, Size, Identifier, Info);
1630
1631 Parser.Lex(); // Eat '['
1632
1633 // Parse Identifier [ ImmDisp ]
1634 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1635 /*AddImmPrefix=*/false);
1636 if (ParseIntelExpression(SM, End))
1637 return nullptr;
1638
1639 if (SM.getSym()) {
1640 Error(Start, "cannot use more than one symbol in memory operand");
1641 return nullptr;
1642 }
1643 if (SM.getBaseReg()) {
1644 Error(Start, "cannot use base register with variable reference");
1645 return nullptr;
1646 }
1647 if (SM.getIndexReg()) {
1648 Error(Start, "cannot use index register with variable reference");
1649 return nullptr;
1650 }
1651
1652 const MCExpr *Disp = MCConstantExpr::create(SM.getImm(), getContext());
1653 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1654 // we're pointing to a local variable in memory, so the base register is
1655 // really the frame or stack pointer.
1656 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1657 /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
1658 Start, End, Size, Identifier, Info.OpDecl);
1659 }
16601613
16611614 /// Parse the '.' operator.
16621615 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
18031756 Parser.Lex(); // Eat ptr.
18041757 PtrInOperand = true;
18051758 }
1759
18061760 Start = Tok.getLoc();
1807
1808 // Immediate.
1809 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1810 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
1811 AsmToken StartTok = Tok;
1812 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1813 /*AddImmPrefix=*/false);
1814 if (ParseIntelExpression(SM, End))
1815 return nullptr;
1816
1817 int64_t Imm = SM.getImm();
1818 if (isParsingInlineAsm()) {
1819 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1820 if (StartTok.getString().size() == Len)
1821 // Just add a prefix if this wasn't a complex immediate expression.
1822 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start);
1823 else
1824 // Otherwise, rewrite the complex expression as a single immediate.
1825 InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm);
1826 }
1827
1828 if (getLexer().isNot(AsmToken::LBrac)) {
1829 // If a directional label (ie. 1f or 2b) was parsed above from
1830 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1831 // to the MCExpr with the directional local symbol and this is a
1832 // memory operand not an immediate operand.
1833 if (SM.getSym())
1834 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1835 Size);
1836
1837 const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
1838 return X86Operand::CreateImm(ImmExpr, Start, End);
1839 }
1840
1841 // Only positive immediates are valid.
1842 if (Imm < 0)
1843 return ErrorOperand(Start, "expected a positive immediate displacement "
1844 "before bracketed expr.");
1845
1846 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1847 return ParseIntelMemOperand(Imm, Start, Size);
1848 }
18491761
18501762 // rounding mode token
18511763 if (getSTI().getFeatureBits()[X86::FeatureAVX512] &&
18541766
18551767 // Register.
18561768 unsigned RegNo = 0;
1857 if (!ParseRegister(RegNo, Start, End)) {
1769 if (getLexer().is(AsmToken::Identifier) &&
1770 !ParseRegister(RegNo, Start, End)) {
18581771 // If this is a segment register followed by a ':', then this is the start
18591772 // of a segment override, otherwise this is a normal register reference.
18601773 // In case it is a normal register and there is ptr in the operand this
18661779 }
18671780 return X86Operand::CreateReg(RegNo, Start, End);
18681781 }
1869
18701782 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
18711783 }
18721784
1873 // Memory operand.
1874 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
1785 // Immediates and Memory
1786
1787 // Parse [ BaseReg + Scale*IndexReg + Disp ].
1788 if (getLexer().is(AsmToken::LBrac))
1789 return ParseIntelBracExpression(/*SegReg=*/0, Start, /*ImmDisp=*/0, false,
1790 Size);
1791
1792 AsmToken StartTok = Tok;
1793 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1794 /*AddImmPrefix=*/false);
1795 if (ParseIntelExpression(SM, End))
1796 return nullptr;
1797
1798 bool isSymbol = SM.getSym() && SM.getSym()->getKind() != MCExpr::Constant;
1799 int64_t Imm = SM.getImm();
1800 if (SM.getSym() && SM.getSym()->getKind() == MCExpr::Constant)
1801 SM.getSym()->evaluateAsAbsolute(Imm);
1802
1803 if (StartTok.isNot(AsmToken::Identifier) &&
1804 StartTok.isNot(AsmToken::String) && isParsingInlineAsm()) {
1805 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1806 if (StartTok.getString().size() == Len)
1807 // Just add a prefix if this wasn't a complex immediate expression.
1808 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start);
1809 else
1810 // Otherwise, rewrite the complex expression as a single immediate.
1811 InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm);
1812 }
1813
1814 if (getLexer().isNot(AsmToken::LBrac)) {
1815 // If a directional label (ie. 1f or 2b) was parsed above from
1816 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1817 // to the MCExpr with the directional local symbol and this is a
1818 // memory operand not an immediate operand.
1819 if (isSymbol) {
1820 if (isParsingInlineAsm())
1821 return CreateMemForInlineAsm(/*SegReg=*/0, SM.getSym(), /*BaseReg=*/0,
1822 /*IndexReg=*/0,
1823 /*Scale=*/1, Start, End, Size,
1824 SM.getSymName(), SM.getIdentifierInfo());
1825 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1826 Size);
1827 }
1828
1829 const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
1830 return X86Operand::CreateImm(ImmExpr, Start, End);
1831 }
1832
1833 // Only positive immediates are valid.
1834 if (Imm < 0)
1835 return ErrorOperand(Start, "expected a positive immediate displacement "
1836 "before bracketed expr.");
1837
1838 return ParseIntelBracExpression(/*SegReg=*/0, Start, Imm, isSymbol, Size);
18751839 }
18761840
18771841 std::unique_ptr X86AsmParser::ParseATTOperand() {
19151879 SMLoc Start = Parser.getTok().getLoc(), End;
19161880 if (getSTI().getFeatureBits()[X86::FeatureAVX512])
19171881 return ParseRoundingModeOp(Start, End);
1918 return ErrorOperand(Start, "unknown token in expression");
1882 return ErrorOperand(Start, "Unexpected '{' in expression");
19191883 }
19201884 }
19211885 }
7575 // CHECK: encoding: [0xca,0x08,0x00]
7676 retf 8
7777
78 .set FOO, 2
79 cmp eax, FOO
80 // CHECK: encoding: [0x83,0xf8,0x02]
81 cmp eax, FOO[eax]
82 // CHECK: encoding: [0x67,0x3b,0x40,0x02]
1010 .att_syntax noprefix
1111 // CHECK: error: '.att_syntax noprefix' is not supported: registers must have a '%' prefix in .att_syntax
1212 movl $257, -4(esp)
13
14
15 .intel_syntax noprefix
16
17 .global arr
18 .global i
19 .set FOO, 2
20 //CHECK-STDERR: error: cannot use base register with variable reference
21 mov eax, DWORD PTR arr[ebp + 1 + (2 * 5) - 3 + 1<<1]
22 //CHECK-STDERR: error: cannot use index register with variable reference
23 mov eax, DWORD PTR arr[esi*4]
24 //CHECK-STDERR: error: cannot use more than one symbol in memory operand
25 mov eax, DWORD PTR arr[i]