llvm.org GIT mirror llvm / 1539acf
[AsmParser] Backends can parameterize ASM tokenization. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@252439 91177308-0d34-0410-b5e6-96231b3b80d8 Colin LeMahieu 4 years ago
7 changed file(s) with 103 addition(s) and 47 deletion(s). Raw diff Collapse all Expand all
964964 // register tokens as constrained registers, instead of tokens, for the
965965 // purposes of matching.
966966 string RegisterPrefix = "";
967
968 // TokenizingCharacters - Characters that are standalone tokens
969 string TokenizingCharacters = "[]*!";
970
971 // SeparatorCharacters - Characters that are not tokens
972 string SeparatorCharacters = " \t,";
973
974 // BreakCharacters - Characters that start new identifiers
975 string BreakCharacters = "";
967976 }
// DefaultAsmParserVariant - An AsmParserVariant record that overrides no
// fields, so every value comes from the AsmParserVariant class itself.
968977 def DefaultAsmParserVariant : AsmParserVariant;
969978
13331333 // Treat '.' as a valid identifier in this context.
13341334 Lex();
13351335 IDVal = ".";
1336 } else if (Lexer.is(AsmToken::LCurly)) {
1337 // Treat '{' as a valid identifier in this context.
1338 Lex();
1339 IDVal = "{";
1340
1341 } else if (Lexer.is(AsmToken::RCurly)) {
1342 // Treat '}' as a valid identifier in this context.
1343 Lex();
1344 IDVal = "}";
13361345 } else if (parseIdentifier(IDVal)) {
13371346 if (!TheCondState.Ignore)
13381347 return TokError("unexpected token at start of statement");
// GenericAsmParserVariant - Assembly parser variant number 0, named
// "generic". Only the fields listed here are overridden; anything else keeps
// the value declared by AsmParserVariant.
124124 def GenericAsmParserVariant : AsmParserVariant {
125125 int Variant = 0;
126126 string Name = "generic";
// '.' is a break character: it starts a new identifier during tokenization.
127 string BreakCharacters = ".";
127128 }
128129
// AppleAsmParserVariant - Assembly parser variant number 1, named
// "apple-neon". Only the fields listed here are overridden; anything else
// keeps the value declared by AsmParserVariant.
129130 def AppleAsmParserVariant : AsmParserVariant {
130131 int Variant = 1;
131132 string Name = "apple-neon";
// '.' is a break character: it starts a new identifier during tokenization.
133 string BreakCharacters = ".";
132134 }
133135
134136 //===----------------------------------------------------------------------===//
510510 bit isMCAsmWriter = 1;
511511 }
512512
// ARMAsmParserVariant - Assembly parser variant number 0, named "ARM".
// Declared explicitly so that BreakCharacters can be set; fields not listed
// here keep the value declared by AsmParserVariant.
513 def ARMAsmParserVariant : AsmParserVariant {
514 int Variant = 0;
515 string Name = "ARM";
// '.' is a break character: it starts a new identifier during tokenization.
516 string BreakCharacters = ".";
517 }
518
// ARM - Top-level Target record. It names the instruction set, the assembly
// writers and the assembly parser variants used for this target.
513519 def ARM : Target {
514520 // Pull in Instruction Info:
515521 let InstructionSet = ARMInstrInfo;
516522 let AssemblyWriters = [ARMAsmWriter];
// Use ARMAsmParserVariant as the only parser variant for this target.
523 let AssemblyParserVariants = [ARMAsmParserVariant];
517524 }
2424 bit isMCAsmWriter = 1;
2525 }
2626
// BPFAsmParserVariant - Assembly parser variant number 0, named "BPF".
// Declared explicitly so that BreakCharacters can be set; fields not listed
// here keep the value declared by AsmParserVariant.
27 def BPFAsmParserVariant : AsmParserVariant {
28 int Variant = 0;
29 string Name = "BPF";
// '.' is a break character: it starts a new identifier during tokenization.
30 string BreakCharacters = ".";
31 }
32
// BPF - Top-level Target record. It names the instruction set, the assembly
// writers and the assembly parser variants used for this target.
2733 def BPF : Target {
2834 let InstructionSet = BPFInstrInfo;
2935 let AssemblyWriters = [BPFInstPrinter];
// Use BPFAsmParserVariant as the only parser variant for this target.
36 let AssemblyParserVariants = [BPFAsmParserVariant];
3037 }
402402 // InstAlias definitions use immediate literals. Set RegisterPrefix
403403 // so that those are not misinterpreted as registers.
404404 string RegisterPrefix = "%";
405 string BreakCharacters = ".";
405406 }
406407
407408 def PPC : Target {
293293 }
294294 };
295295
/// AsmVariantInfo - The three character sets that control how an assembly
/// string is split into tokens for one assembly parser variant. Each member is
/// filled from the AsmParserVariant record field of the same name and is
/// treated as a set of single characters.
296 class AsmVariantInfo {
297 public:
/// Characters that end the current token and are themselves emitted as a
/// one-character token.
298 std::string TokenizingCharacters;
/// Characters that end the current token and are not emitted as tokens.
299 std::string SeparatorCharacters;
/// Characters that end the current token and become the first character of
/// the next one.
300 std::string BreakCharacters;
301 };
302
296303 /// MatchableInfo - Helper class for storing the necessary information for an
297304 /// instruction or alias which is capable of being matched.
298305 struct MatchableInfo {
483490
484491 void initialize(const AsmMatcherInfo &Info,
485492 SmallPtrSetImpl &SingletonRegisters,
486 int AsmVariantNo, StringRef RegisterPrefix);
493 int AsmVariantNo, StringRef RegisterPrefix,
494 AsmVariantInfo const &Variant);
487495
488496 /// validate - Return true if this matchable is a valid thing to match against
489497 /// and perform a bunch of validity checking.
583591 void dump() const;
584592
585593 private:
586 void tokenizeAsmString(const AsmMatcherInfo &Info);
587 void addAsmOperand(size_t Start, size_t End);
594 void tokenizeAsmString(AsmMatcherInfo const &Info,
595 AsmVariantInfo const &Variant);
596 void addAsmOperand(size_t Start, size_t End,
597 std::string const &SeparatorCharacters);
588598 };
589599
590600 /// SubtargetFeatureInfo - Helper class for storing information on a subtarget
827837
828838 void MatchableInfo::initialize(const AsmMatcherInfo &Info,
829839 SmallPtrSetImpl &SingletonRegisters,
830 int AsmVariantNo, StringRef RegisterPrefix) {
840 int AsmVariantNo, StringRef RegisterPrefix,
841 AsmVariantInfo const &Variant) {
831842 AsmVariantID = AsmVariantNo;
832843 AsmString =
833844 CodeGenInstruction::FlattenAsmStringVariants(AsmString, AsmVariantNo);
834845
835 tokenizeAsmString(Info);
846 tokenizeAsmString(Info, Variant);
836847
837848 // Compute the required features.
838849 for (Record *Predicate : TheDef->getValueAsListOfDefs("Predicates"))
856867 }
857868
858869 /// Append an AsmOperand for the given substring of AsmString.
859 void MatchableInfo::addAsmOperand(size_t Start, size_t End) {
870 void MatchableInfo::addAsmOperand(size_t Start, size_t End,
871 std::string const &Separators) {
860872 StringRef String = AsmString;
861 StringRef Separators = "[]*! \t,";
862873 // Look for separators before and after to figure out if this token is
863874 // isolated. Accept '$$' as that's how we escape '$'.
864875 bool IsIsolatedToken =
869880 }
870881
871882 /// tokenizeAsmString - Tokenize a simplified assembly string.
872 void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info) {
883 void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info,
884 AsmVariantInfo const &Variant) {
873885 StringRef String = AsmString;
874 size_t Prev = 0;
875 bool InTok = true;
876 for (size_t i = 0, e = String.size(); i != e; ++i) {
877 switch (String[i]) {
878 case '[':
879 case ']':
880 case '*':
881 case '!':
882 case ' ':
883 case '\t':
884 case ',':
885 if (InTok) {
886 addAsmOperand(Prev, i);
886 unsigned Prev = 0;
887 bool InTok = false;
888 std::string Separators = Variant.TokenizingCharacters +
889 Variant.SeparatorCharacters;
890 for (unsigned i = 0, e = String.size(); i != e; ++i) {
891 if(Variant.BreakCharacters.find(String[i]) != std::string::npos) {
892 if(InTok) {
893 addAsmOperand(Prev, i, Separators);
894 Prev = i;
895 }
896 InTok = true;
897 continue;
898 }
899 if(Variant.TokenizingCharacters.find(String[i]) != std::string::npos) {
900 if(InTok) {
901 addAsmOperand(Prev, i, Separators);
887902 InTok = false;
888903 }
889 if (!isspace(String[i]) && String[i] != ',')
890 addAsmOperand(i, i + 1);
904 addAsmOperand(i, i + 1, Separators);
891905 Prev = i + 1;
892 break;
893
906 continue;
907 }
908 if(Variant.SeparatorCharacters.find(String[i]) != std::string::npos) {
909 if(InTok) {
910 addAsmOperand(Prev, i, Separators);
911 InTok = false;
912 }
913 Prev = i + 1;
914 continue;
915 }
916 switch (String[i]) {
894917 case '\\':
895918 if (InTok) {
896 addAsmOperand(Prev, i);
919 addAsmOperand(Prev, i, Separators);
897920 InTok = false;
898921 }
899922 ++i;
900923 assert(i != String.size() && "Invalid quoted character");
901 addAsmOperand(i, i + 1);
924 addAsmOperand(i, i + 1, Separators);
902925 Prev = i + 1;
903926 break;
904927
905928 case '$': {
906 if (InTok) {
907 addAsmOperand(Prev, i);
929 if (InTok && Prev != i) {
930 addAsmOperand(Prev, i, Separators);
908931 InTok = false;
909932 }
910933
914937 break;
915938 }
916939
917 // If this is "${" find the next "}" and make an identifier like "${xxx}"
918 size_t EndPos = String.find('}', i);
919 assert(EndPos != StringRef::npos &&
920 "Missing brace in operand reference!");
921 addAsmOperand(i, EndPos+1);
940 StringRef::iterator End = std::find(String.begin() + i, String.end(),'}');
941 assert(End != String.end() && "Missing brace in operand reference!");
942 size_t EndPos = End - String.begin();
943 addAsmOperand(i, EndPos+1, Separators);
922944 Prev = EndPos + 1;
923945 i = EndPos;
924946 break;
925947 }
926
927 case '.':
928 if (!Info.AsmParser->getValueAsBit("MnemonicContainsDot")) {
929 if (InTok)
930 addAsmOperand(Prev, i);
931 Prev = i;
932 }
933 InTok = true;
934 break;
935
936948 default:
937949 InTok = true;
938950 }
939951 }
940952 if (InTok && Prev != String.size())
941 addAsmOperand(Prev, StringRef::npos);
953 addAsmOperand(Prev, StringRef::npos, Separators);
942954
943955 // The first token of the instruction is the mnemonic, which must be a
944956 // simple string, not a $foo variable or a singleton register.
13721384 std::string CommentDelimiter =
13731385 AsmVariant->getValueAsString("CommentDelimiter");
13741386 std::string RegisterPrefix = AsmVariant->getValueAsString("RegisterPrefix");
1387 AsmVariantInfo Variant;
1388 Variant.TokenizingCharacters =
1389 AsmVariant->getValueAsString("TokenizingCharacters");
1390 Variant.SeparatorCharacters =
1391 AsmVariant->getValueAsString("SeparatorCharacters");
1392 Variant.BreakCharacters =
1393 AsmVariant->getValueAsString("BreakCharacters");
13751394 int AsmVariantNo = AsmVariant->getValueAsInt("Variant");
13761395
13771396 for (const CodeGenInstruction *CGI : Target.instructions()) {
13871406
13881407 auto II = llvm::make_unique(*CGI);
13891408
1390 II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix);
1409 II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix,
1410 Variant);
13911411
13921412 // Ignore instructions which shouldn't be matched and diagnose invalid
13931413 // instruction definitions with an error.
14141434
14151435 auto II = llvm::make_unique(std::move(Alias));
14161436
1417 II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix);
1437 II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix,
1438 Variant);
14181439
14191440 // Validate the alias definitions.
14201441 II->validate(CommentDelimiter, false);