llvm.org GIT mirror llvm / d2a5c0d
Add Regex::sub, for doing regular expression substitution with backreferences. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@96503 91177308-0d34-0410-b5e6-96231b3b80d8 Daniel Dunbar 9 years ago
3 changed file(s) with 118 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
5555 ///
5656 /// This returns true on a successful match.
5757 bool match(const StringRef &String, SmallVectorImpl *Matches=0);
58
59 /// sub - Return the result of replacing the first match of the regex in
60 /// \arg String with the \arg Repl string. Backreferences like "\0" in the
61 /// replacement string are replaced with the appropriate match substring.
62 ///
63 /// Note that the replacement string has backslash escaping performed on
64 /// it. Invalid backreferences are ignored (replaced by empty strings).
65 ///
66 /// \param Error If non-null, any errors in the substitution (invalid
67 /// backreferences, trailing backslashes) will be recorded as a non-empty
68 /// string.
69 std::string sub(StringRef Repl, StringRef String, std::string *Error = 0);
70
5871 private:
5972 struct llvm_regex *preg;
6073 int error;
8989
9090 return true;
9191 }
92
93 std::string Regex::sub(StringRef Repl, StringRef String,
94 std::string *Error) {
95 SmallVector Matches;
96
97 // Reset error, if given.
98 if (Error && !Error->empty()) *Error = "";
99
100 // Return the input if there was no match.
101 if (!match(String, &Matches))
102 return String;
103
104 // Otherwise splice in the replacement string, starting with the prefix before
105 // the match.
106 std::string Res(String.begin(), Matches[0].begin());
107
108 // Then the replacement string, honoring possible substitutions.
109 while (!Repl.empty()) {
110 // Skip to the next escape.
111 std::pair Split = Repl.split('\\');
112
113 // Add the skipped substring.
114 Res += Split.first;
115
116 // Check for terminimation and trailing backslash.
117 if (Split.second.empty()) {
118 if (Repl.size() != Split.first.size() &&
119 Error && Error->empty())
120 *Error = "replacement string contained trailing backslash";
121 break;
122 }
123
124 // Otherwise update the replacement string and interpret escapes.
125 Repl = Split.second;
126
127 // FIXME: We should have a StringExtras function for mapping C99 escapes.
128 switch (Repl[0]) {
129 // Treat all unrecognized characters as self-quoting.
130 default:
131 Res += Repl[0];
132 Repl = Repl.substr(1);
133 break;
134
135 // Single character escapes.
136 case 't':
137 Res += '\t';
138 Repl = Repl.substr(1);
139 break;
140 case 'n':
141 Res += '\n';
142 Repl = Repl.substr(1);
143 break;
144
145 // Decimal escapes are backreferences.
146 case '0': case '1': case '2': case '3': case '4':
147 case '5': case '6': case '7': case '8': case '9': {
148 // Extract the backreference number.
149 StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789"));
150 Repl = Repl.substr(Ref.size());
151
152 unsigned RefValue;
153 if (!Ref.getAsInteger(10, RefValue) &&
154 RefValue < Matches.size())
155 Res += Matches[RefValue];
156 else if (Error && Error->empty())
157 *Error = "invalid backreference string '" + Ref.str() + "'";
158 break;
159 }
160 }
161 }
162
163 // And finally the suffix.
164 Res += StringRef(Matches[0].end(), String.end() - Matches[0].end());
165
166 return Res;
167 }
6161 EXPECT_TRUE(r5.match(String));
6262 }
6363
64 TEST_F(RegexTest, Substitution) {
65 std::string Error;
66
67 EXPECT_EQ("aNUMber", Regex("[0-9]+").sub("NUM", "a1234ber"));
68
69 // Standard Escapes
70 EXPECT_EQ("a\\ber", Regex("[0-9]+").sub("\\\\", "a1234ber", &Error));
71 EXPECT_EQ(Error, "");
72 EXPECT_EQ("a\nber", Regex("[0-9]+").sub("\\n", "a1234ber", &Error));
73 EXPECT_EQ(Error, "");
74 EXPECT_EQ("a\tber", Regex("[0-9]+").sub("\\t", "a1234ber", &Error));
75 EXPECT_EQ(Error, "");
76 EXPECT_EQ("ajber", Regex("[0-9]+").sub("\\j", "a1234ber", &Error));
77 EXPECT_EQ(Error, "");
78
79 EXPECT_EQ("aber", Regex("[0-9]+").sub("\\", "a1234ber", &Error));
80 EXPECT_EQ(Error, "replacement string contained trailing backslash");
81
82 // Backreferences
83 EXPECT_EQ("aa1234bber", Regex("a[0-9]+b").sub("a\\0b", "a1234ber", &Error));
84 EXPECT_EQ(Error, "");
85
86 EXPECT_EQ("a1234ber", Regex("a([0-9]+)b").sub("a\\1b", "a1234ber", &Error));
87 EXPECT_EQ(Error, "");
88
89 EXPECT_EQ("aber", Regex("a[0-9]+b").sub("a\\100b", "a1234ber", &Error));
90 EXPECT_EQ(Error, "invalid backreference string '100'");
6491 }
92
93 }