llvm.org GIT mirror llvm / 14a5c69
Revert r188376, "[lit] Support parsing scripts with inconsistent or invalid encodings."
This doesn't work yet for bots using the internal shell.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188379 91177308-0d34-0410-b5e6-96231b3b80d8
Daniel Dunbar, 6 years ago
2 changed file(s) with 12 addition(s) and 45 deletion(s). Raw diff Collapse all Expand all
304304
305305 return False
306306
307 def parseIntegratedTestScriptCommands(source_path):
307 def parseIntegratedTestScriptCommands(sourcepath):
308308 """
309309 parseIntegratedTestScriptCommands(source_path) -> commands
310310
311311 Parse the commands in an integrated test script file into a list of
312312 (line_number, command_type, line).
313313 """
314
315 # This code is carefully written to be dual compatible with Python 2.5+ and
316 # Python 3 without requiring input files to always have valid codings. The
317 # trick we use is to open the file in binary mode and use the regular
318 # expression library to find the commands, with it scanning strings in
319 # Python2 and bytes in Python3.
320 #
321 # Once we find a match, we do require each script line to be decodable to
322 # ascii, so we convert the outputs to ascii before returning. This way the
323 # remaining code can work with "strings" agnostic of the executing Python
324 # version.
325
326 def to_bytes(str):
327 # Encode to Latin1 to get binary data.
328 return str.encode('ISO-8859-1')
329 keywords = ('RUN:', 'XFAIL:', 'REQUIRES:', 'END.')
330 keywords_re = re.compile(
331 to_bytes("(%s)(.*)\n" % ("|".join(k for k in keywords),)))
332
333 f = open(source_path, 'rb')
334 try:
335 # Read the entire file contents.
336 data = f.read()
337
338 # Iterate over the matches.
339 line_number = 1
340 last_match_position = 0
341 for match in keywords_re.finditer(data):
342 # Compute the updated line number by counting the intervening
343 # newlines.
344 match_position = match.start()
345 line_number += data.count(to_bytes('\n'), last_match_position,
346 match_position)
347 last_match_position = match_position
348
349 # Convert the keyword and line to ascii and yield the command.
350 keyword,ln = match.groups()
351 yield (line_number, keyword[:-1].decode('ascii'),
352 ln.decode('ascii'))
353 finally:
354 f.close()
314 line_number = 0
315 for ln in open(sourcepath):
316 line_number += 1
317 if 'RUN:' in ln:
318 yield (line_number, 'RUN', ln[ln.index('RUN:')+4:])
319 elif 'XFAIL:' in ln:
320 yield (line_number, 'XFAIL', ln[ln.index('XFAIL:') + 6:])
321 elif 'REQUIRES:' in ln:
322 yield (line_number, 'REQUIRES', ln[ln.index('REQUIRES:') + 9:])
323 elif 'END.' in ln:
324 yield (line_number, 'END', ln[ln.index('END.') + 4:])
355325
356326 def parseIntegratedTestScript(test, normalize_slashes=False,
357327 extra_substitutions=[]):
+0
-3
utils/lit/tests/shtest-encoding.py less more
None # RUN: true
1
2 # Here is a string that cannot be decoded in line mode: Â.