llvm.org GIT mirror llvm / d6bec11
Add Support for Creating and Deleting Unicode Files and Directories in Lit. This enables lit to work with Unicode file names via mkdir, rm, and redirection. Lit still uses UTF-8 internally, but converts to UTF-16 on Windows, or just UTF-8 bytes on everything else. Committed on behalf of Jason Mittertreiner. Differential Revision: https://reviews.llvm.org/D56754 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355122 91177308-0d34-0410-b5e6-96231b3b80d8 Serge Guelton 5 months ago
4 changed file(s) with 40 addition(s) and 7 deletion(s). Raw diff Collapse all Expand all
2222 import lit.ShUtil as ShUtil
2323 import lit.Test as Test
2424 import lit.util
25 from lit.util import to_bytes, to_string
25 from lit.util import to_bytes, to_string, to_unicode
2626 from lit.BooleanExpression import BooleanExpression
2727
2828 class InternalShellError(Exception):
343343 stderr = StringIO()
344344 exitCode = 0
345345 for dir in args:
346 cwd = cmd_shenv.cwd
347 dir = to_unicode(dir) if kIsWindows else to_bytes(dir)
348 cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd)
346349 if not os.path.isabs(dir):
347 dir = os.path.realpath(os.path.join(cmd_shenv.cwd, dir))
350 dir = os.path.realpath(os.path.join(cwd, dir))
348351 if parent:
349352 lit.util.mkdir_p(dir)
350353 else:
597600 stderr = StringIO()
598601 exitCode = 0
599602 for path in args:
603 cwd = cmd_shenv.cwd
604 path = to_unicode(path) if kIsWindows else to_bytes(path)
605 cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd)
600606 if not os.path.isabs(path):
601 path = os.path.realpath(os.path.join(cmd_shenv.cwd, path))
607 path = os.path.realpath(os.path.join(cwd, path))
602608 if force and not os.path.exists(path):
603609 continue
604610 try:
694700 else:
695701 # Make sure relative paths are relative to the cwd.
696702 redir_filename = os.path.join(cmd_shenv.cwd, name)
703 redir_filename = to_unicode(redir_filename) \
704 if kIsWindows else to_bytes(redir_filename)
697705 fd = open(redir_filename, mode)
698706 # Workaround a Win32 and/or subprocess bug when appending.
699707 #
10951103 for i, ln in enumerate(commands):
10961104 commands[i] = re.sub(kPdbgRegex, ": '\\1'; ", ln)
10971105 if test.config.pipefail:
1098 f.write('set -o pipefail;')
1106 f.write(b'set -o pipefail;' if mode == 'wb' else 'set -o pipefail;')
10991107 if litConfig.echo_all_commands:
1100 f.write('set -x;')
1101 f.write('{ ' + '; } &&\n{ '.join(commands) + '; }')
1102 f.write('\n')
1108 f.write(b'set -x;' if mode == 'wb' else 'set -x;')
1109 if sys.version_info > (3,0) and mode == 'wb':
1110 f.write(bytes('{ ' + '; } &&\n{ '.join(commands) + '; }', 'utf-8'))
1111 else:
1112 f.write('{ ' + '; } &&\n{ '.join(commands) + '; }')
1113 f.write(b'\n' if mode == 'wb' else '\n')
11031114 f.close()
11041115
11051116 if isWin32CMDEXE:
9999 return b.encode('utf-8')
100100 except AttributeError:
101101 raise TypeError('not sure how to convert %s to %s' % (type(b), str))
102
103
def to_unicode(s):
    """Return *s* as a unicode-capable text type, decoding it when needed.

    Byte strings are decoded as UTF-8; any other value is handed back
    unchanged.  Under Python 2 the decoded result is ``unicode``; under
    Python 3 it is ``str``.

    """
    # Python 2 aliases ``bytes`` to ``str``, so ordinary ``str`` values are
    # decoded there as well; on Python 3 only real ``bytes`` objects are.
    if not isinstance(s, bytes):
        return s
    return s.decode('utf-8')
102116
103117
104118 def detectCPUs():
0 # Check removing unicode
1 #
2 # RUN: mkdir -p Output/中文
3 # RUN: echo "" > Output/中文/你好.txt
4 # RUN: rm Output/中文/你好.txt
5 # RUN: echo "" > Output/中文/你好.txt
6 # RUN: rm -r Output/中文
223223 # CHECK: Exit Code: 1
224224 # CHECK: ***
225225
226 # CHECK: PASS: shtest-shell :: rm-unicode-0.txt
226227 # CHECK: PASS: shtest-shell :: sequencing-0.txt
227228 # CHECK: XFAIL: shtest-shell :: sequencing-1.txt
228229 # CHECK: PASS: shtest-shell :: valid-shell.txt