diff options
author | rdivacky <rdivacky@FreeBSD.org> | 2009-10-14 17:57:32 +0000 |
---|---|---|
committer | rdivacky <rdivacky@FreeBSD.org> | 2009-10-14 17:57:32 +0000 |
commit | cd749a9c07f1de2fb8affde90537efa4bc3e7c54 (patch) | |
tree | b21f6de4e08b89bb7931806bab798fc2a5e3a686 /utils/lit/ShUtil.py | |
parent | 72621d11de5b873f1695f391eb95f0b336c3d2d4 (diff) | |
download | FreeBSD-src-cd749a9c07f1de2fb8affde90537efa4bc3e7c54.zip FreeBSD-src-cd749a9c07f1de2fb8affde90537efa4bc3e7c54.tar.gz |
Update llvm to r84119.
Diffstat (limited to 'utils/lit/ShUtil.py')
-rw-r--r-- | utils/lit/ShUtil.py | 346 |
1 files changed, 346 insertions, 0 deletions
diff --git a/utils/lit/ShUtil.py b/utils/lit/ShUtil.py new file mode 100644 index 0000000..c4bbb3d --- /dev/null +++ b/utils/lit/ShUtil.py @@ -0,0 +1,346 @@ +import itertools + +import Util +from ShCommands import Command, Pipeline, Seq + +class ShLexer: + def __init__(self, data, win32Escapes = False): + self.data = data + self.pos = 0 + self.end = len(data) + self.win32Escapes = win32Escapes + + def eat(self): + c = self.data[self.pos] + self.pos += 1 + return c + + def look(self): + return self.data[self.pos] + + def maybe_eat(self, c): + """ + maybe_eat(c) - Consume the character c if it is the next character, + returning True if a character was consumed. """ + if self.data[self.pos] == c: + self.pos += 1 + return True + return False + + def lex_arg_fast(self, c): + # Get the leading whitespace free section. + chunk = self.data[self.pos - 1:].split(None, 1)[0] + + # If it has special characters, the fast path failed. + if ('|' in chunk or '&' in chunk or + '<' in chunk or '>' in chunk or + "'" in chunk or '"' in chunk or + '\\' in chunk): + return None + + self.pos = self.pos - 1 + len(chunk) + return chunk + + def lex_arg_slow(self, c): + if c in "'\"": + str = self.lex_arg_quoted(c) + else: + str = c + while self.pos != self.end: + c = self.look() + if c.isspace() or c in "|&": + break + elif c in '><': + # This is an annoying case; we treat '2>' as a single token so + # we don't have to track whitespace tokens. + + # If the parse string isn't an integer, do the usual thing. + if not str.isdigit(): + break + + # Otherwise, lex the operator and convert to a redirection + # token. + num = int(str) + tok = self.lex_one_token() + assert isinstance(tok, tuple) and len(tok) == 1 + return (tok[0], num) + elif c == '"': + self.eat() + str += self.lex_arg_quoted('"')
+ elif not self.win32Escapes and c == '\\': + # Outside of a string, '\\' escapes everything. + self.eat() + if self.pos == self.end: + Util.warning("escape at end of quoted argument in: %r" % + self.data) + return str + str += self.eat() + else: + str += self.eat() + return str + + def lex_arg_quoted(self, delim): + str = '' + while self.pos != self.end: + c = self.eat() + if c == delim: + return str + elif c == '\\' and delim == '"': + # Inside a '"' quoted string, '\\' only escapes the quote + # character and backslash, otherwise it is preserved. + if self.pos == self.end: + Util.warning("escape at end of quoted argument in: %r" % + self.data) + return str + c = self.eat() + if c == '"': # + str += '"' + elif c == '\\': + str += '\\' + else: + str += '\\' + c + else: + str += c + Util.warning("missing quote character in %r" % self.data) + return str + + def lex_arg_checked(self, c): + pos = self.pos + res = self.lex_arg_fast(c) + end = self.pos + + self.pos = pos + reference = self.lex_arg_slow(c) + if res is not None: + if res != reference: + raise ValueError,"Fast path failure: %r != %r" % (res, reference) + if self.pos != end: + raise ValueError,"Fast path failure: %r != %r" % (self.pos, end) + return reference + + def lex_arg(self, c): + return self.lex_arg_fast(c) or self.lex_arg_slow(c) + + def lex_one_token(self): + """ + lex_one_token - Lex a single 'sh' token. """ + + c = self.eat() + if c in ';!': + return (c,) + if c == '|': + if self.maybe_eat('|'): + return ('||',) + return (c,) + if c == '&': + if self.maybe_eat('&'): + return ('&&',) + if self.maybe_eat('>'): + return ('&>',) + return (c,) + if c == '>': + if self.maybe_eat('&'): + return ('>&',) + if self.maybe_eat('>'): + return ('>>',) + return (c,) + if c == '<': + if self.maybe_eat('&'): + return ('<&',) + if self.maybe_eat('>'): + return ('<<',) + return (c,) + + return self.lex_arg(c) + + def lex(self): + while self.pos != self.end: + if self.look().isspace(): + self.eat() + else: + yield self.lex_one_token() + +### + +class ShParser: + def __init__(self, data, win32Escapes = False): + self.data = data + self.tokens = ShLexer(data, win32Escapes = win32Escapes).lex() + + def lex(self): + try: + return self.tokens.next() + except StopIteration: + return None + + def look(self): + next = self.lex() + if next is not None: + self.tokens = itertools.chain([next], self.tokens) + return next + + def parse_command(self): + tok = self.lex() + if not tok: + raise ValueError,"empty command!" + if isinstance(tok, tuple): + raise ValueError,"syntax error near unexpected token %r" % tok[0] + + args = [tok] + redirects = [] + while 1: + tok = self.look() + + # EOF? + if tok is None: + break + + # If this is an argument, just add it to the current command. + if isinstance(tok, str): + args.append(self.lex()) + continue + + # Otherwise see if it is a terminator. + assert isinstance(tok, tuple) + if tok[0] in ('|',';','&','||','&&'): + break + + # Otherwise it must be a redirection. + op = self.lex() + arg = self.lex() + if not arg: + raise ValueError,"syntax error near token %r" % op[0] + redirects.append((op, arg)) + + return Command(args, redirects) + + def parse_pipeline(self): + negate = False + if self.look() == ('!',): + self.lex() + negate = True + + commands = [self.parse_command()] + while self.look() == ('|',): + self.lex() + commands.append(self.parse_command()) + return Pipeline(commands, negate) + + def parse(self): + lhs = self.parse_pipeline() + + while self.look(): + operator = self.lex() + assert isinstance(operator, tuple) and len(operator) == 1 + + if not self.look(): + raise ValueError, "missing argument to operator %r" % operator[0] + + # FIXME: Operator precedence!! + lhs = Seq(lhs, operator[0], self.parse_pipeline()) + + return lhs + +### + +import unittest + +class TestShLexer(unittest.TestCase): + def lex(self, str, *args, **kwargs): + return list(ShLexer(str, *args, **kwargs).lex()) + + def test_basic(self): + self.assertEqual(self.lex('a|b>c&d<e'), + ['a', ('|',), 'b', ('>',), 'c', ('&',), 'd', + ('<',), 'e']) + + def test_redirection_tokens(self): + self.assertEqual(self.lex('a2>c'), + ['a2', ('>',), 'c']) + self.assertEqual(self.lex('a 2>c'), + ['a', ('>',2), 'c']) + + def test_quoting(self): + self.assertEqual(self.lex(""" 'a' """), + ['a']) + self.assertEqual(self.lex(""" "hello\\"world" """), + ['hello"world']) + self.assertEqual(self.lex(""" "hello\\'world" """), + ["hello\\'world"]) + self.assertEqual(self.lex(""" "hello\\\\world" """), + ["hello\\world"]) + self.assertEqual(self.lex(""" he"llo wo"rld """), + ["hello world"]) + self.assertEqual(self.lex(""" a\\ b a\\\\b """), + ["a b", "a\\b"]) + self.assertEqual(self.lex(""" "" "" """), + ["", ""]) + self.assertEqual(self.lex(""" a\\ b """, win32Escapes = True), + ['a\\', 'b']) + +class TestShParse(unittest.TestCase): + def parse(self, str): + return ShParser(str).parse() + + def test_basic(self): + self.assertEqual(self.parse('echo hello'), + Pipeline([Command(['echo', 'hello'], [])], False)) + self.assertEqual(self.parse('echo ""'), + Pipeline([Command(['echo', ''], [])], False)) + + def test_redirection(self): + self.assertEqual(self.parse('echo hello > c'), + Pipeline([Command(['echo', 'hello'], + [((('>'),), 'c')])], False)) + self.assertEqual(self.parse('echo hello > c >> d'), + Pipeline([Command(['echo', 'hello'], [(('>',), 'c'), + (('>>',), 'd')])], False)) + self.assertEqual(self.parse('a 2>&1'), + Pipeline([Command(['a'], [(('>&',2), '1')])], False)) + + def test_pipeline(self): + self.assertEqual(self.parse('a | b'), + Pipeline([Command(['a'], []), + Command(['b'], [])], + False)) + + self.assertEqual(self.parse('a | b | c'), + Pipeline([Command(['a'], []), + Command(['b'], []), + Command(['c'], [])], + False)) + + self.assertEqual(self.parse('! a'), + Pipeline([Command(['a'], [])], + True)) + + def test_list(self): + self.assertEqual(self.parse('a ; b'), + Seq(Pipeline([Command(['a'], [])], False), + ';', + Pipeline([Command(['b'], [])], False))) + + self.assertEqual(self.parse('a & b'), + Seq(Pipeline([Command(['a'], [])], False), + '&', + Pipeline([Command(['b'], [])], False))) + + self.assertEqual(self.parse('a && b'), + Seq(Pipeline([Command(['a'], [])], False), + '&&', + Pipeline([Command(['b'], [])], False))) + + self.assertEqual(self.parse('a || b'), + Seq(Pipeline([Command(['a'], [])], False), + '||', + Pipeline([Command(['b'], [])], False))) + + self.assertEqual(self.parse('a && b || c'), + Seq(Seq(Pipeline([Command(['a'], [])], False), + '&&', + Pipeline([Command(['b'], [])], False)), + '||', + Pipeline([Command(['c'], [])], False))) + +if __name__ == '__main__': + unittest.main() |