summaryrefslogtreecommitdiffstats
path: root/utils/lit/ShUtil.py
diff options
context:
space:
mode:
Diffstat (limited to 'utils/lit/ShUtil.py')
-rw-r--r--utils/lit/ShUtil.py346
1 files changed, 346 insertions, 0 deletions
diff --git a/utils/lit/ShUtil.py b/utils/lit/ShUtil.py
new file mode 100644
index 0000000..c4bbb3d
--- /dev/null
+++ b/utils/lit/ShUtil.py
@@ -0,0 +1,346 @@
+import itertools
+
+import Util
+from ShCommands import Command, Pipeline, Seq
+
+class ShLexer:
+ def __init__(self, data, win32Escapes = False):
+ self.data = data
+ self.pos = 0
+ self.end = len(data)
+ self.win32Escapes = win32Escapes
+
+ def eat(self):
+ c = self.data[self.pos]
+ self.pos += 1
+ return c
+
+ def look(self):
+ return self.data[self.pos]
+
+ def maybe_eat(self, c):
+ """
+ maybe_eat(c) - Consume the character c if it is the next character,
+ returning True if a character was consumed. """
+ if self.data[self.pos] == c:
+ self.pos += 1
+ return True
+ return False
+
+ def lex_arg_fast(self, c):
+ # Get the leading whitespace free section.
+ chunk = self.data[self.pos - 1:].split(None, 1)[0]
+
+ # If it has special characters, the fast path failed.
+ if ('|' in chunk or '&' in chunk or
+ '<' in chunk or '>' in chunk or
+ "'" in chunk or '"' in chunk or
+ '\\' in chunk):
+ return None
+
+ self.pos = self.pos - 1 + len(chunk)
+ return chunk
+
+ def lex_arg_slow(self, c):
+ if c in "'\"":
+ str = self.lex_arg_quoted(c)
+ else:
+ str = c
+ while self.pos != self.end:
+ c = self.look()
+ if c.isspace() or c in "|&":
+ break
+ elif c in '><':
+ # This is an annoying case; we treat '2>' as a single token so
+ # we don't have to track whitespace tokens.
+
+ # If the parse string isn't an integer, do the usual thing.
+ if not str.isdigit():
+ break
+
+ # Otherwise, lex the operator and convert to a redirection
+ # token.
+ num = int(str)
+ tok = self.lex_one_token()
+ assert isinstance(tok, tuple) and len(tok) == 1
+ return (tok[0], num)
+ elif c == '"':
+ self.eat()
+ str += self.lex_arg_quoted('"')
+ elif not self.win32Escapes and c == '\\':
+ # Outside of a string, '\\' escapes everything.
+ self.eat()
+ if self.pos == self.end:
+ Util.warning("escape at end of quoted argument in: %r" %
+ self.data)
+ return str
+ str += self.eat()
+ else:
+ str += self.eat()
+ return str
+
+ def lex_arg_quoted(self, delim):
+ str = ''
+ while self.pos != self.end:
+ c = self.eat()
+ if c == delim:
+ return str
+ elif c == '\\' and delim == '"':
+ # Inside a '"' quoted string, '\\' only escapes the quote
+ # character and backslash, otherwise it is preserved.
+ if self.pos == self.end:
+ Util.warning("escape at end of quoted argument in: %r" %
+ self.data)
+ return str
+ c = self.eat()
+ if c == '"': #
+ str += '"'
+ elif c == '\\':
+ str += '\\'
+ else:
+ str += '\\' + c
+ else:
+ str += c
+ Util.warning("missing quote character in %r" % self.data)
+ return str
+
+ def lex_arg_checked(self, c):
+ pos = self.pos
+ res = self.lex_arg_fast(c)
+ end = self.pos
+
+ self.pos = pos
+ reference = self.lex_arg_slow(c)
+ if res is not None:
+ if res != reference:
+ raise ValueError,"Fast path failure: %r != %r" % (res, reference)
+ if self.pos != end:
+ raise ValueError,"Fast path failure: %r != %r" % (self.pos, end)
+ return reference
+
+ def lex_arg(self, c):
+ return self.lex_arg_fast(c) or self.lex_arg_slow(c)
+
+ def lex_one_token(self):
+ """
+ lex_one_token - Lex a single 'sh' token. """
+
+ c = self.eat()
+ if c in ';!':
+ return (c,)
+ if c == '|':
+ if self.maybe_eat('|'):
+ return ('||',)
+ return (c,)
+ if c == '&':
+ if self.maybe_eat('&'):
+ return ('&&',)
+ if self.maybe_eat('>'):
+ return ('&>',)
+ return (c,)
+ if c == '>':
+ if self.maybe_eat('&'):
+ return ('>&',)
+ if self.maybe_eat('>'):
+ return ('>>',)
+ return (c,)
+ if c == '<':
+ if self.maybe_eat('&'):
+ return ('<&',)
+ if self.maybe_eat('>'):
+ return ('<<',)
+ return (c,)
+
+ return self.lex_arg(c)
+
+ def lex(self):
+ while self.pos != self.end:
+ if self.look().isspace():
+ self.eat()
+ else:
+ yield self.lex_one_token()
+
+###
+
+class ShParser:
+ def __init__(self, data, win32Escapes = False):
+ self.data = data
+ self.tokens = ShLexer(data, win32Escapes = win32Escapes).lex()
+
+ def lex(self):
+ try:
+ return self.tokens.next()
+ except StopIteration:
+ return None
+
+ def look(self):
+ next = self.lex()
+ if next is not None:
+ self.tokens = itertools.chain([next], self.tokens)
+ return next
+
+ def parse_command(self):
+ tok = self.lex()
+ if not tok:
+ raise ValueError,"empty command!"
+ if isinstance(tok, tuple):
+ raise ValueError,"syntax error near unexpected token %r" % tok[0]
+
+ args = [tok]
+ redirects = []
+ while 1:
+ tok = self.look()
+
+ # EOF?
+ if tok is None:
+ break
+
+ # If this is an argument, just add it to the current command.
+ if isinstance(tok, str):
+ args.append(self.lex())
+ continue
+
+ # Otherwise see if it is a terminator.
+ assert isinstance(tok, tuple)
+ if tok[0] in ('|',';','&','||','&&'):
+ break
+
+ # Otherwise it must be a redirection.
+ op = self.lex()
+ arg = self.lex()
+ if not arg:
+ raise ValueError,"syntax error near token %r" % op[0]
+ redirects.append((op, arg))
+
+ return Command(args, redirects)
+
+ def parse_pipeline(self):
+ negate = False
+ if self.look() == ('!',):
+ self.lex()
+ negate = True
+
+ commands = [self.parse_command()]
+ while self.look() == ('|',):
+ self.lex()
+ commands.append(self.parse_command())
+ return Pipeline(commands, negate)
+
+ def parse(self):
+ lhs = self.parse_pipeline()
+
+ while self.look():
+ operator = self.lex()
+ assert isinstance(operator, tuple) and len(operator) == 1
+
+ if not self.look():
+ raise ValueError, "missing argument to operator %r" % operator[0]
+
+ # FIXME: Operator precedence!!
+ lhs = Seq(lhs, operator[0], self.parse_pipeline())
+
+ return lhs
+
+###
+
+import unittest
+
+class TestShLexer(unittest.TestCase):
+ def lex(self, str, *args, **kwargs):
+ return list(ShLexer(str, *args, **kwargs).lex())
+
+ def test_basic(self):
+ self.assertEqual(self.lex('a|b>c&d<e'),
+ ['a', ('|',), 'b', ('>',), 'c', ('&',), 'd',
+ ('<',), 'e'])
+
+ def test_redirection_tokens(self):
+ self.assertEqual(self.lex('a2>c'),
+ ['a2', ('>',), 'c'])
+ self.assertEqual(self.lex('a 2>c'),
+ ['a', ('>',2), 'c'])
+
+ def test_quoting(self):
+ self.assertEqual(self.lex(""" 'a' """),
+ ['a'])
+ self.assertEqual(self.lex(""" "hello\\"world" """),
+ ['hello"world'])
+ self.assertEqual(self.lex(""" "hello\\'world" """),
+ ["hello\\'world"])
+ self.assertEqual(self.lex(""" "hello\\\\world" """),
+ ["hello\\world"])
+ self.assertEqual(self.lex(""" he"llo wo"rld """),
+ ["hello world"])
+ self.assertEqual(self.lex(""" a\\ b a\\\\b """),
+ ["a b", "a\\b"])
+ self.assertEqual(self.lex(""" "" "" """),
+ ["", ""])
+ self.assertEqual(self.lex(""" a\\ b """, win32Escapes = True),
+ ['a\\', 'b'])
+
+class TestShParse(unittest.TestCase):
+ def parse(self, str):
+ return ShParser(str).parse()
+
+ def test_basic(self):
+ self.assertEqual(self.parse('echo hello'),
+ Pipeline([Command(['echo', 'hello'], [])], False))
+ self.assertEqual(self.parse('echo ""'),
+ Pipeline([Command(['echo', ''], [])], False))
+
+ def test_redirection(self):
+ self.assertEqual(self.parse('echo hello > c'),
+ Pipeline([Command(['echo', 'hello'],
+ [((('>'),), 'c')])], False))
+ self.assertEqual(self.parse('echo hello > c >> d'),
+ Pipeline([Command(['echo', 'hello'], [(('>',), 'c'),
+ (('>>',), 'd')])], False))
+ self.assertEqual(self.parse('a 2>&1'),
+ Pipeline([Command(['a'], [(('>&',2), '1')])], False))
+
+ def test_pipeline(self):
+ self.assertEqual(self.parse('a | b'),
+ Pipeline([Command(['a'], []),
+ Command(['b'], [])],
+ False))
+
+ self.assertEqual(self.parse('a | b | c'),
+ Pipeline([Command(['a'], []),
+ Command(['b'], []),
+ Command(['c'], [])],
+ False))
+
+ self.assertEqual(self.parse('! a'),
+ Pipeline([Command(['a'], [])],
+ True))
+
+ def test_list(self):
+ self.assertEqual(self.parse('a ; b'),
+ Seq(Pipeline([Command(['a'], [])], False),
+ ';',
+ Pipeline([Command(['b'], [])], False)))
+
+ self.assertEqual(self.parse('a & b'),
+ Seq(Pipeline([Command(['a'], [])], False),
+ '&',
+ Pipeline([Command(['b'], [])], False)))
+
+ self.assertEqual(self.parse('a && b'),
+ Seq(Pipeline([Command(['a'], [])], False),
+ '&&',
+ Pipeline([Command(['b'], [])], False)))
+
+ self.assertEqual(self.parse('a || b'),
+ Seq(Pipeline([Command(['a'], [])], False),
+ '||',
+ Pipeline([Command(['b'], [])], False)))
+
+ self.assertEqual(self.parse('a && b || c'),
+ Seq(Seq(Pipeline([Command(['a'], [])], False),
+ '&&',
+ Pipeline([Command(['b'], [])], False)),
+ '||',
+ Pipeline([Command(['c'], [])], False)))
+
+if __name__ == '__main__':
+ unittest.main()
OpenPOWER on IntegriCloud