diff options
-rw-r--r-- | bin/sh/parser.c | 281 | ||||
-rw-r--r-- | tools/regression/bin/sh/expansion/plus-minus2.0 | 4 | ||||
-rw-r--r-- | tools/regression/bin/sh/parser/heredoc2.0 | 44 |
3 files changed, 256 insertions, 73 deletions
diff --git a/bin/sh/parser.c b/bin/sh/parser.c index 3f3103a..29ccd83 100644 --- a/bin/sh/parser.c +++ b/bin/sh/parser.c @@ -79,6 +79,10 @@ struct heredoc { int striptabs; /* if set, strip leading tabs */ }; +struct parser_temp { + struct parser_temp *next; + void *data; +}; STATIC struct heredoc *heredoclist; /* list of here documents to read */ @@ -94,6 +98,7 @@ STATIC struct heredoc *heredoc; STATIC int quoteflag; /* set if (part of) last token was quoted */ STATIC int startlinno; /* line # where last token started */ STATIC int funclinno; /* line # where the current function started */ +STATIC struct parser_temp *parser_temp; /* XXX When 'noaliases' is set to one, no alias expansion takes place. */ static int noaliases = 0; @@ -117,6 +122,73 @@ STATIC void synerror(const char *); STATIC void setprompt(int); +STATIC void * +parser_temp_alloc(size_t len) +{ + struct parser_temp *t; + + INTOFF; + t = ckmalloc(sizeof(*t)); + t->data = NULL; + t->next = parser_temp; + parser_temp = t; + t->data = ckmalloc(len); + INTON; + return t->data; +} + + +STATIC void * +parser_temp_realloc(void *ptr, size_t len) +{ + struct parser_temp *t; + + INTOFF; + t = parser_temp; + if (ptr != t->data) + error("bug: parser_temp_realloc misused"); + t->data = ckrealloc(t->data, len); + INTON; + return t->data; +} + + +STATIC void +parser_temp_free_upto(void *ptr) +{ + struct parser_temp *t; + int done = 0; + + INTOFF; + while (parser_temp != NULL && !done) { + t = parser_temp; + parser_temp = t->next; + done = t->data == ptr; + ckfree(t->data); + ckfree(t); + } + INTON; + if (!done) + error("bug: parser_temp_free_upto misused"); +} + + +STATIC void +parser_temp_free_all(void) +{ + struct parser_temp *t; + + INTOFF; + while (parser_temp != NULL) { + t = parser_temp; + parser_temp = t->next; + ckfree(t->data); + ckfree(t); + } + INTON; +} + + /* * Read and parse a command. Returns NEOF on end of file. (NULL is a * valid parse tree indicating a blank line.) @@ -127,6 +199,11 @@ parsecmd(int interact) { int t; + /* This assumes the parser is not re-entered, + * which could happen if we add command substitution on PS1/PS2. + */ + parser_temp_free_all(); + tokpushback = 0; doprompt = interact; if (doprompt) @@ -863,6 +940,21 @@ breakloop: } +#define MAXNEST_STATIC 8 +struct tokenstate +{ + const char *syntax; /* *SYNTAX */ + int parenlevel; /* levels of parentheses in arithmetic */ + enum tokenstate_category + { + TSTATE_TOP, + TSTATE_VAR_OLD, /* ${var+-=?}, inherits dquotes */ + TSTATE_VAR_NEW, /* other ${var...}, own dquote state */ + TSTATE_ARITH + } category; +}; + + /* * Called to parse command substitutions. */ @@ -1040,7 +1132,7 @@ done: #define PARSEARITH() {goto parsearith; parsearith_return:;} STATIC int -readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) +readtoken1(int firstc, char const *initialsyntax, char *eofmark, int striptabs) { int c = firstc; char *out; @@ -1048,22 +1140,21 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) char line[EOFMARKLEN + 1]; struct nodelist *bqlist; int quotef; - int dblquote; - int varnest; /* levels of variables expansion */ - int arinest; /* levels of arithmetic expansion */ - int parenlevel; /* levels of parens in arithmetic */ - char const *prevsyntax; /* syntax before arithmetic */ + int newvarnest; + int level; int synentry; + struct tokenstate state_static[MAXNEST_STATIC]; + int maxnest = MAXNEST_STATIC; + struct tokenstate *state = state_static; startlinno = plinno; - dblquote = 0; - if (syntax == DQSYNTAX) - dblquote = 1; quotef = 0; bqlist = NULL; - varnest = 0; - arinest = 0; - parenlevel = 0; + newvarnest = 0; + level = 0; + state[level].syntax = initialsyntax; + state[level].parenlevel = 0; + state[level].category = TSTATE_TOP; STARTSTACKSTR(out); loop: { /* for each line, until end of word */ @@ -1071,11 +1162,11 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) for (;;) { /* until end of line or end of word */ CHECKSTRSPACE(3, out); /* permit 3 calls to USTPUTC */ - synentry = syntax[c]; + synentry = state[level].syntax[c]; switch(synentry) { case CNL: /* '\n' */ - if (syntax == BASESYNTAX) + if (state[level].syntax == BASESYNTAX) goto endword; /* exit outer loop */ USTPUTC(c, out); plinno++; @@ -1089,7 +1180,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) USTPUTC(c, out); break; case CCTL: - if (eofmark == NULL || dblquote) + if (eofmark == NULL || initialsyntax != SQSYNTAX) USTPUTC(CTLESC, out); USTPUTC(c, out); break; @@ -1105,41 +1196,37 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) else setprompt(0); } else { - if (dblquote && c != '\\' && - c != '`' && c != '$' && - (c != '"' || eofmark != NULL)) + if (state[level].syntax == DQSYNTAX && + c != '\\' && c != '`' && c != '$' && + (c != '"' || (eofmark != NULL && + newvarnest == 0)) && + (c != '}' || state[level].category != TSTATE_VAR_OLD)) USTPUTC('\\', out); if (SQSYNTAX[c] == CCTL) USTPUTC(CTLESC, out); - else if (eofmark == NULL) + else if (eofmark == NULL || + newvarnest > 0) USTPUTC(CTLQUOTEMARK, out); USTPUTC(c, out); quotef++; } break; case CSQUOTE: - if (eofmark == NULL) - USTPUTC(CTLQUOTEMARK, out); - syntax = SQSYNTAX; + USTPUTC(CTLQUOTEMARK, out); + state[level].syntax = SQSYNTAX; break; case CDQUOTE: - if (eofmark == NULL) - USTPUTC(CTLQUOTEMARK, out); - syntax = DQSYNTAX; - dblquote = 1; + USTPUTC(CTLQUOTEMARK, out); + state[level].syntax = DQSYNTAX; break; case CENDQUOTE: - if (eofmark != NULL && arinest == 0 && - varnest == 0) { + if (eofmark != NULL && newvarnest == 0) USTPUTC(c, out); - } else { - if (arinest) { - syntax = ARISYNTAX; - dblquote = 0; - } else if (eofmark == NULL) { - syntax = BASESYNTAX; - dblquote = 0; - } + else { + if (state[level].category == TSTATE_ARITH) + state[level].syntax = ARISYNTAX; + else + state[level].syntax = BASESYNTAX; quotef++; } break; @@ -1147,30 +1234,33 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) PARSESUB(); /* parse substitution */ break; case CENDVAR: /* '}' */ - if (varnest > 0) { - varnest--; + if (level > 0 && + (state[level].category == TSTATE_VAR_OLD || + state[level].category == TSTATE_VAR_NEW)) { + if (state[level].category == TSTATE_VAR_OLD) + state[level - 1].syntax = state[level].syntax; + else + newvarnest--; + level--; USTPUTC(CTLENDVAR, out); } else { USTPUTC(c, out); } break; case CLP: /* '(' in arithmetic */ - parenlevel++; + state[level].parenlevel++; USTPUTC(c, out); break; case CRP: /* ')' in arithmetic */ - if (parenlevel > 0) { + if (state[level].parenlevel > 0) { USTPUTC(c, out); - --parenlevel; + --state[level].parenlevel; } else { if (pgetc() == ')') { - if (--arinest == 0) { + if (level > 0 && + state[level].category == TSTATE_ARITH) { + level--; USTPUTC(CTLENDARI, out); - syntax = prevsyntax; - if (syntax == DQSYNTAX) - dblquote = 1; - else - dblquote = 0; } else USTPUTC(')', out); } else { @@ -1184,13 +1274,15 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) } break; case CBQUOTE: /* '`' */ - out = parsebackq(out, &bqlist, 1, dblquote, - arinest || dblquote); + out = parsebackq(out, &bqlist, 1, + state[level].syntax == DQSYNTAX && + (eofmark == NULL || newvarnest > 0), + state[level].syntax == DQSYNTAX || state[level].syntax == ARISYNTAX); break; case CEOF: goto endword; /* exit outer loop */ default: - if (varnest == 0) + if (level == 0) goto endword; /* exit outer loop */ USTPUTC(c, out); } @@ -1198,14 +1290,17 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs) } } endword: - if (syntax == ARISYNTAX) + if (state[level].syntax == ARISYNTAX) synerror("Missing '))'"); - if (syntax != BASESYNTAX && eofmark == NULL) + if (state[level].syntax != BASESYNTAX && eofmark == NULL) synerror("Unterminated quoted string"); - if (varnest != 0) { + if (state[level].category == TSTATE_VAR_OLD || + state[level].category == TSTATE_VAR_NEW) { startlinno = plinno; synerror("Missing '}'"); } + if (state != state_static) + parser_temp_free_upto(state); USTPUTC('\0', out); len = out - stackblock(); out = stackblock(); @@ -1228,7 +1323,6 @@ endword: /* end of readtoken routine */ - /* * Check to see whether we are at the end of the here document. When this * is called, c is set to the first character of the next input line. If @@ -1345,8 +1439,11 @@ parsesub: { PARSEARITH(); } else { pungetc(); - out = parsebackq(out, &bqlist, 0, dblquote, - arinest || dblquote); + out = parsebackq(out, &bqlist, 0, + state[level].syntax == DQSYNTAX && + (eofmark == NULL || newvarnest > 0), + state[level].syntax == DQSYNTAX || + state[level].syntax == ARISYNTAX); } } else { USTPUTC(CTLVAR, out); @@ -1446,11 +1543,44 @@ parsesub: { pungetc(); } STPUTC('=', out); - if (subtype != VSLENGTH && (dblquote || arinest)) + if (subtype != VSLENGTH && (state[level].syntax == DQSYNTAX || + state[level].syntax == ARISYNTAX)) flags |= VSQUOTE; *(stackblock() + typeloc) = subtype | flags; - if (subtype != VSNORMAL) - varnest++; + if (subtype != VSNORMAL) { + if (level + 1 >= maxnest) { + maxnest *= 2; + if (state == state_static) { + state = parser_temp_alloc( + maxnest * sizeof(*state)); + memcpy(state, state_static, + MAXNEST_STATIC * sizeof(*state)); + } else + state = parser_temp_realloc(state, + maxnest * sizeof(*state)); + } + level++; + state[level].parenlevel = 0; + if (subtype == VSMINUS || subtype == VSPLUS || + subtype == VSQUESTION || subtype == VSASSIGN) { + /* + * For operators that were in the Bourne shell, + * inherit the double-quote state. + */ + state[level].syntax = state[level - 1].syntax; + state[level].category = TSTATE_VAR_OLD; + } else { + /* + * The other operators take a pattern, + * so go to BASESYNTAX. + * Also, ' and " are now special, even + * in here documents. + */ + state[level].syntax = BASESYNTAX; + state[level].category = TSTATE_VAR_NEW; + newvarnest++; + } + } } goto parsesub_return; } @@ -1461,21 +1591,26 @@ parsesub: { */ parsearith: { - if (++arinest == 1) { - prevsyntax = syntax; - syntax = ARISYNTAX; - USTPUTC(CTLARI, out); - if (dblquote) - USTPUTC('"',out); - else - USTPUTC(' ',out); - } else { - /* - * we collapse embedded arithmetic expansion to - * parenthesis, which should be equivalent - */ - USTPUTC('(', out); + if (level + 1 >= maxnest) { + maxnest *= 2; + if (state == state_static) { + state = parser_temp_alloc( + maxnest * sizeof(*state)); + memcpy(state, state_static, + MAXNEST_STATIC * sizeof(*state)); + } else + state = parser_temp_realloc(state, + maxnest * sizeof(*state)); } + level++; + state[level].syntax = ARISYNTAX; + state[level].parenlevel = 0; + state[level].category = TSTATE_ARITH; + USTPUTC(CTLARI, out); + if (state[level - 1].syntax == DQSYNTAX) + USTPUTC('"',out); + else + USTPUTC(' ',out); goto parsearith_return; } diff --git a/tools/regression/bin/sh/expansion/plus-minus2.0 b/tools/regression/bin/sh/expansion/plus-minus2.0 new file mode 100644 index 0000000..f5a8752 --- /dev/null +++ b/tools/regression/bin/sh/expansion/plus-minus2.0 @@ -0,0 +1,4 @@ +# $FreeBSD$ + +e= +test "${e:-\}}" = '}' diff --git a/tools/regression/bin/sh/parser/heredoc2.0 b/tools/regression/bin/sh/parser/heredoc2.0 new file mode 100644 index 0000000..b239520 --- /dev/null +++ b/tools/regression/bin/sh/parser/heredoc2.0 @@ -0,0 +1,44 @@ +# $FreeBSD$ + +failures=0 + +check() { + if ! eval "[ $* ]"; then + echo "Failed: $*" + : $((failures += 1)) + fi +} + +s='ast*que?non' sq=\' dq=\" + +check '"$(cat <<EOF +${s} +EOF +)" = "ast*que?non"' + +check '"$(cat <<EOF +${s+"x"} +EOF +)" = ${dq}x${dq}' + +check '"$(cat <<EOF +${s+'$sq'x'$sq'} +EOF +)" = ${sq}x${sq}' + +check '"$(cat <<EOF +${s#ast} +EOF +)" = "*que?non"' + +check '"$(cat <<EOF +${s##"ast"} +EOF +)" = "*que?non"' + +check '"$(cat <<EOF +${s##'$sq'ast'$sq'} +EOF +)" = "*que?non"' + +exit $((failures != 0)) |