sh: Fix various things about expansions:

* remove the backslash from \} inside double quotes inside +-=? substitutions, e.g. "${$+\}a}" * maintain separate double-quote state for ${v#...} and ${v%...}; single and double quotes are special inside, even in a double-quoted string or here document * keep track of correct order of substitutions and arithmetic This is different from dash's approach, which does not track individual double quotes in the parser, trying to fix this up during expansion. This treats single quotes inside "${v#...}" incorrectly, however. This is similar to NetBSD's approach (as submitted in PR bin/57554), but recognizes the difference between +-=? and #% substitutions hinted at in POSIX and is more refined for arithmetic expansion and here documents. PR: bin/57554 Exp-run done by: erwin (with some other sh(1) changes)
author: jilles <jilles@FreeBSD.org> 2010-04-03 20:55:56 +0000
committer: jilles <jilles@FreeBSD.org> 2010-04-03 20:55:56 +0000
commit: 631cfa174839a86bceb027b45b94ae4c25a16df6 (patch)
tree: 9ebac13ec5b096431bd0cc65726335c41fd8c3fb /bin
parent: 840173ec99f4f9addaad9214d467479f35ab3997 (diff)
download: FreeBSD-src-631cfa174839a86bceb027b45b94ae4c25a16df6.zip
FreeBSD-src-631cfa174839a86bceb027b45b94ae4c25a16df6.tar.gz
1 files changed, 208 insertions, 73 deletions
diff --git a/bin/sh/parser.c b/bin/sh/parser.c
index 3f3103a..29ccd83 100644
--- a/bin/sh/parser.c
+++ b/bin/sh/parser.c
@@ -79,6 +79,10 @@ struct heredoc {
 	int striptabs;		/* if set, strip leading tabs */
 };
 
+struct parser_temp {
+	struct parser_temp *next;
+	void *data;
+};
 
 
 STATIC struct heredoc *heredoclist;	/* list of here documents to read */
@@ -94,6 +98,7 @@ STATIC struct heredoc *heredoc;
 STATIC int quoteflag;		/* set if (part of) last token was quoted */
 STATIC int startlinno;		/* line # where last token started */
 STATIC int funclinno;		/* line # where the current function started */
+STATIC struct parser_temp *parser_temp;
 
 /* XXX When 'noaliases' is set to one, no alias expansion takes place. */
 static int noaliases = 0;
@@ -117,6 +122,73 @@ STATIC void synerror(const char *);
 STATIC void setprompt(int);
 
 
+STATIC void *
+parser_temp_alloc(size_t len)
+{
+	struct parser_temp *t;
+
+	INTOFF;
+	t = ckmalloc(sizeof(*t));
+	t->data = NULL;
+	t->next = parser_temp;
+	parser_temp = t;
+	t->data = ckmalloc(len);
+	INTON;
+	return t->data;
+}
+
+
+STATIC void *
+parser_temp_realloc(void *ptr, size_t len)
+{
+	struct parser_temp *t;
+
+	INTOFF;
+	t = parser_temp;
+	if (ptr != t->data)
+		error("bug: parser_temp_realloc misused");
+	t->data = ckrealloc(t->data, len);
+	INTON;
+	return t->data;
+}
+
+
+STATIC void
+parser_temp_free_upto(void *ptr)
+{
+	struct parser_temp *t;
+	int done = 0;
+
+	INTOFF;
+	while (parser_temp != NULL && !done) {
+		t = parser_temp;
+		parser_temp = t->next;
+		done = t->data == ptr;
+		ckfree(t->data);
+		ckfree(t);
+	}
+	INTON;
+	if (!done)
+		error("bug: parser_temp_free_upto misused");
+}
+
+
+STATIC void
+parser_temp_free_all(void)
+{
+	struct parser_temp *t;
+
+	INTOFF;
+	while (parser_temp != NULL) {
+		t = parser_temp;
+		parser_temp = t->next;
+		ckfree(t->data);
+		ckfree(t);
+	}
+	INTON;
+}
+
+
 /*
  * Read and parse a command.  Returns NEOF on end of file.  (NULL is a
  * valid parse tree indicating a blank line.)
@@ -127,6 +199,11 @@ parsecmd(int interact)
 {
 	int t;
 
+	/* This assumes the parser is not re-entered,
+	 * which could happen if we add command substitution on PS1/PS2.
+	 */
+	parser_temp_free_all();
+
 	tokpushback = 0;
 	doprompt = interact;
 	if (doprompt)
@@ -863,6 +940,21 @@ breakloop:
 }
 
 
+#define MAXNEST_STATIC 8
+struct tokenstate
+{
+	const char *syntax; /* *SYNTAX */
+	int parenlevel; /* levels of parentheses in arithmetic */
+	enum tokenstate_category
+	{
+		TSTATE_TOP,
+		TSTATE_VAR_OLD, /* ${var+-=?}, inherits dquotes */
+		TSTATE_VAR_NEW, /* other ${var...}, own dquote state */
+		TSTATE_ARITH
+	} category;
+};
+
+
 /*
  * Called to parse command substitutions.
  */
@@ -1040,7 +1132,7 @@ done:
 #define	PARSEARITH()	{goto parsearith; parsearith_return:;}
 
 STATIC int
-readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
+readtoken1(int firstc, char const *initialsyntax, char *eofmark, int striptabs)
 {
 	int c = firstc;
 	char *out;
@@ -1048,22 +1140,21 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
 	char line[EOFMARKLEN + 1];
 	struct nodelist *bqlist;
 	int quotef;
-	int dblquote;
-	int varnest;	/* levels of variables expansion */
-	int arinest;	/* levels of arithmetic expansion */
-	int parenlevel;	/* levels of parens in arithmetic */
-	char const *prevsyntax;	/* syntax before arithmetic */
+	int newvarnest;
+	int level;
 	int synentry;
+	struct tokenstate state_static[MAXNEST_STATIC];
+	int maxnest = MAXNEST_STATIC;
+	struct tokenstate *state = state_static;
 
 	startlinno = plinno;
-	dblquote = 0;
-	if (syntax == DQSYNTAX)
-		dblquote = 1;
 	quotef = 0;
 	bqlist = NULL;
-	varnest = 0;
-	arinest = 0;
-	parenlevel = 0;
+	newvarnest = 0;
+	level = 0;
+	state[level].syntax = initialsyntax;
+	state[level].parenlevel = 0;
+	state[level].category = TSTATE_TOP;
 
 	STARTSTACKSTR(out);
 	loop: {	/* for each line, until end of word */
@@ -1071,11 +1162,11 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
 		for (;;) {	/* until end of line or end of word */
 			CHECKSTRSPACE(3, out);	/* permit 3 calls to USTPUTC */
 
-			synentry = syntax[c];
+			synentry = state[level].syntax[c];
 
 			switch(synentry) {
 			case CNL:	/* '\n' */
-				if (syntax == BASESYNTAX)
+				if (state[level].syntax == BASESYNTAX)
 					goto endword;	/* exit outer loop */
 				USTPUTC(c, out);
 				plinno++;
@@ -1089,7 +1180,7 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
 				USTPUTC(c, out);
 				break;
 			case CCTL:
-				if (eofmark == NULL || dblquote)
+				if (eofmark == NULL || initialsyntax != SQSYNTAX)
 					USTPUTC(CTLESC, out);
 				USTPUTC(c, out);
 				break;
@@ -1105,41 +1196,37 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
 					else
 						setprompt(0);
 				} else {
-					if (dblquote && c != '\\' &&
-					    c != '`' && c != '$' &&
-					    (c != '"' || eofmark != NULL))
+					if (state[level].syntax == DQSYNTAX &&
+					    c != '\\' && c != '`' && c != '$' &&
+					    (c != '"' || (eofmark != NULL &&
+						newvarnest == 0)) &&
+					    (c != '}' || state[level].category != TSTATE_VAR_OLD))
 						USTPUTC('\\', out);
 					if (SQSYNTAX[c] == CCTL)
 						USTPUTC(CTLESC, out);
-					else if (eofmark == NULL)
+					else if (eofmark == NULL ||
+					    newvarnest > 0)
 						USTPUTC(CTLQUOTEMARK, out);
 					USTPUTC(c, out);
 					quotef++;
 				}
 				break;
 			case CSQUOTE:
-				if (eofmark == NULL)
-					USTPUTC(CTLQUOTEMARK, out);
-				syntax = SQSYNTAX;
+				USTPUTC(CTLQUOTEMARK, out);
+				state[level].syntax = SQSYNTAX;
 				break;
 			case CDQUOTE:
-				if (eofmark == NULL)
-					USTPUTC(CTLQUOTEMARK, out);
-				syntax = DQSYNTAX;
-				dblquote = 1;
+				USTPUTC(CTLQUOTEMARK, out);
+				state[level].syntax = DQSYNTAX;
 				break;
 			case CENDQUOTE:
-				if (eofmark != NULL && arinest == 0 &&
-				    varnest == 0) {
+				if (eofmark != NULL && newvarnest == 0)
 					USTPUTC(c, out);
-				} else {
-					if (arinest) {
-						syntax = ARISYNTAX;
-						dblquote = 0;
-					} else if (eofmark == NULL) {
-						syntax = BASESYNTAX;
-						dblquote = 0;
-					}
+				else {
+					if (state[level].category == TSTATE_ARITH)
+						state[level].syntax = ARISYNTAX;
+					else
+						state[level].syntax = BASESYNTAX;
 					quotef++;
 				}
 				break;
@@ -1147,30 +1234,33 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
 				PARSESUB();		/* parse substitution */
 				break;
 			case CENDVAR:	/* '}' */
-				if (varnest > 0) {
-					varnest--;
+				if (level > 0 &&
+				    (state[level].category == TSTATE_VAR_OLD ||
+				    state[level].category == TSTATE_VAR_NEW)) {
+					if (state[level].category == TSTATE_VAR_OLD)
+						state[level - 1].syntax = state[level].syntax;
+					else
+						newvarnest--;
+					level--;
 					USTPUTC(CTLENDVAR, out);
 				} else {
 					USTPUTC(c, out);
 				}
 				break;
 			case CLP:	/* '(' in arithmetic */
-				parenlevel++;
+				state[level].parenlevel++;
 				USTPUTC(c, out);
 				break;
 			case CRP:	/* ')' in arithmetic */
-				if (parenlevel > 0) {
+				if (state[level].parenlevel > 0) {
 					USTPUTC(c, out);
-					--parenlevel;
+					--state[level].parenlevel;
 				} else {
 					if (pgetc() == ')') {
-						if (--arinest == 0) {
+						if (level > 0 &&
+						    state[level].category == TSTATE_ARITH) {
+							level--;
 							USTPUTC(CTLENDARI, out);
-							syntax = prevsyntax;
-							if (syntax == DQSYNTAX)
-								dblquote = 1;
-							else
-								dblquote = 0;
 						} else
 							USTPUTC(')', out);
 					} else {
@@ -1184,13 +1274,15 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
 				}
 				break;
 			case CBQUOTE:	/* '`' */
-				out = parsebackq(out, &bqlist, 1, dblquote,
-						arinest || dblquote);
+				out = parsebackq(out, &bqlist, 1,
+				    state[level].syntax == DQSYNTAX &&
+				    (eofmark == NULL || newvarnest > 0),
+				    state[level].syntax == DQSYNTAX || state[level].syntax == ARISYNTAX);
 				break;
 			case CEOF:
 				goto endword;		/* exit outer loop */
 			default:
-				if (varnest == 0)
+				if (level == 0)
 					goto endword;	/* exit outer loop */
 				USTPUTC(c, out);
 			}
@@ -1198,14 +1290,17 @@ readtoken1(int firstc, char const *syntax, char *eofmark, int striptabs)
 		}
 	}
 endword:
-	if (syntax == ARISYNTAX)
+	if (state[level].syntax == ARISYNTAX)
 		synerror("Missing '))'");
-	if (syntax != BASESYNTAX && eofmark == NULL)
+	if (state[level].syntax != BASESYNTAX && eofmark == NULL)
 		synerror("Unterminated quoted string");
-	if (varnest != 0) {
+	if (state[level].category == TSTATE_VAR_OLD ||
+	    state[level].category == TSTATE_VAR_NEW) {
 		startlinno = plinno;
 		synerror("Missing '}'");
 	}
+	if (state != state_static)
+		parser_temp_free_upto(state);
 	USTPUTC('\0', out);
 	len = out - stackblock();
 	out = stackblock();
@@ -1228,7 +1323,6 @@ endword:
 /* end of readtoken routine */
 
 
-
 /*
  * Check to see whether we are at the end of the here document.  When this
  * is called, c is set to the first character of the next input line.  If
@@ -1345,8 +1439,11 @@ parsesub: {
 			PARSEARITH();
 		} else {
 			pungetc();
-			out = parsebackq(out, &bqlist, 0, dblquote,
-					arinest || dblquote);
+			out = parsebackq(out, &bqlist, 0,
+			    state[level].syntax == DQSYNTAX &&
+			    (eofmark == NULL || newvarnest > 0),
+			    state[level].syntax == DQSYNTAX ||
+			    state[level].syntax == ARISYNTAX);
 		}
 	} else {
 		USTPUTC(CTLVAR, out);
@@ -1446,11 +1543,44 @@ parsesub: {
 			pungetc();
 		}
 		STPUTC('=', out);
-		if (subtype != VSLENGTH && (dblquote || arinest))
+		if (subtype != VSLENGTH && (state[level].syntax == DQSYNTAX ||
+		    state[level].syntax == ARISYNTAX))
 			flags |= VSQUOTE;
 		*(stackblock() + typeloc) = subtype | flags;
-		if (subtype != VSNORMAL)
-			varnest++;
+		if (subtype != VSNORMAL) {
+			if (level + 1 >= maxnest) {
+				maxnest *= 2;
+				if (state == state_static) {
+					state = parser_temp_alloc(
+					    maxnest * sizeof(*state));
+					memcpy(state, state_static,
+					    MAXNEST_STATIC * sizeof(*state));
+				} else
+					state = parser_temp_realloc(state,
+					    maxnest * sizeof(*state));
+			}
+			level++;
+			state[level].parenlevel = 0;
+			if (subtype == VSMINUS || subtype == VSPLUS ||
+			    subtype == VSQUESTION || subtype == VSASSIGN) {
+				/*
+				 * For operators that were in the Bourne shell,
+				 * inherit the double-quote state.
+				 */
+				state[level].syntax = state[level - 1].syntax;
+				state[level].category = TSTATE_VAR_OLD;
+			} else {
+				/*
+				 * The other operators take a pattern,
+				 * so go to BASESYNTAX.
+				 * Also, ' and " are now special, even
+				 * in here documents.
+				 */
+				state[level].syntax = BASESYNTAX;
+				state[level].category = TSTATE_VAR_NEW;
+				newvarnest++;
+			}
+		}
 	}
 	goto parsesub_return;
 }
@@ -1461,21 +1591,26 @@ parsesub: {
  */
 parsearith: {
 
-	if (++arinest == 1) {
-		prevsyntax = syntax;
-		syntax = ARISYNTAX;
-		USTPUTC(CTLARI, out);
-		if (dblquote)
-			USTPUTC('"',out);
-		else
-			USTPUTC(' ',out);
-	} else {
-		/*
-		 * we collapse embedded arithmetic expansion to
-		 * parenthesis, which should be equivalent
-		 */
-		USTPUTC('(', out);
+	if (level + 1 >= maxnest) {
+		maxnest *= 2;
+		if (state == state_static) {
+			state = parser_temp_alloc(
+			    maxnest * sizeof(*state));
+			memcpy(state, state_static,
+			    MAXNEST_STATIC * sizeof(*state));
+		} else
+			state = parser_temp_realloc(state,
+			    maxnest * sizeof(*state));
 	}
+	level++;
+	state[level].syntax = ARISYNTAX;
+	state[level].parenlevel = 0;
+	state[level].category = TSTATE_ARITH;
+	USTPUTC(CTLARI, out);
+	if (state[level - 1].syntax == DQSYNTAX)
+		USTPUTC('"',out);
+	else
+		USTPUTC(' ',out);
 	goto parsearith_return;
 }
author	jilles <jilles@FreeBSD.org>	2010-04-03 20:55:56 +0000
committer	jilles <jilles@FreeBSD.org>	2010-04-03 20:55:56 +0000
commit	631cfa174839a86bceb027b45b94ae4c25a16df6 (patch)
tree	9ebac13ec5b096431bd0cc65726335c41fd8c3fb /bin
parent	840173ec99f4f9addaad9214d467479f35ab3997 (diff)
download	FreeBSD-src-631cfa174839a86bceb027b45b94ae4c25a16df6.zip FreeBSD-src-631cfa174839a86bceb027b45b94ae4c25a16df6.tar.gz