From 2e02d4aae7250e97c7dd2fea71741c365474985a Mon Sep 17 00:00:00 2001 From: wollman Date: Fri, 22 Mar 2002 20:18:26 +0000 Subject: Make expr POSIX-compliant, and fix some bugs. Specifically: - expr must conform to the Utility Syntax Guidelines, so use getopt() to eat the (non-existent) options. - Use the Standard type intmax_t for arithmetic. - If an argument cannot be *completely* converted to an integer, then it is a string. Additionally make some style cleanups near the modified lines. This utility is still not completely style-compliant. --- bin/expr/expr.1 | 113 ++++++++++++++++++++++++++----- bin/expr/expr.y | 206 ++++++++++++++++++++++++++++++++------------------------ 2 files changed, 215 insertions(+), 104 deletions(-) (limited to 'bin/expr') diff --git a/bin/expr/expr.1 b/bin/expr/expr.1 index 9da5269..2c4fb792 100644 --- a/bin/expr/expr.1 +++ b/bin/expr/expr.1 @@ -30,7 +30,7 @@ .\" .\" $FreeBSD$ .\" -.Dd July 3, 1993 +.Dd March 22, 2002 .Dt EXPR 1 .Os .Sh NAME @@ -38,6 +38,7 @@ .Nd evaluate expression .Sh SYNOPSIS .Nm +.Op Fl \&- .Ar expression .Sh DESCRIPTION The @@ -46,12 +47,21 @@ utility evaluates .Ar expression and writes the result on standard output. .Pp -All operators are separate arguments to the +All operators and operands must be passed as separate arguments. +Several of the operators have special meaning to command interpreters +and must therefore be quoted appropriately. +.Pp +Arithmetic operations are performed using signed integer math, +in the largest integral type available in the C language. The .Nm -utility. -Characters special to the command interpreter must be escaped. +utility will detect arithmetic overflow and division by zero, and +returns with an exit status of 2 in those cases. If a numeric operand +is specified which is so large as to overflow conversion to an integer, +it is parsed as a string instead. All numeric operands are interpreted +in base 10. .Pp -Operators are listed below in order of increasing precedence. 
+Operators are listed below in order of increasing precedence; all +are left-associative. Operators with equal precedence are grouped within { } symbols. .Bl -tag -width indent .It Ar expr1 Li | Ar expr2 @@ -82,8 +92,9 @@ operator matches .Ar expr1 against .Ar expr2 , -which must be a regular expression. The regular expression is anchored -to the beginning of the string with an implicit +which must be a basic regular expression. +The regular expression is anchored +to the beginning of the string with an implicit .Dq ^ . .Pp If the match succeeds and the pattern contains at least one regular @@ -99,19 +110,89 @@ otherwise 0. .El .Pp Parentheses are used for grouping in the usual manner. +.Pp +This version of +.Nm +adheres to the +.Tn POSIX +Utility Syntax Guidelines, which require that a leading argument beginning +with a minus sign be considered an option to the program. +The standard +.Ql \&-- +syntax may be used to prevent this interpretation. +However, many historic implementations of +.Nm , +including the one in previous versions of +.Fx , +will not permit this syntax. +See the examples below for portable ways to guarantee the correct +interpretation. +.Pp +The +.Nm +utility makes no lexical distinction between arguments which may be +operators and arguments which may be operands. +An operand which is lexically identical to an operator will be considered a +syntax error. +See the examples below for a work-around. +.Pp +The syntax of the +.Nm +command in general is historic and inconvenient. +New applications are advised to use shell arithmetic rather than +.Nm . .Sh EXAMPLES .Bl -enum .It -The following example adds one to the variable a. -.Dl a=`expr $a + 1` +The following example (in +.Xr sh 1 +syntax) adds one to the variable +.Va a . +.Dl a=$(expr $a + 1) +.Li +This will fail if the value of +.Va a +is a negative number. 
+To protect negative values of +.Va a +from being interpreted as options to the +.Nm +command, one might rearrange the expression: +.Dl a=$(expr 1 + $a) +.Li +More generally, parenthesize possibly-negative values: +.Dl a=$(expr \e( $a \e) + 1) .It -The following example returns the filename portion of a pathname stored -in variable a. The // characters act to eliminate ambiguity with the -division operator. -.Dl expr "//$a" Li : '.*/\e(.*\e)' +The following example prints the filename portion of a pathname stored +in variable +.Va a . +Since +.Va a +might represent the path +.Pa / , +it is necessary to prevent it from being interpreted as the division operator. +The +.Li // +characters resolve this ambiguity. +.Dl expr \*q//$a\*q \&: '.*/\e(.*\e)' .It -The following example returns the number of characters in variable a. -.Dl expr $a Li : '.*' +The following examples output the number of characters in variable +.Va a . +Again, if +.Va a +might begin with a hyphen, it is necessary to prevent it from being +interpreted as an option to +.Nm . +If the +.Nm +command conforms to +.St -p1003.1-2001 , +this is simple: +.Dl expr -- \*q$a\*q \&: \*q.*\*q +.Li +For portability to older systems, however, a more complicated command +is required: +.Dl expr \e( \*qX$a\*q \&: \*q.*\*q \e) - 1 .El .Sh DIAGNOSTICS The @@ -132,4 +213,4 @@ the expression is invalid. The .Nm utility conforms to -.St -p1003.2 . +.St -p1003.1-2001 . diff --git a/bin/expr/expr.y b/bin/expr/expr.y index d368ce3..bc40bf3 100644 --- a/bin/expr/expr.y +++ b/bin/expr/expr.y @@ -8,16 +8,25 @@ */ #include -#include -#include -#include -#include + #include #include #include -#include +#include #include +#include +#include +#include +#include +#include +#include +/* + * POSIX specifies a specific error code for syntax errors. We exit + * with this code for all errors. 
+ */ +#define ERR_EXIT 2 + enum valtype { integer, numeric_string, string } ; @@ -26,20 +35,20 @@ struct val { enum valtype type; union { char *s; - quad_t i; + intmax_t i; } u; } ; struct val *result; -int chk_div(quad_t, quad_t); -int chk_minus(quad_t, quad_t, quad_t); -int chk_plus(quad_t, quad_t, quad_t); -int chk_times(quad_t, quad_t, quad_t); +int chk_div(intmax_t, intmax_t); +int chk_minus(intmax_t, intmax_t, intmax_t); +int chk_plus(intmax_t, intmax_t, intmax_t); +int chk_times(intmax_t, intmax_t, intmax_t); void free_value(struct val *); int is_zero_or_null(struct val *); int isstring(struct val *); -struct val *make_integer(quad_t); +struct val *make_integer(intmax_t); struct val *make_str(const char *); struct val *op_and(struct val *, struct val *); struct val *op_colon(struct val *, struct val *); @@ -55,7 +64,7 @@ struct val *op_or(struct val *, struct val *); struct val *op_plus(struct val *, struct val *); struct val *op_rem(struct val *, struct val *); struct val *op_times(struct val *, struct val *); -quad_t to_integer(struct val *); +intmax_t to_integer(struct val *); void to_string(struct val *); int yyerror(const char *); int yylex(void); @@ -105,13 +114,13 @@ expr: TOKEN %% struct val * -make_integer(quad_t i) +make_integer(intmax_t i) { struct val *vp; vp = (struct val *) malloc (sizeof (*vp)); if (vp == NULL) { - errx (2, "malloc() failed"); + errx(ERR_EXIT, "malloc() failed"); } vp->type = integer; @@ -123,26 +132,34 @@ struct val * make_str(const char *s) { struct val *vp; - size_t i; - int isint; + char *ep; vp = (struct val *) malloc (sizeof (*vp)); if (vp == NULL || ((vp->u.s = strdup (s)) == NULL)) { - errx (2, "malloc() failed"); + errx(ERR_EXIT, "malloc() failed"); } - for(i = 1, isint = isdigit(s[0]) || s[0] == '-'; - isint && i < strlen(s); - i++) - { - if(!isdigit(s[i])) - isint = 0; - } - - if (isint) - vp->type = numeric_string; - else + /* + * Previously we tried to scan the string to see if it ``looked like'' + * an integer 
(erroneously, as it happened). Let strtoimax() do the + * dirty work. We could cache the value, except that we are using + * a union and need to preserve the original string form until we + * are certain that it is not needed. + * + * IEEE Std.1003.1-2001 says: + * /integer/ An argument consisting only of an (optional) unary minus + * followed by digits. + * + * This means that arguments which consist of digits followed by + * non-digits MUST NOT be considered integers. strtoimax() will + * figure this out for us. + */ + (void)strtoimax(s, &ep, 10); + + if (*ep != '\0') vp->type = string; + else + vp->type = numeric_string; return vp; } @@ -156,10 +173,10 @@ free_value(struct val *vp) } -quad_t +intmax_t to_integer(struct val *vp) { - quad_t i; + intmax_t i; if (vp->type == integer) return 1; @@ -169,10 +186,10 @@ to_integer(struct val *vp) /* vp->type == numeric_string, make it numeric */ errno = 0; - i = strtoq(vp->u.s, (char**)NULL, 10); - if (errno != 0) { - errx (2, "overflow"); - } + i = strtoimax(vp->u.s, (char **)NULL, 10); + if (errno == ERANGE) + err(ERR_EXIT, NULL); + free (vp->u.s); vp->u.i = i; vp->type = integer; @@ -187,12 +204,17 @@ to_string(struct val *vp) if (vp->type == string || vp->type == numeric_string) return; - tmp = malloc ((size_t)25); - if (tmp == NULL) { - errx (2, "malloc() failed"); - } - - sprintf (tmp, "%lld", (long long)vp->u.i); + /* + * log_10(x) ~= 0.3 * log_2(x). Rounding up gives the number + * of digits; add one each for the sign and terminating null + * character, respectively. 
+ */ +#define NDIGITS(x) (3 * (sizeof(x) * CHAR_BIT) / 10 + 1 + 1 + 1) + tmp = malloc(NDIGITS(vp->u.i)); + if (tmp == NULL) + errx(ERR_EXIT, "malloc() failed"); + + sprintf(tmp, "%jd", vp->u.i); vp->type = string; vp->u.s = tmp; } @@ -243,26 +265,34 @@ is_zero_or_null(struct val *vp) } int -main(int argc __unused, char *argv[]) +main(int argc, char *argv[]) { + int c; + setlocale (LC_ALL, ""); + while ((c = getopt(argc, argv, "")) != -1) + switch (c) { + default: + fprintf(stderr, "usage: expr [--] expression\n"); + exit(ERR_EXIT); + } - av = argv + 1; + av = argv + optind; - yyparse (); + yyparse(); if (result->type == integer) - printf ("%lld\n", (long long)result->u.i); + printf("%jd\n", result->u.i); else - printf ("%s\n", result->u.s); + printf("%s\n", result->u.s); - return (is_zero_or_null (result)); + return (is_zero_or_null(result)); } int yyerror(const char *s __unused) { - errx (2, "syntax error"); + errx(ERR_EXIT, "syntax error"); } @@ -284,7 +314,7 @@ op_and(struct val *a, struct val *b) if (is_zero_or_null (a) || is_zero_or_null (b)) { free_value (a); free_value (b); - return (make_integer ((quad_t)0)); + return (make_integer ((intmax_t)0)); } else { free_value (b); return (a); @@ -299,11 +329,11 @@ op_eq(struct val *a, struct val *b) if (isstring (a) || isstring (b)) { to_string (a); to_string (b); - r = make_integer ((quad_t)(strcoll (a->u.s, b->u.s) == 0)); + r = make_integer ((intmax_t)(strcoll (a->u.s, b->u.s) == 0)); } else { (void)to_integer(a); (void)to_integer(b); - r = make_integer ((quad_t)(a->u.i == b->u.i)); + r = make_integer ((intmax_t)(a->u.i == b->u.i)); } free_value (a); @@ -319,11 +349,11 @@ op_gt(struct val *a, struct val *b) if (isstring (a) || isstring (b)) { to_string (a); to_string (b); - r = make_integer ((quad_t)(strcoll (a->u.s, b->u.s) > 0)); + r = make_integer ((intmax_t)(strcoll (a->u.s, b->u.s) > 0)); } else { (void)to_integer(a); (void)to_integer(b); - r = make_integer ((quad_t)(a->u.i > b->u.i)); + r = make_integer 
((intmax_t)(a->u.i > b->u.i)); } free_value (a); @@ -339,11 +369,11 @@ op_lt(struct val *a, struct val *b) if (isstring (a) || isstring (b)) { to_string (a); to_string (b); - r = make_integer ((quad_t)(strcoll (a->u.s, b->u.s) < 0)); + r = make_integer ((intmax_t)(strcoll (a->u.s, b->u.s) < 0)); } else { (void)to_integer(a); (void)to_integer(b); - r = make_integer ((quad_t)(a->u.i < b->u.i)); + r = make_integer ((intmax_t)(a->u.i < b->u.i)); } free_value (a); @@ -359,11 +389,11 @@ op_ge(struct val *a, struct val *b) if (isstring (a) || isstring (b)) { to_string (a); to_string (b); - r = make_integer ((quad_t)(strcoll (a->u.s, b->u.s) >= 0)); + r = make_integer ((intmax_t)(strcoll (a->u.s, b->u.s) >= 0)); } else { (void)to_integer(a); (void)to_integer(b); - r = make_integer ((quad_t)(a->u.i >= b->u.i)); + r = make_integer ((intmax_t)(a->u.i >= b->u.i)); } free_value (a); @@ -379,11 +409,11 @@ op_le(struct val *a, struct val *b) if (isstring (a) || isstring (b)) { to_string (a); to_string (b); - r = make_integer ((quad_t)(strcoll (a->u.s, b->u.s) <= 0)); + r = make_integer ((intmax_t)(strcoll (a->u.s, b->u.s) <= 0)); } else { (void)to_integer(a); (void)to_integer(b); - r = make_integer ((quad_t)(a->u.i <= b->u.i)); + r = make_integer ((intmax_t)(a->u.i <= b->u.i)); } free_value (a); @@ -399,11 +429,11 @@ op_ne(struct val *a, struct val *b) if (isstring (a) || isstring (b)) { to_string (a); to_string (b); - r = make_integer ((quad_t)(strcoll (a->u.s, b->u.s) != 0)); + r = make_integer ((intmax_t)(strcoll (a->u.s, b->u.s) != 0)); } else { (void)to_integer(a); (void)to_integer(b); - r = make_integer ((quad_t)(a->u.i != b->u.i)); + r = make_integer ((intmax_t)(a->u.i != b->u.i)); } free_value (a); @@ -412,7 +442,7 @@ op_ne(struct val *a, struct val *b) } int -chk_plus(quad_t a, quad_t b, quad_t r) +chk_plus(intmax_t a, intmax_t b, intmax_t r) { /* sum of two positive numbers must be positive */ if (a > 0 && b > 0 && r <= 0) @@ -430,12 +460,12 @@ op_plus(struct val *a, 
struct val *b) struct val *r; if (!to_integer (a) || !to_integer (b)) { - errx (2, "non-numeric argument"); + errx(ERR_EXIT, "non-numeric argument"); } - r = make_integer (/*(quad_t)*/(a->u.i + b->u.i)); + r = make_integer (/*(intmax_t)*/(a->u.i + b->u.i)); if (chk_plus (a->u.i, b->u.i, r->u.i)) { - errx (2, "overflow"); + errx(ERR_EXIT, "overflow"); } free_value (a); free_value (b); @@ -443,16 +473,16 @@ op_plus(struct val *a, struct val *b) } int -chk_minus(quad_t a, quad_t b, quad_t r) +chk_minus(intmax_t a, intmax_t b, intmax_t r) { - /* special case subtraction of QUAD_MIN */ - if (b == QUAD_MIN) { + /* special case subtraction of INTMAX_MIN */ + if (b == INTMAX_MIN) { if (a >= 0) return 1; else return 0; } - /* this is allowed for b != QUAD_MIN */ + /* this is allowed for b != INTMAX_MIN */ return chk_plus (a, -b, r); } @@ -462,12 +492,12 @@ op_minus(struct val *a, struct val *b) struct val *r; if (!to_integer (a) || !to_integer (b)) { - errx (2, "non-numeric argument"); + errx(ERR_EXIT, "non-numeric argument"); } - r = make_integer (/*(quad_t)*/(a->u.i - b->u.i)); + r = make_integer (/*(intmax_t)*/(a->u.i - b->u.i)); if (chk_minus (a->u.i, b->u.i, r->u.i)) { - errx (2, "overflow"); + errx(ERR_EXIT, "overflow"); } free_value (a); free_value (b); @@ -475,7 +505,7 @@ op_minus(struct val *a, struct val *b) } int -chk_times(quad_t a, quad_t b, quad_t r) +chk_times(intmax_t a, intmax_t b, intmax_t r) { /* special case: first operand is 0, no overflow possible */ if (a == 0) @@ -492,12 +522,12 @@ op_times(struct val *a, struct val *b) struct val *r; if (!to_integer (a) || !to_integer (b)) { - errx (2, "non-numeric argument"); + errx(ERR_EXIT, "non-numeric argument"); } - r = make_integer (/*(quad_t)*/(a->u.i * b->u.i)); + r = make_integer (/*(intmax_t)*/(a->u.i * b->u.i)); if (chk_times (a->u.i, b->u.i, r->u.i)) { - errx (2, "overflow"); + errx(ERR_EXIT, "overflow"); } free_value (a); free_value (b); @@ -505,11 +535,11 @@ op_times(struct val *a, struct val *b) } 
int -chk_div(quad_t a, quad_t b) +chk_div(intmax_t a, intmax_t b) { /* div by zero has been taken care of before */ - /* only QUAD_MIN / -1 causes overflow */ - if (a == QUAD_MIN && b == -1) + /* only INTMAX_MIN / -1 causes overflow */ + if (a == INTMAX_MIN && b == -1) return 1; /* everything else is OK */ return 0; @@ -521,16 +551,16 @@ op_div(struct val *a, struct val *b) struct val *r; if (!to_integer (a) || !to_integer (b)) { - errx (2, "non-numeric argument"); + errx(ERR_EXIT, "non-numeric argument"); } if (b->u.i == 0) { - errx (2, "division by zero"); + errx(ERR_EXIT, "division by zero"); } - r = make_integer (/*(quad_t)*/(a->u.i / b->u.i)); + r = make_integer (/*(intmax_t)*/(a->u.i / b->u.i)); if (chk_div (a->u.i, b->u.i)) { - errx (2, "overflow"); + errx(ERR_EXIT, "overflow"); } free_value (a); free_value (b); @@ -543,14 +573,14 @@ op_rem(struct val *a, struct val *b) struct val *r; if (!to_integer (a) || !to_integer (b)) { - errx (2, "non-numeric argument"); + errx(ERR_EXIT, "non-numeric argument"); } if (b->u.i == 0) { - errx (2, "division by zero"); + errx(ERR_EXIT, "division by zero"); } - r = make_integer (/*(quad_t)*/(a->u.i % b->u.i)); + r = make_integer (/*(intmax_t)*/(a->u.i % b->u.i)); /* chk_rem necessary ??? */ free_value (a); free_value (b); @@ -573,7 +603,7 @@ op_colon(struct val *a, struct val *b) /* compile regular expression */ if ((eval = regcomp (&rp, b->u.s, 0)) != 0) { regerror (eval, &rp, errbuf, sizeof(errbuf)); - errx (2, "%s", errbuf); + errx(ERR_EXIT, "%s", errbuf); } /* compare string against pattern */ @@ -584,11 +614,11 @@ op_colon(struct val *a, struct val *b) v = make_str (a->u.s + rm[1].rm_so); } else { - v = make_integer ((quad_t)(rm[0].rm_eo - rm[0].rm_so)); + v = make_integer ((intmax_t)(rm[0].rm_eo - rm[0].rm_so)); } } else { if (rp.re_nsub == 0) { - v = make_integer ((quad_t)0); + v = make_integer ((intmax_t)0); } else { v = make_str (""); } -- cgit v1.1