diff options
Diffstat (limited to 'gnu/usr.bin/awk/awk.y')
-rw-r--r-- | gnu/usr.bin/awk/awk.y | 1804 |
1 files changed, 1804 insertions, 0 deletions
diff --git a/gnu/usr.bin/awk/awk.y b/gnu/usr.bin/awk/awk.y new file mode 100644 index 0000000..6e87f1c --- /dev/null +++ b/gnu/usr.bin/awk/awk.y @@ -0,0 +1,1804 @@ +/* + * awk.y --- yacc/bison parser + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Progamming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +%{ +#ifdef DEBUG +#define YYDEBUG 12 +#endif + +#include "awk.h" + +static void yyerror (); /* va_alist */ +static char *get_src_buf P((void)); +static int yylex P((void)); +static NODE *node_common P((NODETYPE op)); +static NODE *snode P((NODE *subn, NODETYPE op, int sindex)); +static NODE *mkrangenode P((NODE *cpair)); +static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr)); +static NODE *append_right P((NODE *list, NODE *new)); +static void func_install P((NODE *params, NODE *def)); +static void pop_var P((NODE *np, int freeit)); +static void pop_params P((NODE *params)); +static NODE *make_param P((char *name)); +static NODE *mk_rexp P((NODE *exp)); + +static int want_assign; /* lexical scanning kludge */ +static int want_regexp; /* lexical scanning kludge */ +static int can_return; /* lexical scanning kludge */ +static int io_allowed = 1; /* lexical scanning kludge */ +static char *lexptr; /* pointer to next char during parsing */ +static char *lexend; +static char *lexptr_begin; /* keep track of where we were for error msgs */ +static char *lexeme; /* beginning of lexeme for debugging */ +static char *thisline = NULL; +#define YYDEBUG_LEXER_TEXT (lexeme) +static int param_counter; +static char *tokstart = NULL; +static char *token = NULL; +static char *tokend; + +NODE *variables[HASHSIZE]; + +extern char *source; +extern int sourceline; +extern struct src *srcfiles; +extern int numfiles; +extern int errcount; +extern NODE *begin_block; +extern NODE *end_block; +%} + +%union { + long lval; + AWKNUM fval; + NODE *nodeval; + NODETYPE nodetypeval; + char *sval; + NODE *(*ptrval)(); +} + +%type <nodeval> function_prologue function_body +%type <nodeval> rexp exp start program rule simp_exp +%type <nodeval> non_post_simp_exp +%type <nodeval> pattern +%type <nodeval> action variable param_list +%type <nodeval> rexpression_list opt_rexpression_list +%type <nodeval> expression_list opt_expression_list +%type <nodeval> statements statement if_statement opt_param_list +%type <nodeval> opt_exp opt_variable regexp +%type <nodeval> input_redir output_redir +%type <nodetypeval> print +%type <sval> func_name +%type <lval> lex_builtin + +%token <sval> FUNC_CALL NAME REGEXP +%token <lval> ERROR +%token <nodeval> YNUMBER YSTRING +%token <nodetypeval> RELOP APPEND_OP +%token <nodetypeval> ASSIGNOP MATCHOP NEWLINE CONCAT_OP +%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE +%token <nodetypeval> LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE +%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION +%token <nodetypeval> LEX_GETLINE +%token <nodetypeval> LEX_IN +%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT +%token <lval> LEX_BUILTIN LEX_LENGTH + +/* these are just yylval numbers */ + +/* Lowest to highest */ +%right ASSIGNOP +%right '?' ':' +%left LEX_OR +%left LEX_AND +%left LEX_GETLINE +%nonassoc LEX_IN +%left FUNC_CALL LEX_BUILTIN LEX_LENGTH +%nonassoc MATCHOP +%nonassoc RELOP '<' '>' '|' APPEND_OP +%left CONCAT_OP +%left YSTRING YNUMBER +%left '+' '-' +%left '*' '/' '%' +%right '!' UNARY +%right '^' +%left INCREMENT DECREMENT +%left '$' +%left '(' ')' +%% + +start + : opt_nls program opt_nls + { expression_value = $2; } + ; + +program + : rule + { + if ($1 != NULL) + $$ = $1; + else + $$ = NULL; + yyerrok; + } + | program rule + /* add the rule to the tail of list */ + { + if ($2 == NULL) + $$ = $1; + else if ($1 == NULL) + $$ = $2; + else { + if ($1->type != Node_rule_list) + $1 = node($1, Node_rule_list, + (NODE*)NULL); + $$ = append_right ($1, + node($2, Node_rule_list,(NODE *) NULL)); + } + yyerrok; + } + | error { $$ = NULL; } + | program error { $$ = NULL; } + ; + +rule + : LEX_BEGIN { io_allowed = 0; } + action + { + if (begin_block) { + if (begin_block->type != Node_rule_list) + begin_block = node(begin_block, Node_rule_list, + (NODE *)NULL); + (void) append_right (begin_block, node( + node((NODE *)NULL, Node_rule_node, $3), + Node_rule_list, (NODE *)NULL) ); + } else + begin_block = node((NODE *)NULL, Node_rule_node, $3); + $$ = NULL; + io_allowed = 1; + yyerrok; + } + | LEX_END { io_allowed = 0; } + action + { + if (end_block) { + if (end_block->type != Node_rule_list) + end_block = node(end_block, Node_rule_list, + (NODE *)NULL); + (void) append_right (end_block, node( + node((NODE *)NULL, Node_rule_node, $3), + Node_rule_list, (NODE *)NULL)); + } else + end_block = node((NODE *)NULL, Node_rule_node, $3); + $$ = NULL; + io_allowed = 1; + yyerrok; + } + | LEX_BEGIN statement_term + { + warning("BEGIN blocks must have an action part"); + errcount++; + yyerrok; + } + | LEX_END statement_term + { + warning("END blocks must have an action part"); + errcount++; + yyerrok; + } + | pattern action + { $$ = node ($1, Node_rule_node, $2); yyerrok; } + | action + { $$ = node ((NODE *)NULL, Node_rule_node, $1); yyerrok; } + | pattern statement_term + { + $$ = node ($1, + Node_rule_node, + node(node(node(make_number(0.0), + Node_field_spec, + (NODE *) NULL), + Node_expression_list, + (NODE *) NULL), + Node_K_print, + (NODE *) NULL)); + yyerrok; + } + | function_prologue function_body + { + func_install($1, $2); + $$ = NULL; + yyerrok; + } + ; + +func_name + : NAME + { $$ = $1; } + | FUNC_CALL + { $$ = $1; } + | lex_builtin + { + yyerror("%s() is a built-in function, it cannot be redefined", + tokstart); + errcount++; + /* yyerrok; */ + } + ; + +lex_builtin + : LEX_BUILTIN + | LEX_LENGTH + ; + +function_prologue + : LEX_FUNCTION + { + param_counter = 0; + } + func_name '(' opt_param_list r_paren opt_nls + { + $$ = append_right(make_param($3), $5); + can_return = 1; + } + ; + +function_body + : l_brace statements r_brace opt_semi + { + $$ = $2; + can_return = 0; + } + ; + + +pattern + : exp + { $$ = $1; } + | exp comma exp + { $$ = mkrangenode ( node($1, Node_cond_pair, $3) ); } + ; + +regexp + /* + * In this rule, want_regexp tells yylex that the next thing + * is a regexp so it should read up to the closing slash. + */ + : '/' + { ++want_regexp; } + REGEXP '/' + { + NODE *n; + int len; + + getnode(n); + n->type = Node_regex; + len = strlen($3); + n->re_exp = make_string($3, len); + n->re_reg = make_regexp($3, len, 0, 1); + n->re_text = NULL; + n->re_flags = CONST; + n->re_cnt = 1; + $$ = n; + } + ; + +action + : l_brace statements r_brace opt_semi opt_nls + { $$ = $2 ; } + | l_brace r_brace opt_semi opt_nls + { $$ = NULL; } + ; + +statements + : statement + { $$ = $1; } + | statements statement + { + if ($1 == NULL || $1->type != Node_statement_list) + $1 = node($1, Node_statement_list,(NODE *)NULL); + $$ = append_right($1, + node( $2, Node_statement_list, (NODE *)NULL)); + yyerrok; + } + | error + { $$ = NULL; } + | statements error + { $$ = NULL; } + ; + +statement_term + : nls + | semi opt_nls + ; + +statement + : semi opt_nls + { $$ = NULL; } + | l_brace r_brace + { $$ = NULL; } + | l_brace statements r_brace + { $$ = $2; } + | if_statement + { $$ = $1; } + | LEX_WHILE '(' exp r_paren opt_nls statement + { $$ = node ($3, Node_K_while, $6); } + | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls + { $$ = node ($6, Node_K_do, $3); } + | LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement + { + $$ = node ($8, Node_K_arrayfor, make_for_loop(variable($3,1), + (NODE *)NULL, variable($5,1))); + } + | LEX_FOR '(' opt_exp semi exp semi opt_exp r_paren opt_nls statement + { + $$ = node($10, Node_K_for, (NODE *)make_for_loop($3, $5, $7)); + } + | LEX_FOR '(' opt_exp semi semi opt_exp r_paren opt_nls statement + { + $$ = node ($9, Node_K_for, + (NODE *)make_for_loop($3, (NODE *)NULL, $6)); + } + | LEX_BREAK statement_term + /* for break, maybe we'll have to remember where to break to */ + { $$ = node ((NODE *)NULL, Node_K_break, (NODE *)NULL); } + | LEX_CONTINUE statement_term + /* similarly */ + { $$ = node ((NODE *)NULL, Node_K_continue, (NODE *)NULL); } + | print '(' expression_list r_paren output_redir statement_term + { $$ = node ($3, $1, $5); } + | print opt_rexpression_list output_redir statement_term + { + if ($1 == Node_K_print && $2 == NULL) + $2 = node(node(make_number(0.0), + Node_field_spec, + (NODE *) NULL), + Node_expression_list, + (NODE *) NULL); + + $$ = node ($2, $1, $3); + } + | LEX_NEXT opt_exp statement_term + { NODETYPE type; + + if ($2 && $2 == lookup("file")) { + if (do_lint) + warning("`next file' is a gawk extension"); + else if (do_unix || do_posix) + yyerror("`next file' is a gawk extension"); + else if (! io_allowed) + yyerror("`next file' used in BEGIN or END action"); + type = Node_K_nextfile; + } else { + if (! io_allowed) + yyerror("next used in BEGIN or END action"); + type = Node_K_next; + } + $$ = node ((NODE *)NULL, type, (NODE *)NULL); + } + | LEX_EXIT opt_exp statement_term + { $$ = node ($2, Node_K_exit, (NODE *)NULL); } + | LEX_RETURN + { if (! can_return) yyerror("return used outside function context"); } + opt_exp statement_term + { $$ = node ($3, Node_K_return, (NODE *)NULL); } + | LEX_DELETE NAME '[' expression_list ']' statement_term + { $$ = node (variable($2,1), Node_K_delete, $4); } + | exp statement_term + { $$ = $1; } + ; + +print + : LEX_PRINT + { $$ = $1; } + | LEX_PRINTF + { $$ = $1; } + ; + +if_statement + : LEX_IF '(' exp r_paren opt_nls statement + { + $$ = node($3, Node_K_if, + node($6, Node_if_branches, (NODE *)NULL)); + } + | LEX_IF '(' exp r_paren opt_nls statement + LEX_ELSE opt_nls statement + { $$ = node ($3, Node_K_if, + node ($6, Node_if_branches, $9)); } + ; + +nls + : NEWLINE + { want_assign = 0; } + | nls NEWLINE + ; + +opt_nls + : /* empty */ + | nls + ; + +input_redir + : /* empty */ + { $$ = NULL; } + | '<' simp_exp + { $$ = node ($2, Node_redirect_input, (NODE *)NULL); } + ; + +output_redir + : /* empty */ + { $$ = NULL; } + | '>' exp + { $$ = node ($2, Node_redirect_output, (NODE *)NULL); } + | APPEND_OP exp + { $$ = node ($2, Node_redirect_append, (NODE *)NULL); } + | '|' exp + { $$ = node ($2, Node_redirect_pipe, (NODE *)NULL); } + ; + +opt_param_list + : /* empty */ + { $$ = NULL; } + | param_list + { $$ = $1; } + ; + +param_list + : NAME + { $$ = make_param($1); } + | param_list comma NAME + { $$ = append_right($1, make_param($3)); yyerrok; } + | error + { $$ = NULL; } + | param_list error + { $$ = NULL; } + | param_list comma error + { $$ = NULL; } + ; + +/* optional expression, as in for loop */ +opt_exp + : /* empty */ + { $$ = NULL; } + | exp + { $$ = $1; } + ; + +opt_rexpression_list + : /* empty */ + { $$ = NULL; } + | rexpression_list + { $$ = $1; } + ; + +rexpression_list + : rexp + { $$ = node ($1, Node_expression_list, (NODE *)NULL); } + | rexpression_list comma rexp + { + $$ = append_right($1, + node( $3, Node_expression_list, (NODE *)NULL)); + yyerrok; + } + | error + { $$ = NULL; } + | rexpression_list error + { $$ = NULL; } + | rexpression_list error rexp + { $$ = NULL; } + | rexpression_list comma error + { $$ = NULL; } + ; + +opt_expression_list + : /* empty */ + { $$ = NULL; } + | expression_list + { $$ = $1; } + ; + +expression_list + : exp + { $$ = node ($1, Node_expression_list, (NODE *)NULL); } + | expression_list comma exp + { + $$ = append_right($1, + node( $3, Node_expression_list, (NODE *)NULL)); + yyerrok; + } + | error + { $$ = NULL; } + | expression_list error + { $$ = NULL; } + | expression_list error exp + { $$ = NULL; } + | expression_list comma error + { $$ = NULL; } + ; + +/* Expressions, not including the comma operator. */ +exp : variable ASSIGNOP + { want_assign = 0; } + exp + { + if (do_lint && $4->type == Node_regex) + warning("Regular expression on left of assignment."); + $$ = node ($1, $2, $4); + } + | '(' expression_list r_paren LEX_IN NAME + { $$ = node (variable($5,1), Node_in_array, $2); } + | exp '|' LEX_GETLINE opt_variable + { + $$ = node ($4, Node_K_getline, + node ($1, Node_redirect_pipein, (NODE *)NULL)); + } + | LEX_GETLINE opt_variable input_redir + { + if (do_lint && ! io_allowed && $3 == NULL) + warning("non-redirected getline undefined inside BEGIN or END action"); + $$ = node ($2, Node_K_getline, $3); + } + | exp LEX_AND exp + { $$ = node ($1, Node_and, $3); } + | exp LEX_OR exp + { $$ = node ($1, Node_or, $3); } + | exp MATCHOP exp + { + if ($1->type == Node_regex) + warning("Regular expression on left of MATCH operator."); + $$ = node ($1, $2, mk_rexp($3)); + } + | regexp + { $$ = $1; } + | '!' regexp %prec UNARY + { + $$ = node(node(make_number(0.0), + Node_field_spec, + (NODE *) NULL), + Node_nomatch, + $2); + } + | exp LEX_IN NAME + { $$ = node (variable($3,1), Node_in_array, $1); } + | exp RELOP exp + { + if (do_lint && $3->type == Node_regex) + warning("Regular expression on left of comparison."); + $$ = node ($1, $2, $3); + } + | exp '<' exp + { $$ = node ($1, Node_less, $3); } + | exp '>' exp + { $$ = node ($1, Node_greater, $3); } + | exp '?' exp ':' exp + { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));} + | simp_exp + { $$ = $1; } + | exp simp_exp %prec CONCAT_OP + { $$ = node ($1, Node_concat, $2); } + ; + +rexp + : variable ASSIGNOP + { want_assign = 0; } + rexp + { $$ = node ($1, $2, $4); } + | rexp LEX_AND rexp + { $$ = node ($1, Node_and, $3); } + | rexp LEX_OR rexp + { $$ = node ($1, Node_or, $3); } + | LEX_GETLINE opt_variable input_redir + { + if (do_lint && ! io_allowed && $3 == NULL) + warning("non-redirected getline undefined inside BEGIN or END action"); + $$ = node ($2, Node_K_getline, $3); + } + | regexp + { $$ = $1; } + | '!' regexp %prec UNARY + { $$ = node((NODE *) NULL, Node_nomatch, $2); } + | rexp MATCHOP rexp + { $$ = node ($1, $2, mk_rexp($3)); } + | rexp LEX_IN NAME + { $$ = node (variable($3,1), Node_in_array, $1); } + | rexp RELOP rexp + { $$ = node ($1, $2, $3); } + | rexp '?' rexp ':' rexp + { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));} + | simp_exp + { $$ = $1; } + | rexp simp_exp %prec CONCAT_OP + { $$ = node ($1, Node_concat, $2); } + ; + +simp_exp + : non_post_simp_exp + /* Binary operators in order of decreasing precedence. */ + | simp_exp '^' simp_exp + { $$ = node ($1, Node_exp, $3); } + | simp_exp '*' simp_exp + { $$ = node ($1, Node_times, $3); } + | simp_exp '/' simp_exp + { $$ = node ($1, Node_quotient, $3); } + | simp_exp '%' simp_exp + { $$ = node ($1, Node_mod, $3); } + | simp_exp '+' simp_exp + { $$ = node ($1, Node_plus, $3); } + | simp_exp '-' simp_exp + { $$ = node ($1, Node_minus, $3); } + | variable INCREMENT + { $$ = node ($1, Node_postincrement, (NODE *)NULL); } + | variable DECREMENT + { $$ = node ($1, Node_postdecrement, (NODE *)NULL); } + ; + +non_post_simp_exp + : '!' simp_exp %prec UNARY + { $$ = node ($2, Node_not,(NODE *) NULL); } + | '(' exp r_paren + { $$ = $2; } + | LEX_BUILTIN + '(' opt_expression_list r_paren + { $$ = snode ($3, Node_builtin, (int) $1); } + | LEX_LENGTH '(' opt_expression_list r_paren + { $$ = snode ($3, Node_builtin, (int) $1); } + | LEX_LENGTH + { + if (do_lint) + warning("call of `length' without parentheses is not portable"); + $$ = snode ((NODE *)NULL, Node_builtin, (int) $1); + if (do_posix) + warning( "call of `length' without parentheses is deprecated by POSIX"); + } + | FUNC_CALL '(' opt_expression_list r_paren + { + $$ = node ($3, Node_func_call, make_string($1, strlen($1))); + } + | variable + | INCREMENT variable + { $$ = node ($2, Node_preincrement, (NODE *)NULL); } + | DECREMENT variable + { $$ = node ($2, Node_predecrement, (NODE *)NULL); } + | YNUMBER + { $$ = $1; } + | YSTRING + { $$ = $1; } + + | '-' simp_exp %prec UNARY + { if ($2->type == Node_val) { + $2->numbr = -(force_number($2)); + $$ = $2; + } else + $$ = node ($2, Node_unary_minus, (NODE *)NULL); + } + | '+' simp_exp %prec UNARY + { $$ = $2; } + ; + +opt_variable + : /* empty */ + { $$ = NULL; } + | variable + { $$ = $1; } + ; + +variable + : NAME + { $$ = variable($1,1); } + | NAME '[' expression_list ']' + { + if ($3->rnode == NULL) { + $$ = node (variable($1,1), Node_subscript, $3->lnode); + freenode($3); + } else + $$ = node (variable($1,1), Node_subscript, $3); + } + | '$' non_post_simp_exp + { $$ = node ($2, Node_field_spec, (NODE *)NULL); } + ; + +l_brace + : '{' opt_nls + ; + +r_brace + : '}' opt_nls { yyerrok; } + ; + +r_paren + : ')' { yyerrok; } + ; + +opt_semi + : /* empty */ + | semi + ; + +semi + : ';' { yyerrok; want_assign = 0; } + ; + +comma : ',' opt_nls { yyerrok; } + ; + +%% + +struct token { + char *operator; /* text to match */ + NODETYPE value; /* node type */ + int class; /* lexical class */ + unsigned flags; /* # of args. allowed and compatability */ +# define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination */ +# define A(n) (1<<(n)) +# define VERSION 0xFF00 /* old awk is zero */ +# define NOT_OLD 0x0100 /* feature not in old awk */ +# define NOT_POSIX 0x0200 /* feature not in POSIX */ +# define GAWKX 0x0400 /* gawk extension */ + NODE *(*ptr) (); /* function that implements this keyword */ +}; + +extern NODE + *do_exp(), *do_getline(), *do_index(), *do_length(), + *do_sqrt(), *do_log(), *do_sprintf(), *do_substr(), + *do_split(), *do_system(), *do_int(), *do_close(), + *do_atan2(), *do_sin(), *do_cos(), *do_rand(), + *do_srand(), *do_match(), *do_tolower(), *do_toupper(), + *do_sub(), *do_gsub(), *do_strftime(), *do_systime(); + +/* Tokentab is sorted ascii ascending order, so it can be binary searched. */ + +static struct token tokentab[] = { +{"BEGIN", Node_illegal, LEX_BEGIN, 0, 0}, +{"END", Node_illegal, LEX_END, 0, 0}, +{"atan2", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2}, +{"break", Node_K_break, LEX_BREAK, 0, 0}, +{"close", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_close}, +{"continue", Node_K_continue, LEX_CONTINUE, 0, 0}, +{"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos}, +{"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0}, +{"do", Node_K_do, LEX_DO, NOT_OLD, 0}, +{"else", Node_illegal, LEX_ELSE, 0, 0}, +{"exit", Node_K_exit, LEX_EXIT, 0, 0}, +{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp}, +{"for", Node_K_for, LEX_FOR, 0, 0}, +{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0}, +{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0}, +{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0}, +{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub}, +{"if", Node_K_if, LEX_IF, 0, 0}, +{"in", Node_illegal, LEX_IN, 0, 0}, +{"index", Node_builtin, LEX_BUILTIN, A(2), do_index}, +{"int", Node_builtin, LEX_BUILTIN, A(1), do_int}, +{"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length}, +{"log", Node_builtin, LEX_BUILTIN, A(1), do_log}, +{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_match}, +{"next", Node_K_next, LEX_NEXT, 0, 0}, +{"print", Node_K_print, LEX_PRINT, 0, 0}, +{"printf", Node_K_printf, LEX_PRINTF, 0, 0}, +{"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand}, +{"return", Node_K_return, LEX_RETURN, NOT_OLD, 0}, +{"sin", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin}, +{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_split}, +{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf}, +{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt}, +{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand}, +{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_strftime}, +{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub}, +{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr}, +{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system}, +{"systime", Node_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime}, +{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower}, +{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper}, +{"while", Node_K_while, LEX_WHILE, 0, 0}, +}; + +/* VARARGS0 */ +static void +yyerror(va_alist) +va_dcl +{ + va_list args; + char *mesg = NULL; + register char *bp, *cp; + char *scan; + char buf[120]; + + errcount++; + /* Find the current line in the input file */ + if (lexptr) { + if (!thisline) { + cp = lexeme; + if (*cp == '\n') { + cp--; + mesg = "unexpected newline"; + } + for ( ; cp != lexptr_begin && *cp != '\n'; --cp) + ; + if (*cp == '\n') + cp++; + thisline = cp; + } + /* NL isn't guaranteed */ + bp = lexeme; + while (bp < lexend && *bp && *bp != '\n') + bp++; + } else { + thisline = "(END OF FILE)"; + bp = thisline + 13; + } + msg("%.*s", (int) (bp - thisline), thisline); + bp = buf; + cp = buf + sizeof(buf) - 24; /* 24 more than longest msg. input */ + if (lexptr) { + scan = thisline; + while (bp < cp && scan < lexeme) + if (*scan++ == '\t') + *bp++ = '\t'; + else + *bp++ = ' '; + *bp++ = '^'; + *bp++ = ' '; + } + va_start(args); + if (mesg == NULL) + mesg = va_arg(args, char *); + strcpy(bp, mesg); + err("", buf, args); + va_end(args); + exit(2); +} + +static char * +get_src_buf() +{ + static int samefile = 0; + static int nextfile = 0; + static char *buf = NULL; + static int fd; + int n; + register char *scan; + static int len = 0; + static int did_newline = 0; +# define SLOP 128 /* enough space to hold most source lines */ + + if (nextfile > numfiles) + return NULL; + + if (srcfiles[nextfile].stype == CMDLINE) { + if (len == 0) { + len = strlen(srcfiles[nextfile].val); + sourceline = 1; + lexptr = lexptr_begin = srcfiles[nextfile].val; + lexend = lexptr + len; + } else if (!did_newline && *(lexptr-1) != '\n') { + /* + * The following goop is to ensure that the source + * ends with a newline and that the entire current + * line is available for error messages. + */ + int offset; + + did_newline = 1; + offset = lexptr - lexeme; + for (scan = lexeme; scan > lexptr_begin; scan--) + if (*scan == '\n') { + scan++; + break; + } + len = lexptr - scan; + emalloc(buf, char *, len+1, "get_src_buf"); + memcpy(buf, scan, len); + thisline = buf; + lexptr = buf + len; + *lexptr = '\n'; + lexeme = lexptr - offset; + lexptr_begin = buf; + lexend = lexptr + 1; + } else { + len = 0; + lexeme = lexptr = lexptr_begin = NULL; + } + if (lexptr == NULL && ++nextfile <= numfiles) + return get_src_buf(); + return lexptr; + } + if (!samefile) { + source = srcfiles[nextfile].val; + if (source == NULL) { + if (buf) { + free(buf); + buf = NULL; + } + len = 0; + return lexeme = lexptr = lexptr_begin = NULL; + } + fd = pathopen(source); + if (fd == -1) + fatal("can't open source file \"%s\" for reading (%s)", + source, strerror(errno)); + len = optimal_bufsize(fd); + if (buf) + free(buf); + emalloc(buf, char *, len + SLOP, "get_src_buf"); + lexptr_begin = buf + SLOP; + samefile = 1; + sourceline = 1; + } else { + /* + * Here, we retain the current source line (up to length SLOP) + * in the beginning of the buffer that was overallocated above + */ + int offset; + int linelen; + + offset = lexptr - lexeme; + for (scan = lexeme; scan > lexptr_begin; scan--) + if (*scan == '\n') { + scan++; + break; + } + linelen = lexptr - scan; + if (linelen > SLOP) + linelen = SLOP; + thisline = buf + SLOP - linelen; + memcpy(thisline, scan, linelen); + lexeme = buf + SLOP - offset; + lexptr_begin = thisline; + } + n = read(fd, buf + SLOP, len); + if (n == -1) + fatal("can't read sourcefile \"%s\" (%s)", + source, strerror(errno)); + if (n == 0) { + samefile = 0; + nextfile++; + len = 0; + return get_src_buf(); + } + lexptr = buf + SLOP; + lexend = lexptr + n; + return buf; +} + +#define tokadd(x) (*token++ = (x), token == tokend ? tokexpand() : token) + +char * +tokexpand() +{ + static int toksize = 60; + int tokoffset; + + tokoffset = token - tokstart; + toksize *= 2; + if (tokstart) + erealloc(tokstart, char *, toksize, "tokexpand"); + else + emalloc(tokstart, char *, toksize, "tokexpand"); + tokend = tokstart + toksize; + token = tokstart + tokoffset; + return token; +} + +#if DEBUG +char +nextc() { + if (lexptr && lexptr < lexend) + return *lexptr++; + else if (get_src_buf()) + return *lexptr++; + else + return '\0'; +} +#else +#define nextc() ((lexptr && lexptr < lexend) ? \ + *lexptr++ : \ + (get_src_buf() ? *lexptr++ : '\0') \ + ) +#endif +#define pushback() (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr) + +/* + * Read the input and turn it into tokens. + */ + +static int +yylex() +{ + register int c; + int seen_e = 0; /* These are for numbers */ + int seen_point = 0; + int esc_seen; /* for literal strings */ + int low, mid, high; + static int did_newline = 0; + char *tokkey; + + if (!nextc()) + return 0; + pushback(); + lexeme = lexptr; + thisline = NULL; + if (want_regexp) { + int in_brack = 0; + + want_regexp = 0; + token = tokstart; + while ((c = nextc()) != 0) { + switch (c) { + case '[': + in_brack = 1; + break; + case ']': + in_brack = 0; + break; + case '\\': + if ((c = nextc()) == '\0') { + yyerror("unterminated regexp ends with \\ at end of file"); + } else if (c == '\n') { + sourceline++; + continue; + } else + tokadd('\\'); + break; + case '/': /* end of the regexp */ + if (in_brack) + break; + + pushback(); + tokadd('\0'); + yylval.sval = tokstart; + return REGEXP; + case '\n': + pushback(); + yyerror("unterminated regexp"); + case '\0': + yyerror("unterminated regexp at end of file"); + } + tokadd(c); + } + } +retry: + while ((c = nextc()) == ' ' || c == '\t') + ; + + lexeme = lexptr ? lexptr - 1 : lexptr; + thisline = NULL; + token = tokstart; + yylval.nodetypeval = Node_illegal; + + switch (c) { + case 0: + return 0; + + case '\n': + sourceline++; + return NEWLINE; + + case '#': /* it's a comment */ + while ((c = nextc()) != '\n') { + if (c == '\0') + return 0; + } + sourceline++; + return NEWLINE; + + case '\\': +#ifdef RELAXED_CONTINUATION + if (!do_unix) { /* strip trailing white-space and/or comment */ + while ((c = nextc()) == ' ' || c == '\t') continue; + if (c == '#') + while ((c = nextc()) != '\n') if (!c) break; + pushback(); + } +#endif /*RELAXED_CONTINUATION*/ + if (nextc() == '\n') { + sourceline++; + goto retry; + } else + yyerror("inappropriate use of backslash"); + break; + + case '$': + want_assign = 1; + return '$'; + + case ')': + case ']': + case '(': + case '[': + case ';': + case ':': + case '?': + case '{': + case ',': + return c; + + case '*': + if ((c = nextc()) == '=') { + yylval.nodetypeval = Node_assign_times; + return ASSIGNOP; + } else if (do_posix) { + pushback(); + return '*'; + } else if (c == '*') { + /* make ** and **= aliases for ^ and ^= */ + static int did_warn_op = 0, did_warn_assgn = 0; + + if (nextc() == '=') { + if (do_lint && ! did_warn_assgn) { + did_warn_assgn = 1; + warning("**= is not allowed by POSIX"); + } + yylval.nodetypeval = Node_assign_exp; + return ASSIGNOP; + } else { + pushback(); + if (do_lint && ! did_warn_op) { + did_warn_op = 1; + warning("** is not allowed by POSIX"); + } + return '^'; + } + } + pushback(); + return '*'; + + case '/': + if (want_assign) { + if (nextc() == '=') { + yylval.nodetypeval = Node_assign_quotient; + return ASSIGNOP; + } + pushback(); + } + return '/'; + + case '%': + if (nextc() == '=') { + yylval.nodetypeval = Node_assign_mod; + return ASSIGNOP; + } + pushback(); + return '%'; + + case '^': + { + static int did_warn_op = 0, did_warn_assgn = 0; + + if (nextc() == '=') { + + if (do_lint && ! did_warn_assgn) { + did_warn_assgn = 1; + warning("operator `^=' is not supported in old awk"); + } + yylval.nodetypeval = Node_assign_exp; + return ASSIGNOP; + } + pushback(); + if (do_lint && ! did_warn_op) { + did_warn_op = 1; + warning("operator `^' is not supported in old awk"); + } + return '^'; + } + + case '+': + if ((c = nextc()) == '=') { + yylval.nodetypeval = Node_assign_plus; + return ASSIGNOP; + } + if (c == '+') + return INCREMENT; + pushback(); + return '+'; + + case '!': + if ((c = nextc()) == '=') { + yylval.nodetypeval = Node_notequal; + return RELOP; + } + if (c == '~') { + yylval.nodetypeval = Node_nomatch; + want_assign = 0; + return MATCHOP; + } + pushback(); + return '!'; + + case '<': + if (nextc() == '=') { + yylval.nodetypeval = Node_leq; + return RELOP; + } + yylval.nodetypeval = Node_less; + pushback(); + return '<'; + + case '=': + if (nextc() == '=') { + yylval.nodetypeval = Node_equal; + return RELOP; + } + yylval.nodetypeval = Node_assign; + pushback(); + return ASSIGNOP; + + case '>': + if ((c = nextc()) == '=') { + yylval.nodetypeval = Node_geq; + return RELOP; + } else if (c == '>') { + yylval.nodetypeval = Node_redirect_append; + return APPEND_OP; + } + yylval.nodetypeval = Node_greater; + pushback(); + return '>'; + + case '~': + yylval.nodetypeval = Node_match; + want_assign = 0; + return MATCHOP; + + case '}': + /* + * Added did newline stuff. Easier than + * hacking the grammar + */ + if (did_newline) { + did_newline = 0; + return c; + } + did_newline++; + --lexptr; /* pick up } next time */ + return NEWLINE; + + case '"': + esc_seen = 0; + while ((c = nextc()) != '"') { + if (c == '\n') { + pushback(); + yyerror("unterminated string"); + } + if (c == '\\') { + c = nextc(); + if (c == '\n') { + sourceline++; + continue; + } + esc_seen = 1; + tokadd('\\'); + } + if (c == '\0') { + pushback(); + yyerror("unterminated string"); + } + tokadd(c); + } + yylval.nodeval = make_str_node(tokstart, + token - tokstart, esc_seen ? SCAN : 0); + yylval.nodeval->flags |= PERM; + return YSTRING; + + case '-': + if ((c = nextc()) == '=') { + yylval.nodetypeval = Node_assign_minus; + return ASSIGNOP; + } + if (c == '-') + return DECREMENT; + pushback(); + return '-'; + + case '.': + c = nextc(); + pushback(); + if (!isdigit(c)) + return '.'; + else + c = '.'; /* FALL THROUGH */ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + /* It's a number */ + for (;;) { + int gotnumber = 0; + + tokadd(c); + switch (c) { + case '.': + if (seen_point) { + gotnumber++; + break; + } + ++seen_point; + break; + case 'e': + case 'E': + if (seen_e) { + gotnumber++; + break; + } + ++seen_e; + if ((c = nextc()) == '-' || c == '+') + tokadd(c); + else + pushback(); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + break; + default: + gotnumber++; + } + if (gotnumber) + break; + c = nextc(); + } + pushback(); + yylval.nodeval = make_number(atof(tokstart)); + yylval.nodeval->flags |= PERM; + return YNUMBER; + + case '&': + if ((c = nextc()) == '&') { + yylval.nodetypeval = Node_and; + for (;;) { + c = nextc(); + if (c == '\0') + break; + if (c == '#') { + while ((c = nextc()) != '\n' && c != '\0') + ; + if (c == '\0') + break; + } + if (c == '\n') + sourceline++; + if (! isspace(c)) { + pushback(); + break; + } + } + want_assign = 0; + return LEX_AND; + } + pushback(); + return '&'; + + case '|': + if ((c = nextc()) == '|') { + yylval.nodetypeval = Node_or; + for (;;) { + c = nextc(); + if (c == '\0') + break; + if (c == '#') { + while ((c = nextc()) != '\n' && c != '\0') + ; + if (c == '\0') + break; + } + if (c == '\n') + sourceline++; + if (! isspace(c)) { + pushback(); + break; + } + } + want_assign = 0; + return LEX_OR; + } + pushback(); + return '|'; + } + + if (c != '_' && ! isalpha(c)) + yyerror("Invalid char '%c' in expression\n", c); + + /* it's some type of name-type-thing. Find its length */ + token = tokstart; + while (is_identchar(c)) { + tokadd(c); + c = nextc(); + } + tokadd('\0'); + emalloc(tokkey, char *, token - tokstart, "yylex"); + memcpy(tokkey, tokstart, token - tokstart); + pushback(); + + /* See if it is a special token. */ + low = 0; + high = (sizeof (tokentab) / sizeof (tokentab[0])) - 1; + while (low <= high) { + int i/* , c */; + + mid = (low + high) / 2; + c = *tokstart - tokentab[mid].operator[0]; + i = c ? c : strcmp (tokstart, tokentab[mid].operator); + + if (i < 0) { /* token < mid */ + high = mid - 1; + } else if (i > 0) { /* token > mid */ + low = mid + 1; + } else { + if (do_lint) { + if (tokentab[mid].flags & GAWKX) + warning("%s() is a gawk extension", + tokentab[mid].operator); + if (tokentab[mid].flags & NOT_POSIX) + warning("POSIX does not allow %s", + tokentab[mid].operator); + if (tokentab[mid].flags & NOT_OLD) + warning("%s is not supported in old awk", + tokentab[mid].operator); + } + if ((do_unix && (tokentab[mid].flags & GAWKX)) + || (do_posix && (tokentab[mid].flags & NOT_POSIX))) + break; + if (tokentab[mid].class == LEX_BUILTIN + || tokentab[mid].class == LEX_LENGTH + ) + yylval.lval = mid; + else + yylval.nodetypeval = tokentab[mid].value; + + return tokentab[mid].class; + } + } + + yylval.sval = tokkey; + if (*lexptr == '(') + return FUNC_CALL; + else { + want_assign = 1; + return NAME; + } +} + +static NODE * +node_common(op) +NODETYPE op; +{ + register NODE *r; + + getnode(r); + r->type = op; + r->flags = MALLOC; + /* if lookahead is NL, lineno is 1 too high */ + if (lexeme && *lexeme == '\n') + r->source_line = sourceline - 1; + else + r->source_line = sourceline; + r->source_file = source; + return r; +} + +/* + * This allocates a node with defined lnode and rnode. + */ +NODE * +node(left, op, right) +NODE *left, *right; +NODETYPE op; +{ + register NODE *r; + + r = node_common(op); + r->lnode = left; + r->rnode = right; + return r; +} + +/* + * This allocates a node with defined subnode and proc for builtin functions + * Checks for arg. count and supplies defaults where possible. + */ +static NODE * +snode(subn, op, idx) +NODETYPE op; +int idx; +NODE *subn; +{ + register NODE *r; + register NODE *n; + int nexp = 0; + int args_allowed; + + r = node_common(op); + + /* traverse expression list to see how many args. given */ + for (n= subn; n; n= n->rnode) { + nexp++; + if (nexp > 3) + break; + } + + /* check against how many args. are allowed for this builtin */ + args_allowed = tokentab[idx].flags & ARGS; + if (args_allowed && !(args_allowed & A(nexp))) + fatal("%s() cannot have %d argument%c", + tokentab[idx].operator, nexp, nexp == 1 ? ' ' : 's'); + + r->proc = tokentab[idx].ptr; + + /* special case processing for a few builtins */ + if (nexp == 0 && r->proc == do_length) { + subn = node(node(make_number(0.0),Node_field_spec,(NODE *)NULL), + Node_expression_list, + (NODE *) NULL); + } else if (r->proc == do_match) { + if (subn->rnode->lnode->type != Node_regex) + subn->rnode->lnode = mk_rexp(subn->rnode->lnode); + } else if (r->proc == do_sub || r->proc == do_gsub) { + if (subn->lnode->type != Node_regex) + subn->lnode = mk_rexp(subn->lnode); + if (nexp == 2) + append_right(subn, node(node(make_number(0.0), + Node_field_spec, + (NODE *) NULL), + Node_expression_list, + (NODE *) NULL)); + else if (do_lint && subn->rnode->rnode->lnode->type == Node_val) + warning("string literal as last arg of substitute"); + } else if (r->proc == do_split) { + if (nexp == 2) + append_right(subn, + node(FS_node, Node_expression_list, (NODE *) NULL)); + n = subn->rnode->rnode->lnode; + if (n->type != Node_regex) + subn->rnode->rnode->lnode = mk_rexp(n); + if (nexp == 2) + subn->rnode->rnode->lnode->re_flags |= FS_DFLT; + } + + r->subnode = subn; + return r; +} + +/* + * This allocates a Node_line_range node with defined condpair and + * zeroes the trigger word to avoid the temptation of assuming that calling + * 'node( foo, Node_line_range, 0)' will properly initialize 'triggered'. + */ +/* Otherwise like node() */ +static NODE * +mkrangenode(cpair) +NODE *cpair; +{ + register NODE *r; + + getnode(r); + r->type = Node_line_range; + r->condpair = cpair; + r->triggered = 0; + return r; +} + +/* Build a for loop */ +static NODE * +make_for_loop(init, cond, incr) +NODE *init, *cond, *incr; +{ + register FOR_LOOP_HEADER *r; + NODE *n; + + emalloc(r, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop"); + getnode(n); + n->type = Node_illegal; + r->init = init; + r->cond = cond; + r->incr = incr; + n->sub.nodep.r.hd = r; + return n; +} + +/* + * Install a name in the symbol table, even if it is already there. + * Caller must check against redefinition if that is desired. + */ +NODE * +install(name, value) +char *name; +NODE *value; +{ + register NODE *hp; + register int len, bucket; + + len = strlen(name); + bucket = hash(name, len); + getnode(hp); + hp->type = Node_hashnode; + hp->hnext = variables[bucket]; + variables[bucket] = hp; + hp->hlength = len; + hp->hvalue = value; + hp->hname = name; + hp->hvalue->vname = name; + return hp->hvalue; +} + +/* find the most recent hash node for name installed by install */ +NODE * +lookup(name) +char *name; +{ + register NODE *bucket; + register int len; + + len = strlen(name); + bucket = variables[hash(name, len)]; + while (bucket) { + if (bucket->hlength == len && STREQN(bucket->hname, name, len)) + return bucket->hvalue; + bucket = bucket->hnext; + } + return NULL; +} + +/* + * Add new to the rightmost branch of LIST. This uses n^2 time, so we make + * a simple attempt at optimizing it. + */ +static NODE * +append_right(list, new) +NODE *list, *new; +{ + register NODE *oldlist; + static NODE *savefront = NULL, *savetail = NULL; + + oldlist = list; + if (savefront == oldlist) { + savetail = savetail->rnode = new; + return oldlist; + } else + savefront = oldlist; + while (list->rnode != NULL) + list = list->rnode; + savetail = list->rnode = new; + return oldlist; +} + +/* + * check if name is already installed; if so, it had better have Null value, + * in which case def is added as the value. Otherwise, install name with def + * as value. + */ +static void +func_install(params, def) +NODE *params; +NODE *def; +{ + NODE *r; + + pop_params(params->rnode); + pop_var(params, 0); + r = lookup(params->param); + if (r != NULL) { + fatal("function name `%s' previously defined", params->param); + } else + (void) install(params->param, node(params, Node_func, def)); +} + +static void +pop_var(np, freeit) +NODE *np; +int freeit; +{ + register NODE *bucket, **save; + register int len; + char *name; + + name = np->param; + len = strlen(name); + save = &(variables[hash(name, len)]); + for (bucket = *save; bucket; bucket = bucket->hnext) { + if (len == bucket->hlength && STREQN(bucket->hname, name, len)) { + *save = bucket->hnext; + freenode(bucket); + if (freeit) + free(np->param); + return; + } + save = &(bucket->hnext); + } +} + +static void +pop_params(params) +NODE *params; +{ + register NODE *np; + + for (np = params; np != NULL; np = np->rnode) + pop_var(np, 1); +} + +static NODE * +make_param(name) +char *name; +{ + NODE *r; + + getnode(r); + r->type = Node_param_list; + r->rnode = NULL; + r->param = name; + r->param_cnt = param_counter++; + return (install(name, r)); +} + +/* Name points to a variable name. Make sure its in the symbol table */ +NODE * +variable(name, can_free) +char *name; +int can_free; +{ + register NODE *r; + static int env_loaded = 0; + + if (!env_loaded && STREQ(name, "ENVIRON")) { + load_environ(); + env_loaded = 1; + } + if ((r = lookup(name)) == NULL) + r = install(name, node(Nnull_string, Node_var, (NODE *) NULL)); + else if (can_free) + free(name); + return r; +} + +static NODE * +mk_rexp(exp) +NODE *exp; +{ + if (exp->type == Node_regex) + return exp; + else { + NODE *n; + + getnode(n); + n->type = Node_regex; + n->re_exp = exp; + n->re_text = NULL; + n->re_reg = NULL; + n->re_flags = 0; + n->re_cnt = 1; + return n; + } +} |