summaryrefslogtreecommitdiffstats
path: root/usr.bin/indent/lexi.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr.bin/indent/lexi.c')
-rw-r--r--usr.bin/indent/lexi.c559
1 files changed, 559 insertions, 0 deletions
diff --git a/usr.bin/indent/lexi.c b/usr.bin/indent/lexi.c
new file mode 100644
index 0000000..8da9d2c
--- /dev/null
+++ b/usr.bin/indent/lexi.c
@@ -0,0 +1,559 @@
+/*
+ * Copyright (c) 1985 Sun Microsystems, Inc.
+ * Copyright (c) 1980, 1993
+ * The Regents of the University of California. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
+#endif /* not lint */
+
+/*
+ * Here we have the token scanner for indent. It scans off one token and puts
+ * it in the global variable "token". It returns a code, indicating the type
+ * of token scanned.
+ */
+
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include "indent_globs.h"
+#include "indent_codes.h"
+
+#define alphanum 1
+#define opchar 3
+
+struct templ {
+ char *rwd;
+ int rwcode;
+};
+
+struct templ specials[100] =
+{
+ "switch", 1,
+ "case", 2,
+ "break", 0,
+ "struct", 3,
+ "union", 3,
+ "enum", 3,
+ "default", 2,
+ "int", 4,
+ "char", 4,
+ "float", 4,
+ "double", 4,
+ "long", 4,
+ "short", 4,
+ "typdef", 4,
+ "unsigned", 4,
+ "register", 4,
+ "static", 4,
+ "global", 4,
+ "extern", 4,
+ "void", 4,
+ "goto", 0,
+ "return", 0,
+ "if", 5,
+ "while", 5,
+ "for", 5,
+ "else", 6,
+ "do", 6,
+ "sizeof", 7,
+ 0, 0
+};
+
+char chartype[128] =
+{ /* this is used to facilitate the decision of
+ * what type (alphanumeric, operator) each
+ * character is */
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 3, 0, 0, 1, 3, 3, 0,
+ 0, 0, 3, 3, 0, 3, 0, 3,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 0, 0, 3, 3, 3, 3,
+ 0, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 0, 0, 0, 3, 1,
+ 0, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 0, 3, 0, 3, 0
+};
+
+
+
+
+int
+lexi()
+{
+ int unary_delim; /* this is set to 1 if the current token
+ *
+ * forces a following operator to be unary */
+ static int last_code; /* the last token type returned */
+ static int l_struct; /* set to 1 if the last token was 'struct' */
+ int code; /* internal code to be returned */
+ char qchar; /* the delimiter character for a string */
+
+ e_token = s_token; /* point to start of place to save token */
+ unary_delim = false;
+ ps.col_1 = ps.last_nl; /* tell world that this token started in
+ * column 1 iff the last thing scanned was nl */
+ ps.last_nl = false;
+
+ while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
+ ps.col_1 = false; /* leading blanks imply token is not in column
+ * 1 */
+ if (++buf_ptr >= buf_end)
+ fill_buffer();
+ }
+
+ /* Scan an alphanumeric token */
+ if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
+ /*
+ * we have a character or number
+ */
+ register char *j; /* used for searching thru list of
+ *
+ * reserved words */
+ register struct templ *p;
+
+ if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
+ int seendot = 0,
+ seenexp = 0;
+ if (*buf_ptr == '0' &&
+ (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
+ *e_token++ = *buf_ptr++;
+ *e_token++ = *buf_ptr++;
+ while (isxdigit(*buf_ptr)) {
+ CHECK_SIZE_TOKEN;
+ *e_token++ = *buf_ptr++;
+ }
+ }
+ else
+ while (1) {
+ if (*buf_ptr == '.')
+ if (seendot)
+ break;
+ else
+ seendot++;
+ CHECK_SIZE_TOKEN;
+ *e_token++ = *buf_ptr++;
+ if (!isdigit(*buf_ptr) && *buf_ptr != '.')
+ if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
+ break;
+ else {
+ seenexp++;
+ seendot++;
+ CHECK_SIZE_TOKEN;
+ *e_token++ = *buf_ptr++;
+ if (*buf_ptr == '+' || *buf_ptr == '-')
+ *e_token++ = *buf_ptr++;
+ }
+ }
+ if (*buf_ptr == 'L' || *buf_ptr == 'l')
+ *e_token++ = *buf_ptr++;
+ }
+ else
+ while (chartype[*buf_ptr] == alphanum) { /* copy it over */
+ CHECK_SIZE_TOKEN;
+ *e_token++ = *buf_ptr++;
+ if (buf_ptr >= buf_end)
+ fill_buffer();
+ }
+ *e_token++ = '\0';
+ while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
+ if (++buf_ptr >= buf_end)
+ fill_buffer();
+ }
+ ps.its_a_keyword = false;
+ ps.sizeof_keyword = false;
+ if (l_struct) { /* if last token was 'struct', then this token
+ * should be treated as a declaration */
+ l_struct = false;
+ last_code = ident;
+ ps.last_u_d = true;
+ return (decl);
+ }
+ ps.last_u_d = false; /* Operator after indentifier is binary */
+ last_code = ident; /* Remember that this is the code we will
+ * return */
+
+ /*
+ * This loop will check if the token is a keyword.
+ */
+ for (p = specials; (j = p->rwd) != 0; p++) {
+ register char *p = s_token; /* point at scanned token */
+ if (*j++ != *p++ || *j++ != *p++)
+ continue; /* This test depends on the fact that
+ * identifiers are always at least 1 character
+ * long (ie. the first two bytes of the
+ * identifier are always meaningful) */
+ if (p[-1] == 0)
+ break; /* If its a one-character identifier */
+ while (*p++ == *j)
+ if (*j++ == 0)
+ goto found_keyword; /* I wish that C had a multi-level
+ * break... */
+ }
+ if (p->rwd) { /* we have a keyword */
+ found_keyword:
+ ps.its_a_keyword = true;
+ ps.last_u_d = true;
+ switch (p->rwcode) {
+ case 1: /* it is a switch */
+ return (swstmt);
+ case 2: /* a case or default */
+ return (casestmt);
+
+ case 3: /* a "struct" */
+ if (ps.p_l_follow)
+ break; /* inside parens: cast */
+ l_struct = true;
+
+ /*
+ * Next time around, we will want to know that we have had a
+ * 'struct'
+ */
+ case 4: /* one of the declaration keywords */
+ if (ps.p_l_follow) {
+ ps.cast_mask |= 1 << ps.p_l_follow;
+ break; /* inside parens: cast */
+ }
+ last_code = decl;
+ return (decl);
+
+ case 5: /* if, while, for */
+ return (sp_paren);
+
+ case 6: /* do, else */
+ return (sp_nparen);
+
+ case 7:
+ ps.sizeof_keyword = true;
+ default: /* all others are treated like any other
+ * identifier */
+ return (ident);
+ } /* end of switch */
+ } /* end of if (found_it) */
+ if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
+ register char *tp = buf_ptr;
+ while (tp < buf_end)
+ if (*tp++ == ')' && (*tp == ';' || *tp == ','))
+ goto not_proc;
+ strncpy(ps.procname, token, sizeof ps.procname - 1);
+ ps.in_parameter_declaration = 1;
+ rparen_count = 1;
+ not_proc:;
+ }
+ /*
+ * The following hack attempts to guess whether or not the current
+ * token is in fact a declaration keyword -- one that has been
+ * typedefd
+ */
+ if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
+ && !ps.p_l_follow
+ && !ps.block_init
+ && (ps.last_token == rparen || ps.last_token == semicolon ||
+ ps.last_token == decl ||
+ ps.last_token == lbrace || ps.last_token == rbrace)) {
+ ps.its_a_keyword = true;
+ ps.last_u_d = true;
+ last_code = decl;
+ return decl;
+ }
+ if (last_code == decl) /* if this is a declared variable, then
+ * following sign is unary */
+ ps.last_u_d = true; /* will make "int a -1" work */
+ last_code = ident;
+ return (ident); /* the ident is not in the list */
+ } /* end of procesing for alpanum character */
+
+ /* Scan a non-alphanumeric token */
+
+ *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
+ * moved here */
+ *e_token = '\0';
+ if (++buf_ptr >= buf_end)
+ fill_buffer();
+
+ switch (*token) {
+ case '\n':
+ unary_delim = ps.last_u_d;
+ ps.last_nl = true; /* remember that we just had a newline */
+ code = (had_eof ? 0 : newline);
+
+ /*
+ * if data has been exausted, the newline is a dummy, and we should
+ * return code to stop
+ */
+ break;
+
+ case '\'': /* start of quoted character */
+ case '"': /* start of string */
+ qchar = *token;
+ if (troff) {
+ e_token[-1] = '`';
+ if (qchar == '"')
+ *e_token++ = '`';
+ e_token = chfont(&bodyf, &stringf, e_token);
+ }
+ do { /* copy the string */
+ while (1) { /* move one character or [/<char>]<char> */
+ if (*buf_ptr == '\n') {
+ printf("%d: Unterminated literal\n", line_no);
+ goto stop_lit;
+ }
+ CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
+ * since CHECK_SIZE guarantees that there
+ * are at least 5 entries left */
+ *e_token = *buf_ptr++;
+ if (buf_ptr >= buf_end)
+ fill_buffer();
+ if (*e_token == BACKSLASH) { /* if escape, copy extra char */
+ if (*buf_ptr == '\n') /* check for escaped newline */
+ ++line_no;
+ if (troff) {
+ *++e_token = BACKSLASH;
+ if (*buf_ptr == BACKSLASH)
+ *++e_token = BACKSLASH;
+ }
+ *++e_token = *buf_ptr++;
+ ++e_token; /* we must increment this again because we
+ * copied two chars */
+ if (buf_ptr >= buf_end)
+ fill_buffer();
+ }
+ else
+ break; /* we copied one character */
+ } /* end of while (1) */
+ } while (*e_token++ != qchar);
+ if (troff) {
+ e_token = chfont(&stringf, &bodyf, e_token - 1);
+ if (qchar == '"')
+ *e_token++ = '\'';
+ }
+stop_lit:
+ code = ident;
+ break;
+
+ case ('('):
+ case ('['):
+ unary_delim = true;
+ code = lparen;
+ break;
+
+ case (')'):
+ case (']'):
+ code = rparen;
+ break;
+
+ case '#':
+ unary_delim = ps.last_u_d;
+ code = preesc;
+ break;
+
+ case '?':
+ unary_delim = true;
+ code = question;
+ break;
+
+ case (':'):
+ code = colon;
+ unary_delim = true;
+ break;
+
+ case (';'):
+ unary_delim = true;
+ code = semicolon;
+ break;
+
+ case ('{'):
+ unary_delim = true;
+
+ /*
+ * if (ps.in_or_st) ps.block_init = 1;
+ */
+ /* ? code = ps.block_init ? lparen : lbrace; */
+ code = lbrace;
+ break;
+
+ case ('}'):
+ unary_delim = true;
+ /* ? code = ps.block_init ? rparen : rbrace; */
+ code = rbrace;
+ break;
+
+ case 014: /* a form feed */
+ unary_delim = ps.last_u_d;
+ ps.last_nl = true; /* remember this so we can set 'ps.col_1'
+ * right */
+ code = form_feed;
+ break;
+
+ case (','):
+ unary_delim = true;
+ code = comma;
+ break;
+
+ case '.':
+ unary_delim = false;
+ code = period;
+ break;
+
+ case '-':
+ case '+': /* check for -, +, --, ++ */
+ code = (ps.last_u_d ? unary_op : binary_op);
+ unary_delim = true;
+
+ if (*buf_ptr == token[0]) {
+ /* check for doubled character */
+ *e_token++ = *buf_ptr++;
+ /* buffer overflow will be checked at end of loop */
+ if (last_code == ident || last_code == rparen) {
+ code = (ps.last_u_d ? unary_op : postop);
+ /* check for following ++ or -- */
+ unary_delim = false;
+ }
+ }
+ else if (*buf_ptr == '=')
+ /* check for operator += */
+ *e_token++ = *buf_ptr++;
+ else if (*buf_ptr == '>') {
+ /* check for operator -> */
+ *e_token++ = *buf_ptr++;
+ if (!pointer_as_binop) {
+ unary_delim = false;
+ code = unary_op;
+ ps.want_blank = false;
+ }
+ }
+ break; /* buffer overflow will be checked at end of
+ * switch */
+
+ case '=':
+ if (ps.in_or_st)
+ ps.block_init = 1;
+#ifdef undef
+ if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
+ e_token[-1] = *buf_ptr++;
+ if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
+ *e_token++ = *buf_ptr++;
+ *e_token++ = '='; /* Flip =+ to += */
+ *e_token = 0;
+ }
+#else
+ if (*buf_ptr == '=') {/* == */
+ *e_token++ = '='; /* Flip =+ to += */
+ buf_ptr++;
+ *e_token = 0;
+ }
+#endif
+ code = binary_op;
+ unary_delim = true;
+ break;
+ /* can drop thru!!! */
+
+ case '>':
+ case '<':
+ case '!': /* ops like <, <<, <=, !=, etc */
+ if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
+ *e_token++ = *buf_ptr;
+ if (++buf_ptr >= buf_end)
+ fill_buffer();
+ }
+ if (*buf_ptr == '=')
+ *e_token++ = *buf_ptr++;
+ code = (ps.last_u_d ? unary_op : binary_op);
+ unary_delim = true;
+ break;
+
+ default:
+ if (token[0] == '/' && *buf_ptr == '*') {
+ /* it is start of comment */
+ *e_token++ = '*';
+
+ if (++buf_ptr >= buf_end)
+ fill_buffer();
+
+ code = comment;
+ unary_delim = ps.last_u_d;
+ break;
+ }
+ while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
+ /*
+ * handle ||, &&, etc, and also things as in int *****i
+ */
+ *e_token++ = *buf_ptr;
+ if (++buf_ptr >= buf_end)
+ fill_buffer();
+ }
+ code = (ps.last_u_d ? unary_op : binary_op);
+ unary_delim = true;
+
+
+ } /* end of switch */
+ if (code != newline) {
+ l_struct = false;
+ last_code = code;
+ }
+ if (buf_ptr >= buf_end) /* check for input buffer empty */
+ fill_buffer();
+ ps.last_u_d = unary_delim;
+ *e_token = '\0'; /* null terminate the token */
+ return (code);
+}
+
+/*
+ * Add the given keyword to the keyword table, using val as the keyword type
+ */
+addkey(key, val)
+ char *key;
+{
+ register struct templ *p = specials;
+ while (p->rwd)
+ if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
+ return;
+ else
+ p++;
+ if (p >= specials + sizeof specials / sizeof specials[0])
+ return; /* For now, table overflows are silently
+ * ignored */
+ p->rwd = key;
+ p->rwcode = val;
+ p[1].rwd = 0;
+ p[1].rwcode = 0;
+ return;
+}
OpenPOWER on IntegriCloud