diff options
Diffstat (limited to 'contrib/one-true-awk/b.c')
-rw-r--r-- | contrib/one-true-awk/b.c | 78 |
1 files changed, 59 insertions, 19 deletions
diff --git a/contrib/one-true-awk/b.c b/contrib/one-true-awk/b.c
index aa2ad15..df3aaa9 100644
--- a/contrib/one-true-awk/b.c
+++ b/contrib/one-true-awk/b.c
@@ -282,9 +282,24 @@ int quoted(char **pp) /* pick up next thing after a \\ */
 	return c;
 }
 
+static int collate_range_cmp(int a, int b)
+{
+	int r;
+	static char s[2][2];
+
+	if ((uschar)a == (uschar)b)
+		return 0;
+	s[0][0] = a;
+	s[1][0] = b;
+	if ((r = strcoll(s[0], s[1])) == 0)
+		r = (uschar)a - (uschar)b;
+	return r;
+}
+
 char *cclenter(const char *argp) /* add a character class */
 {
 	int i, c, c2;
+	int j;
 	uschar *p = (uschar *) argp;
 	uschar *op, *bp;
 	static uschar *buf = 0;
@@ -303,15 +318,18 @@ char *cclenter(const char *argp) /* add a character class */
 				c2 = *p++;
 				if (c2 == '\\')
 					c2 = quoted((char **) &p);
-				if (c > c2) { /* empty; ignore */
+				if (collate_range_cmp(c, c2) > 0) { /* empty; ignore */
 					bp--;
 					i--;
 					continue;
 				}
-				while (c < c2) {
+				for (j = 0; j < NCHARS; j++) {
+					if ((collate_range_cmp(c, j) > 0) ||
+					    collate_range_cmp(j, c2) > 0)
+						continue;
 					if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
 						FATAL("out of space for character class [%.10s...] 2", p);
-					*bp++ = ++c;
+					*bp++ = j;
 					i++;
 				}
 				continue;
@@ -695,23 +713,39 @@ Node *unary(Node *np)
  * relex(), the expanded character class (prior to range expansion)
  * must be less than twice the size of their full name.
  */
+
+/* Because isblank doesn't show up in any of the header files on any
+ * system i use, it's defined here. if some other locale has a richer
+ * definition of "blank", define HAS_ISBLANK and provide your own
+ * version.
+ */
+
+#ifndef HAS_ISBLANK
+
+int isblank(int c)
+{
+	return c==' ' || c=='\t';
+}
+
+#endif
+
 struct charclass {
 	const char *cc_name;
 	int cc_namelen;
-	const char *cc_expand;
+	int (*cc_func)(int);
 } charclasses[] = {
-	{ "alnum", 5, "0-9A-Za-z" },
-	{ "alpha", 5, "A-Za-z" },
-	{ "blank", 5, " \t" },
-	{ "cntrl", 5, "\000-\037\177" },
-	{ "digit", 5, "0-9" },
-	{ "graph", 5, "\041-\176" },
-	{ "lower", 5, "a-z" },
-	{ "print", 5, " \041-\176" },
-	{ "punct", 5, "\041-\057\072-\100\133-\140\173-\176" },
-	{ "space", 5, " \f\n\r\t\v" },
-	{ "upper", 5, "A-Z" },
-	{ "xdigit", 6, "0-9A-Fa-f" },
+	{ "alnum", 5, isalnum },
+	{ "alpha", 5, isalpha },
+	{ "blank", 5, isblank },
+	{ "cntrl", 5, iscntrl },
+	{ "digit", 5, isdigit },
+	{ "graph", 5, isgraph },
+	{ "lower", 5, islower },
+	{ "print", 5, isprint },
+	{ "punct", 5, ispunct },
+	{ "space", 5, isspace },
+	{ "upper", 5, isupper },
+	{ "xdigit", 6, isxdigit },
 	{ NULL, 0, NULL },
 };
 
@@ -724,7 +758,7 @@ int relex(void) /* lexical analyzer for reparse */
 	static int bufsz = 100;
 	uschar *bp;
 	struct charclass *cc;
-	const uschar *p;
+	int i;
 
 	switch (c = *prestr++) {
 	case '|': return OR;
@@ -773,8 +807,14 @@ int relex(void) /* lexical analyzer for reparse */
 				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
 				    prestr[2 + cc->cc_namelen] == ']') {
 					prestr += cc->cc_namelen + 3;
-					for (p = (const uschar *) cc->cc_expand; *p; p++)
-						*bp++ = *p;
+					for (i = 0; i < NCHARS; i++) {
+						if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0))
+						    FATAL("out of space for reg expr %.10s...", lastre);
+						if (cc->cc_func(i)) {
+							*bp++ = i;
+							n++;
+						}
+					}
 				} else
 					*bp++ = c;
 			} else if (c == '\0') {