diff options
Diffstat (limited to 'gnu/usr.bin/awk/array.c')
-rw-r--r-- | gnu/usr.bin/awk/array.c | 515 |
1 files changed, 0 insertions, 515 deletions
diff --git a/gnu/usr.bin/awk/array.c b/gnu/usr.bin/awk/array.c deleted file mode 100644 index 9166c4e..0000000 --- a/gnu/usr.bin/awk/array.c +++ /dev/null @@ -1,515 +0,0 @@ -/* - * array.c - routines for associative arrays. - */ - -/* - * Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Progamming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GAWK; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* - * Tree walks (``for (iggy in foo)'') and array deletions use expensive - * linear searching. So what we do is start out with small arrays and - * grow them as needed, so that our arrays are hopefully small enough, - * most of the time, that they're pretty full and we're not looking at - * wasted space. - * - * The decision is made to grow the array if the average chain length is - * ``too big''. This is defined as the total number of entries in the table - * divided by the size of the array being greater than some constant. - */ - -#define AVG_CHAIN_MAX 10 /* don't want to linear search more than this */ - -#include "awk.h" - -static NODE *assoc_find P((NODE *symbol, NODE *subs, int hash1)); -static void grow_table P((NODE *symbol)); - -NODE * -concat_exp(tree) -register NODE *tree; -{ - register NODE *r; - char *str; - char *s; - size_t len; - int offset; - size_t subseplen; - char *subsep; - - if (tree->type != Node_expression_list) - return force_string(tree_eval(tree)); - r = force_string(tree_eval(tree->lnode)); - if (tree->rnode == NULL) - return r; - subseplen = SUBSEP_node->lnode->stlen; - subsep = SUBSEP_node->lnode->stptr; - len = r->stlen + subseplen + 2; - emalloc(str, char *, len, "concat_exp"); - memcpy(str, r->stptr, r->stlen+1); - s = str + r->stlen; - free_temp(r); - tree = tree->rnode; - while (tree) { - if (subseplen == 1) - *s++ = *subsep; - else { - memcpy(s, subsep, subseplen+1); - s += subseplen; - } - r = force_string(tree_eval(tree->lnode)); - len += r->stlen + subseplen; - offset = s - str; - erealloc(str, char *, len, "concat_exp"); - s = str + offset; - memcpy(s, r->stptr, r->stlen+1); - s += r->stlen; - free_temp(r); - tree = tree->rnode; - } - r = make_str_node(str, s - str, ALREADY_MALLOCED); - r->flags |= TEMP; - return r; -} - -/* Flush all the values in symbol[] before doing a split() */ -void -assoc_clear(symbol) -NODE *symbol; -{ - int i; - NODE *bucket, *next; - - if (symbol->var_array == 0) - return; - for (i = 0; i < symbol->array_size; i++) { - for (bucket = symbol->var_array[i]; bucket; bucket = next) { - next = bucket->ahnext; - unref(bucket->ahname); - unref(bucket->ahvalue); - freenode(bucket); - } - symbol->var_array[i] = 0; - } - free(symbol->var_array); - symbol->var_array = NULL; - symbol->array_size = symbol->table_size = 0; - symbol->flags &= ~ARRAYMAXED; -} - -/* - * calculate the hash function of the string in subs - */ -unsigned int -hash(s, len, hsize) -register const char *s; -register size_t len; -unsigned long hsize; -{ - register unsigned long h = 0; - -#ifdef this_is_really_slow - - register unsigned long g; - - while (len--) { - h = (h << 4) + *s++; - g = (h & 0xf0000000); - if (g) { - h = h ^ (g >> 24); - h = h ^ g; - } - } - -#else /* this_is_really_slow */ -/* - * This is INCREDIBLY ugly, but fast. We break the string up into 8 byte - * units. On the first time through the loop we get the "leftover bytes" - * (strlen % 8). On every other iteration, we perform 8 HASHC's so we handle - * all 8 bytes. Essentially, this saves us 7 cmp & branch instructions. If - * this routine is heavily used enough, it's worth the ugly coding. - * - * OZ's original sdbm hash, copied from Margo Seltzers db package. - * - */ - -/* Even more speed: */ -/* #define HASHC h = *s++ + 65599 * h */ -/* Because 65599 = pow(2,6) + pow(2,16) - 1 we multiply by shifts */ -#define HASHC htmp = (h << 6); \ - h = *s++ + htmp + (htmp << 10) - h - - unsigned long htmp; - - h = 0; - -#if defined(VAXC) -/* - * [This was an implementation of "Duff's Device", but it has been - * redone, separating the switch for extra iterations from the loop. - * This is necessary because the DEC VAX-C compiler is STOOPID.] - */ - switch (len & (8 - 1)) { - case 7: HASHC; - case 6: HASHC; - case 5: HASHC; - case 4: HASHC; - case 3: HASHC; - case 2: HASHC; - case 1: HASHC; - default: break; - } - - if (len > (8 - 1)) { - register size_t loop = len >> 3; - do { - HASHC; - HASHC; - HASHC; - HASHC; - HASHC; - HASHC; - HASHC; - HASHC; - } while (--loop); - } -#else /* !VAXC */ - /* "Duff's Device" for those who can handle it */ - if (len > 0) { - register size_t loop = (len + 8 - 1) >> 3; - - switch (len & (8 - 1)) { - case 0: - do { /* All fall throughs */ - HASHC; - case 7: HASHC; - case 6: HASHC; - case 5: HASHC; - case 4: HASHC; - case 3: HASHC; - case 2: HASHC; - case 1: HASHC; - } while (--loop); - } - } -#endif /* !VAXC */ -#endif /* this_is_really_slow - not */ - - if (h >= hsize) - h %= hsize; - return h; -} - -/* - * locate symbol[subs] - */ -static NODE * /* NULL if not found */ -assoc_find(symbol, subs, hash1) -NODE *symbol; -register NODE *subs; -int hash1; -{ - register NODE *bucket, *prev = 0; - - for (bucket = symbol->var_array[hash1]; bucket; bucket = bucket->ahnext) { - if (cmp_nodes(bucket->ahname, subs) == 0) { -#if 0 - /* - * Disable this code for now. It screws things up if we have - * a ``for (iggy in foo)'' in progress. Interestingly enough, - * this was not a problem in 2.15.3, only in 2.15.4. I'm not - * sure why it works in 2.15.3. - */ - if (prev) { /* move found to front of chain */ - prev->ahnext = bucket->ahnext; - bucket->ahnext = symbol->var_array[hash1]; - symbol->var_array[hash1] = bucket; - } -#endif - return bucket; - } else - prev = bucket; /* save previous list entry */ - } - return NULL; -} - -/* - * test whether the array element symbol[subs] exists or not - */ -int -in_array(symbol, subs) -NODE *symbol, *subs; -{ - register int hash1; - - if (symbol->type == Node_param_list) - symbol = stack_ptr[symbol->param_cnt]; - if (symbol->var_array == 0) - return 0; - subs = concat_exp(subs); /* concat_exp returns a string node */ - hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size); - if (assoc_find(symbol, subs, hash1) == NULL) { - free_temp(subs); - return 0; - } else { - free_temp(subs); - return 1; - } -} - -/* - * SYMBOL is the address of the node (or other pointer) being dereferenced. - * SUBS is a number or string used as the subscript. - * - * Find SYMBOL[SUBS] in the assoc array. Install it with value "" if it - * isn't there. Returns a pointer ala get_lhs to where its value is stored - */ -NODE ** -assoc_lookup(symbol, subs) -NODE *symbol, *subs; -{ - register int hash1; - register NODE *bucket; - - (void) force_string(subs); - - if (symbol->var_array == 0) { - symbol->type = Node_var_array; - symbol->array_size = symbol->table_size = 0; /* sanity */ - symbol->flags &= ~ARRAYMAXED; - grow_table(symbol); - hash1 = hash(subs->stptr, subs->stlen, - (unsigned long) symbol->array_size); - } else { - hash1 = hash(subs->stptr, subs->stlen, - (unsigned long) symbol->array_size); - bucket = assoc_find(symbol, subs, hash1); - if (bucket != NULL) { - free_temp(subs); - return &(bucket->ahvalue); - } - } - - /* It's not there, install it. */ - if (do_lint && subs->stlen == 0) - warning("subscript of array `%s' is null string", - symbol->vname); - - /* first see if we would need to grow the array, before installing */ - symbol->table_size++; - if ((symbol->flags & ARRAYMAXED) == 0 - && symbol->table_size/symbol->array_size > AVG_CHAIN_MAX) { - grow_table(symbol); - /* have to recompute hash value for new size */ - hash1 = hash(subs->stptr, subs->stlen, - (unsigned long) symbol->array_size); - } - - getnode(bucket); - bucket->type = Node_ahash; - if (subs->flags & TEMP) - bucket->ahname = dupnode(subs); - else { - unsigned int saveflags = subs->flags; - - subs->flags &= ~MALLOC; - bucket->ahname = dupnode(subs); - subs->flags = saveflags; - } - free_temp(subs); - - /* array subscripts are strings */ - bucket->ahname->flags &= ~NUMBER; - bucket->ahname->flags |= STRING; - bucket->ahvalue = Nnull_string; - bucket->ahnext = symbol->var_array[hash1]; - symbol->var_array[hash1] = bucket; - return &(bucket->ahvalue); -} - -void -do_delete(symbol, tree) -NODE *symbol, *tree; -{ - register int hash1; - register NODE *bucket, *last; - NODE *subs; - - if (symbol->type == Node_param_list) - symbol = stack_ptr[symbol->param_cnt]; - if (symbol->var_array == 0) - return; - subs = concat_exp(tree); /* concat_exp returns string node */ - hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size); - - last = NULL; - for (bucket = symbol->var_array[hash1]; bucket; last = bucket, bucket = bucket->ahnext) - if (cmp_nodes(bucket->ahname, subs) == 0) - break; - free_temp(subs); - if (bucket == NULL) - return; - if (last) - last->ahnext = bucket->ahnext; - else - symbol->var_array[hash1] = bucket->ahnext; - unref(bucket->ahname); - unref(bucket->ahvalue); - freenode(bucket); - symbol->table_size--; - if (symbol->table_size <= 0) { - memset(symbol->var_array, '\0', - sizeof(NODE *) * symbol->array_size); - symbol->table_size = symbol->array_size = 0; - symbol->flags &= ~ARRAYMAXED; - free((char *) symbol->var_array); - symbol->var_array = NULL; - } -} - -void -assoc_scan(symbol, lookat) -NODE *symbol; -struct search *lookat; -{ - lookat->sym = symbol; - lookat->idx = 0; - lookat->bucket = NULL; - lookat->retval = NULL; - if (symbol->var_array != NULL) - assoc_next(lookat); -} - -void -assoc_next(lookat) -struct search *lookat; -{ - register NODE *symbol = lookat->sym; - - if (symbol == NULL) - fatal("null symbol in assoc_next"); - if (symbol->var_array == NULL || lookat->idx > symbol->array_size) { - lookat->retval = NULL; - return; - } - /* - * This is theoretically unsafe. The element bucket might have - * been freed if the body of the scan did a delete on the next - * element of the bucket. The only way to do that is by array - * reference, which is unlikely. Basically, if the user is doing - * anything other than an operation on the current element of an - * assoc array while walking through it sequentially, all bets are - * off. (The safe way is to register all search structs on an - * array with the array, and update all of them on a delete or - * insert) - */ - if (lookat->bucket != NULL) { - lookat->retval = lookat->bucket->ahname; - lookat->bucket = lookat->bucket->ahnext; - return; - } - for (; lookat->idx < symbol->array_size; lookat->idx++) { - NODE *bucket; - - if ((bucket = symbol->var_array[lookat->idx]) != NULL) { - lookat->retval = bucket->ahname; - lookat->bucket = bucket->ahnext; - lookat->idx++; - return; - } - } - lookat->retval = NULL; - lookat->bucket = NULL; - return; -} - -/* grow_table --- grow a hash table */ - -static void -grow_table(symbol) -NODE *symbol; -{ - NODE **old, **new, *chain, *next; - int i, j; - unsigned long hash1; - unsigned long oldsize, newsize; - /* - * This is an array of primes. We grow the table by an order of - * magnitude each time (not just doubling) so that growing is a - * rare operation. We expect, on average, that it won't happen - * more than twice. The final size is also chosen to be small - * enough so that MS-DOG mallocs can handle it. When things are - * very large (> 8K), we just double more or less, instead of - * just jumping from 8K to 64K. - */ - static long sizes[] = { 13, 127, 1021, 8191, 16381, 32749, 65497 }; - - /* find next biggest hash size */ - oldsize = symbol->array_size; - newsize = 0; - for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) { - if (oldsize < sizes[i]) { - newsize = sizes[i]; - break; - } - } - - if (newsize == oldsize) { /* table already at max (!) */ - symbol->flags |= ARRAYMAXED; - return; - } - - /* allocate new table */ - emalloc(new, NODE **, newsize * sizeof(NODE *), "grow_table"); - memset(new, '\0', newsize * sizeof(NODE *)); - - /* brand new hash table, set things up and return */ - if (symbol->var_array == NULL) { - symbol->table_size = 0; - goto done; - } - - /* old hash table there, move stuff to new, free old */ - old = symbol->var_array; - for (i = 0; i < oldsize; i++) { - if (old[i] == NULL) - continue; - - for (chain = old[i]; chain != NULL; chain = next) { - next = chain->ahnext; - hash1 = hash(chain->ahname->stptr, - chain->ahname->stlen, newsize); - - /* remove from old list, add to new */ - chain->ahnext = new[hash1]; - new[hash1] = chain; - - } - } - free(old); - -done: - /* - * note that symbol->table_size does not change if an old array, - * and is explicitly set to 0 if a new one. - */ - symbol->var_array = new; - symbol->array_size = newsize; -} |