#ifndef lint static char *rcsid = "$Header: /home/ncvs/src/lib/libc/stdlib/strhash.c,v 1.5 1995/10/22 14:53:17 phk Exp $"; #endif /* * * Copyright 1990 * Terry Jones & Jordan Hubbard * * PCS Computer Systeme, GmbH. * Munich, West Germany * * * All rights reserved. * * This is unsupported software and is subject to change without notice. * the author makes no representations about the suitability of this software * for any purpose. It is supplied "as is" without express or implied * warranty. * * Permission to use, copy, modify, and distribute this software and its * documentation for any purpose and without fee is hereby granted, provided * that the above copyright notice appear in all copies and that both that * copyright notice and this permission notice appear in supporting * documentation, and that the name of the author not be used in * advertising or publicity pertaining to distribution of the software * without specific, written prior permission. * */ /* * This is a fairly simple open addressing hash scheme. * Terry did all the code, I just did the spec. * Thanks again, you crazy Aussie.. * */ /* * $Log: strhash.c,v $ * Revision 1.5 1995/10/22 14:53:17 phk * Mino cleanup, #includes & unused vars. * * Revision 1.4 1995/05/30 05:41:55 rgrimes * Remove trailing whitespace. * * Revision 1.3 1995/03/28 08:41:02 jkh * Fix a missing _hash() to prevent namespace pollution with the db/hash routines. * Grrr. If the dbhash routines weren't grossly overengineered I wouldn't * even need to do this! :-( * * Also now export the hash_stats routine. Manpage coming RSN - I promise. * * Revision 1.2 1995/03/26 19:32:24 ache * Hash 8bit chars without sign extension * * Revision 1.1 1995/03/26 10:21:55 jkh * Add the strhash family of routines. They provide a number of features * that the db/hash functions don't, and they're much simpler to use for * low-overhead string hashing. * * Revision 1.1 1995/02/25 02:16:34 jkh * Second version of this - now support the essentials of a basic * attributed file system for storing menu information and command * templates. This is not finished yet, but it does compile so I can * commit it to the tree now and continue working on it. * * Revision 2.0 90/03/26 01:44:26 jkh * pre-beta check-in * * Revision 1.8 90/03/09 19:22:35 jkh * Fixed bogus comment. * * Revision 1.7 90/03/09 19:01:08 jkh * Added comments, GPL. * * Revision 1.6 90/03/08 17:55:58 terry * Rearranged hash_purge to be a tiny bit more efficient. * Added verbose option to hash_stats. * * Revision 1.5 90/03/08 17:19:54 terry * Added hash_purge. Added arg to hash_traverse. Changed all * void * to Generic. * * Revision 1.4 90/03/08 12:02:35 terry * Fixed problems with allocation that I screwed up last night. * Changed bucket lists to be singly linked. Thanks to JKH, my hero. * * Revision 1.3 90/03/07 21:33:33 terry * Cleaned up a few decls to keep gcc -Wall quiet. * * Revision 1.2 90/03/07 21:14:53 terry * Comments. Added HASH_STATS define. Removed hash_find() * and new_node(). * * Revision 1.1 90/03/07 20:49:45 terry * Initial revision * * */ #include #include #include #include #include #define HASH_NULL (hash_table *)0 #define NODE_NULL (hash_node *)0 #define GENERIC_NULL (void *)0 #define HASH_SZ 97 static int _hash(int size, char *key); static hash_node *list_find(caddr_t key, hash_node *head); /* * hash_create() * * Malloc room for a new hash table and then room for its * bucket pointers. Then set all the buckets to * point to 0. Return the address of the new table. */ hash_table * hash_create(int size) { register int i; hash_table *new = (hash_table *)malloc(sizeof(hash_table)); if (!new || size < 0){ return HASH_NULL; } if (size == 0){ size = HASH_SZ; } if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){ return HASH_NULL; } for (i = 0; i < size; i++){ new->buckets[i] = NODE_NULL; } new->size = size; return new; } /* * list_find() * * Find the key in the linked list pointed to by head. */ static hash_node * list_find(caddr_t key, hash_node *head) { while (head){ if (!strcmp(head->key, key)){ return head; } head = head->next; } return NODE_NULL; } /* * _hash() * * Compute the hash value for the given key. */ static int _hash(int size, char *key) { unsigned int h = 0x0; while (*key){ h = (h << 1) ^ (h ^ (unsigned char) *key++); } h %= size; return h; } /* * hash_destroy() * * Find the key and (if it's there) remove it entirely. * The function (*nukefunc)() is in charge of disposing * of the storage help by the data associated with the node. */ void hash_destroy(hash_table *table, char *key, void (*nukefunc)()) { int bucket = _hash(table->size, key); hash_node *found = table->buckets[bucket]; hash_node *to_free = NODE_NULL; if (!found) { return; } if (!strcmp(found->key, key)) { /* * It was the head of the list. */ table->buckets[bucket] = found->next; to_free = found; } else { /* * Walk the list, looking one ahead. */ while (found->next) { if (!strcmp(found->next->key, key)) { to_free = found->next; found->next = found->next->next; break; } found = found->next; } if (!to_free){ return; } } if (nukefunc) (*nukefunc)(to_free->key, to_free->data); free(to_free); return; } /* * hash_search() * * Search the table for the given key. Then: * * 1) If you find it and there is no replacement function, just * return what you found. (This is a simple search). * 2) If you find it and there is a replacement function, run * the function on the data you found, and replace the old * data with whatever is passed in datum. Return 0. * 3) If you don't find it and there is some datum, insert a * new item into the table. Insertions go at the front of * the bucket. Return 0. * 4) Otherwise just return 0. * */ void * hash_search(hash_table *table, caddr_t key, void *datum, void (*replace_func)()) { int bucket = _hash(table->size, key); hash_node *found = list_find(key, table->buckets[bucket]); if (found){ if (!replace_func){ return found->data; } else{ (*replace_func)(found->data); found->data = datum; } } else{ if (datum){ static int assign_key(); hash_node *new = (hash_node *)malloc(sizeof(hash_node)); if (!new || !assign_key(key, new)){ return GENERIC_NULL; } new->data = datum; new->next = table->buckets[bucket]; table->buckets[bucket] = new; return new; } } return GENERIC_NULL; } /* * assign_key() * * Set the key value of a node to be 'key'. Get some space from * malloc and copy it in etc. Return 1 if all is well, 0 otherwise. */ static int assign_key(char *key, hash_node *node) { if (!node || !key){ return 0; } if (!(node->key = (char *)malloc(strlen(key) + 1))){ return 0; } node->key[0] = '\0'; strcat(node->key, key); return 1; } /* * hash_traverse() * * Traverse the hash table and run the function func on the * data found at each node and the argument we're passed for it. */ void hash_traverse(hash_table *table, int (*func)(), void *arg) { register int i; register int size = table->size; if (!func) return; for (i = 0; i < size; i++) { hash_node *n = table->buckets[i]; while (n) { if ((*func)(n->key, n->data, arg) == 0) return; n = n->next; } } return; } /* * hash_purge() * * Run through the entire hash table. Call purge_func * on the data found at each node, and then free the node. * Set all the bucket pointers to 0. */ void hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2)) { register int i; register int size = table->size; for (i = 0; i < size; i++) { hash_node *n = table->buckets[i]; if (n) { do { hash_node *to_free = n; if (purge_func) { (*purge_func)(n->key, n->data); } n = n->next; free(to_free); } while (n); table->buckets[i] = NODE_NULL; } } } #undef min #define min(a, b) (a) < (b) ? (a) : (b) /* * hash_stats() * * Print statistics about the current table allocation to stdout. */ void hash_stats(hash_table *table, int verbose) { register int i; int total_elements = 0; int non_empty_buckets = 0; int max_count = 0; int max_repeats = 0; int *counts; int size = table->size; if (!(counts = (int *)malloc(size * sizeof(int)))){ fprintf(stderr, "malloc returns 0\n"); exit(1); } for (i = 0; i < size; i++){ int x = 0; hash_node *n = table->buckets[i]; counts[i] = 0; while (n){ if (!x){ x = 1; non_empty_buckets++; if (verbose){ printf("bucket %2d: ", i); } } if (verbose){ printf(" %s", n->key); } counts[i]++; n = n->next; } total_elements += counts[i]; if (counts[i] > max_count){ max_count = counts[i]; max_repeats = 1; } else if (counts[i] == max_count){ max_repeats++; } if (counts[i] && verbose){ printf(" (%d)\n", counts[i]); } } printf("\n"); printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s"); if (total_elements){ printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size, (double)100 * (double)non_empty_buckets / (double)(size)); printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats); printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets); printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements))); } return; }