summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorjkh <jkh@FreeBSD.org>1995-03-26 10:21:55 +0000
committerjkh <jkh@FreeBSD.org>1995-03-26 10:21:55 +0000
commit8851a8c5ccaef8ebb9a76b5a5ae05260acf1987e (patch)
treeb02a52631e83f2411bed1eb2060d437826e9dcbb /lib
parent00fee885a61fb4de883f0a94ae38b8d9971d7439 (diff)
downloadFreeBSD-src-8851a8c5ccaef8ebb9a76b5a5ae05260acf1987e.zip
FreeBSD-src-8851a8c5ccaef8ebb9a76b5a5ae05260acf1987e.tar.gz
Add the strhash family of routines. They provide a number of features
that the db/hash functions don't, and they're much simpler to use for low-overhead string hashing.
Diffstat (limited to 'lib')
-rw-r--r--lib/libc/stdlib/Makefile.inc4
-rw-r--r--lib/libc/stdlib/strhash.c414
2 files changed, 416 insertions, 2 deletions
diff --git a/lib/libc/stdlib/Makefile.inc b/lib/libc/stdlib/Makefile.inc
index 0185680..de3d864 100644
--- a/lib/libc/stdlib/Makefile.inc
+++ b/lib/libc/stdlib/Makefile.inc
@@ -4,8 +4,8 @@
.PATH: ${.CURDIR}/${MACHINE}/stdlib ${.CURDIR}/stdlib
SRCS+= abort.c atexit.c atof.c atoi.c atol.c bsearch.c calloc.c div.c \
- exit.c getenv.c getopt.c getsubopt.c heapsort.c labs.c ldiv.c \
- malloc.c merge.c putenv.c qsort.c radixsort.c rand.c random.c \
+ exit.c getenv.c getopt.c getsubopt.c strhash.c heapsort.c labs.c \
+ ldiv.c malloc.c merge.c putenv.c qsort.c radixsort.c rand.c random.c \
realpath.c setenv.c strtod.c strtol.c strtoq.c strtoul.c \
strtouq.c system.c
diff --git a/lib/libc/stdlib/strhash.c b/lib/libc/stdlib/strhash.c
new file mode 100644
index 0000000..04a97d6
--- /dev/null
+++ b/lib/libc/stdlib/strhash.c
@@ -0,0 +1,414 @@
+#ifndef lint
+static char *rcsid = "$Header: /home/ncvs/src/usr.bin/dmenu/hash.c,v 1.1 1995/02/25 02:16:34 jkh Exp $";
+#endif
+
+/*
+ *
+ * Copyright 1990
+ * Terry Jones & Jordan Hubbard
+ *
+ * PCS Computer Systeme, GmbH.
+ * Munich, West Germany
+ *
+ *
+ * All rights reserved.
+ *
+ * This is unsupported software and is subject to change without notice.
+ * the author makes no representations about the suitability of this software
+ * for any purpose. It is supplied "as is" without express or implied
+ * warranty.
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for any purpose and without fee is hereby granted, provided
+ * that the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of the author not be used in
+ * advertising or publicity pertaining to distribution of the software
+ * without specific, written prior permission.
+ *
+ */
+
+/*
+ * This is a fairly simple open addressing hash scheme.
+ * Terry did all the code, I just did the spec.
+ * Thanks again, you crazy Aussie..
+ *
+ */
+
+/*
+ * $Log: hash.c,v $
+ * Revision 1.1 1995/02/25 02:16:34 jkh
+ * Second version of this - now support the essentials of a basic
+ * attributed file system for storing menu information and command
+ * templates. This is not finished yet, but it does compile so I can
+ * commit it to the tree now and continue working on it.
+ *
+ * Revision 2.0 90/03/26 01:44:26 jkh
+ * pre-beta check-in
+ *
+ * Revision 1.8 90/03/09 19:22:35 jkh
+ * Fixed bogus comment.
+ *
+ * Revision 1.7 90/03/09 19:01:08 jkh
+ * Added comments, GPL.
+ *
+ * Revision 1.6 90/03/08 17:55:58 terry
+ * Rearranged hash_purge to be a tiny bit more efficient.
+ * Added verbose option to hash_stats.
+ *
+ * Revision 1.5 90/03/08 17:19:54 terry
+ * Added hash_purge. Added arg to hash_traverse. Changed all
+ * void * to Generic.
+ *
+ * Revision 1.4 90/03/08 12:02:35 terry
+ * Fixed problems with allocation that I screwed up last night.
+ * Changed bucket lists to be singly linked. Thanks to JKH, my hero.
+ *
+ * Revision 1.3 90/03/07 21:33:33 terry
+ * Cleaned up a few decls to keep gcc -Wall quiet.
+ *
+ * Revision 1.2 90/03/07 21:14:53 terry
+ * Comments. Added HASH_STATS define. Removed hash_find()
+ * and new_node().
+ *
+ * Revision 1.1 90/03/07 20:49:45 terry
+ * Initial revision
+ *
+ *
+ */
+
+
+#include <string.h>
+#include <sys/types.h>
+#include <strhash.h>
+
+#define HASH_NULL (hash_table *)0
+#define NODE_NULL (hash_node *)0
+#define GENERIC_NULL (void *)0
+
+#define HASH_SZ 97
+
+
+static int _hash(int size, char *key);
+static hash_node *list_find(caddr_t key, hash_node *head);
+
+
+/*
+ * hash_create()
+ *
+ * Malloc room for a new hash table and then room for its
+ * bucket pointers. Then set all the buckets to
+ * point to 0. Return the address of the new table.
+ */
+hash_table *
+hash_create(int size)
+{
+ register int i;
+ hash_table *new = (hash_table *)malloc(sizeof(hash_table));
+
+ if (!new || size < 0){
+ return HASH_NULL;
+ }
+
+ if (size == 0){
+ size = HASH_SZ;
+ }
+
+ if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){
+ return HASH_NULL;
+ }
+
+ for (i = 0; i < size; i++){
+ new->buckets[i] = NODE_NULL;
+ }
+ new->size = size;
+
+ return new;
+}
+
+
+/*
+ * list_find()
+ *
+ * Find the key in the linked list pointed to by head.
+ */
+static hash_node *
+list_find(caddr_t key, hash_node *head)
+{
+ while (head){
+ if (!strcmp(head->key, key)){
+ return head;
+ }
+ head = head->next;
+ }
+ return NODE_NULL;
+}
+
+
+/*
+ * _hash()
+ *
+ * Compute the hash value for the given key.
+ */
+static int
+_hash(int size, char *key)
+{
+ unsigned int h = 0x0;
+
+ while (*key){
+ h = (h << 1) ^ (h ^ *key++);
+ }
+
+ h %= size;
+ return h;
+}
+
+/*
+ * hash_destroy()
+ *
+ * Find the key and (if it's there) remove it entirely.
+ * The function (*nukefunc)() is in charge of disposing
+ * of the storage help by the data associated with the node.
+ */
+void
+hash_destroy(hash_table *table, char *key, void (*nukefunc)())
+{
+ int bucket = _hash(table->size, key);
+ hash_node *found = table->buckets[bucket];
+ hash_node *to_free = NODE_NULL;
+
+ if (!found) {
+ return;
+ }
+
+ if (!strcmp(found->key, key)) {
+ /*
+ * It was the head of the list.
+ */
+ table->buckets[bucket] = found->next;
+ to_free = found;
+ }
+ else {
+ /*
+ * Walk the list, looking one ahead.
+ */
+ while (found->next) {
+ if (!strcmp(found->next->key, key)) {
+ to_free = found->next;
+ found->next = found->next->next;
+ break;
+ }
+ found = found->next;
+ }
+
+ if (!to_free){
+ return;
+ }
+ }
+
+ if (nukefunc)
+ (*nukefunc)(to_free->key, to_free->data);
+ free(to_free);
+ return;
+}
+
+
+/*
+ * hash_search()
+ *
+ * Search the table for the given key. Then:
+ *
+ * 1) If you find it and there is no replacement function, just
+ * return what you found. (This is a simple search).
+ * 2) If you find it and there is a replacement function, run
+ * the function on the data you found, and replace the old
+ * data with whatever is passed in datum. Return 0.
+ * 3) If you don't find it and there is some datum, insert a
+ * new item into the table. Insertions go at the front of
+ * the bucket. Return 0.
+ * 4) Otherwise just return 0.
+ *
+ */
+void *
+hash_search(hash_table *table, caddr_t key, void *datum,
+ void (*replace_func)())
+{
+ int bucket = hash(table->size, key);
+ hash_node *found = list_find(key, table->buckets[bucket]);
+
+ if (found){
+ if (!replace_func){
+ return found->data;
+ }
+ else{
+ (*replace_func)(found->data);
+ found->data = datum;
+ }
+ }
+ else{
+ if (datum){
+
+ static int assign_key();
+
+ hash_node *new = (hash_node *)malloc(sizeof(hash_node));
+
+ if (!new || !assign_key(key, new)){
+ return GENERIC_NULL;
+ }
+ new->data = datum;
+ new->next = table->buckets[bucket];
+ table->buckets[bucket] = new;
+ }
+ }
+ return GENERIC_NULL;
+}
+
+
+/*
+ * assign_key()
+ *
+ * Set the key value of a node to be 'key'. Get some space from
+ * malloc and copy it in etc. Return 1 if all is well, 0 otherwise.
+ */
+static int
+assign_key(char *key, hash_node *node)
+{
+ if (!node || !key){
+ return 0;
+ }
+
+ if (!(node->key = (char *)malloc(strlen(key) + 1))){
+ return 0;
+ }
+
+ node->key[0] = '\0';
+ strcat(node->key, key);
+ return 1;
+}
+
+/*
+ * hash_traverse()
+ *
+ * Traverse the hash table and run the function func on the
+ * data found at each node and the argument we're passed for it.
+ */
+void
+hash_traverse(hash_table *table, int (*func)(), void *arg)
+{
+ register int i;
+ register int size = table->size;
+
+ if (!func)
+ return;
+
+ for (i = 0; i < size; i++) {
+ hash_node *n = table->buckets[i];
+ while (n) {
+ if ((*func)(n->key, n->data, arg) == 0)
+ return;
+ n = n->next;
+ }
+ }
+ return;
+}
+
+/*
+ * hash_purge()
+ *
+ * Run through the entire hash table. Call purge_func
+ * on the data found at each node, and then free the node.
+ * Set all the bucket pointers to 0.
+ */
+void
+hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2))
+{
+ register int i;
+ register int size = table->size;
+
+ for (i = 0; i < size; i++) {
+ hash_node *n = table->buckets[i];
+ if (n) {
+ do {
+ hash_node *to_free = n;
+ if (purge_func) {
+ (*purge_func)(n->key, n->data);
+ }
+ n = n->next;
+ free(to_free);
+ } while (n);
+ table->buckets[i] = NODE_NULL;
+ }
+ }
+}
+
+#ifdef HASH_STATS
+#include <stdio.h>
+#define min(a, b) (a) < (b) ? (a) : (b)
+
+/*
+ * hash_stats()
+ *
+ * Print statistics about the current table allocation to stdout.
+ */
+void
+hash_stats(hash_table *table, int verbose)
+{
+ register int i;
+ int total_elements = 0;
+ int non_empty_buckets = 0;
+ int max_count = 0;
+ int max_repeats = 0;
+ int *counts;
+ int size = table->size;
+
+ if (!(counts = (int *)malloc(size * sizeof(int)))){
+ fprintf(stderr, "malloc returns 0\n");
+ exit(1);
+ }
+
+ for (i = 0; i < size; i++){
+ int x = 0;
+ hash_node *n = table->buckets[i];
+ counts[i] = 0;
+ while (n){
+ if (!x){
+ x = 1;
+ non_empty_buckets++;
+ if (verbose){
+ printf("bucket %2d: ", i);
+ }
+ }
+ if (verbose){
+ printf(" %s", n->key);
+ }
+ counts[i]++;
+ n = n->next;
+ }
+
+ total_elements += counts[i];
+ if (counts[i] > max_count){
+ max_count = counts[i];
+ max_repeats = 1;
+ }
+ else if (counts[i] == max_count){
+ max_repeats++;
+ }
+
+ if (counts[i] && verbose){
+ printf(" (%d)\n", counts[i]);
+ }
+ }
+
+ printf("\n");
+ printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");
+
+ if (total_elements){
+ printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size,
+ (double)100 * (double)non_empty_buckets / (double)(size));
+ printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
+ printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
+ printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));
+ }
+ return;
+}
+#endif /* HASH_STATS */
OpenPOWER on IntegriCloud