summaryrefslogtreecommitdiffstats
path: root/lib/libc/stdlib/strhash.c
blob: 8de4f3f144360b5d1119083b773c3b21a1d4160e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
#ifndef lint
static const char *rcsid =
"$FreeBSD$";
#endif

/*
 *
 *                      Copyright 1990
 *               Terry Jones & Jordan Hubbard
 *
 *		  PCS Computer Systeme, GmbH.
 *	             Munich, West Germany
 *
 *
 *  All rights reserved.
 *
 *  This is unsupported software and is subject to change without notice.
 *  the author makes no representations about the suitability of this software
 *  for any purpose. It is supplied "as is" without express or implied
 *  warranty.
 *
 *  Permission to use, copy, modify, and distribute this software and its
 *  documentation for any purpose and without fee is hereby granted, provided
 *  that the above copyright notice appear in all copies and that both that
 *  copyright notice and this permission notice appear in supporting
 *  documentation, and that the name of the author not be used in
 *  advertising or publicity pertaining to distribution of the software
 *  without specific, written prior permission.
 *
 */

/*
 * This is a fairly simple open addressing hash scheme.
 * Terry did all the code, I just did the spec.
 * Thanks again, you crazy Aussie..
 *
 */

/*
 * $Log: strhash.c,v $
 * Revision 2.0  90/03/26  01:44:26  jkh
 * pre-beta check-in
 *
 * Revision 1.8  90/03/09  19:22:35  jkh
 * Fixed bogus comment.
 *
 * Revision 1.7  90/03/09  19:01:08  jkh
 * Added comments, GPL.
 *
 * Revision 1.6  90/03/08  17:55:58  terry
 * Rearranged hash_purge to be a tiny bit more efficient.
 * Added verbose option to hash_stats.
 *
 * Revision 1.5  90/03/08  17:19:54  terry
 * Added hash_purge. Added arg to hash_traverse. Changed all
 * void * to Generic.
 *
 * Revision 1.4  90/03/08  12:02:35  terry
 * Fixed problems with allocation that I screwed up last night.
 * Changed bucket lists to be singly linked. Thanks to JKH, my hero.
 *
 * Revision 1.3  90/03/07  21:33:33  terry
 * Cleaned up a few decls to keep gcc -Wall quiet.
 *
 * Revision 1.2  90/03/07  21:14:53  terry
 * Comments. Added HASH_STATS define. Removed hash_find()
 * and new_node().
 *
 * Revision 1.1  90/03/07  20:49:45  terry
 * Initial revision
 *
 *
 */


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <strhash.h>

#define HASH_NULL    (hash_table *)0
#define NODE_NULL    (hash_node *)0
#define GENERIC_NULL (void *)0

#define HASH_SZ 97


static int _hash(int size, char *key);
static hash_node *list_find(caddr_t key, hash_node *head);


/*
 * hash_create()
 *
 * Malloc room for a new hash table and then room for its
 * bucket pointers. Then set all the buckets to
 * point to 0. Return the address of the new table.
 */
hash_table *
hash_create(int size)
{
    register int i;
    hash_table *new = (hash_table *)malloc(sizeof(hash_table));

    if (!new || size < 0){
	return HASH_NULL;
    }

    if (size == 0){
	size = HASH_SZ;
    }

    if (!(new->buckets = (hash_node **)malloc(size * sizeof(hash_node *)))){
	return HASH_NULL;
    }

    for (i = 0; i < size; i++){
	new->buckets[i] = NODE_NULL;
    }
    new->size = size;

    return new;
}


/*
 * list_find()
 *
 * Find the key in the linked list pointed to by head.
 */
static hash_node *
list_find(caddr_t key, hash_node *head)
{
    while (head){
	if (!strcmp(head->key, key)){
	    return head;
	}
	head = head->next;
    }
    return NODE_NULL;
}


/*
 * _hash()
 *
 * Compute the hash value for the given key.
 */
static int
_hash(int size, char *key)
{
    unsigned int h = 0x0;

    while (*key){
	h = (h << 1) ^ (h ^ (unsigned char) *key++);
    }

    h %= size;
    return h;
}

/*
 * hash_destroy()
 *
 * Find the key and (if it's there) remove it entirely.
 * The function (*nukefunc)() is in charge of disposing
 * of the storage help by the data associated with the node.
 */
void
hash_destroy(hash_table *table, char *key, void (*nukefunc)())
{
    int bucket = _hash(table->size, key);
    hash_node *found = table->buckets[bucket];
    hash_node *to_free = NODE_NULL;

    if (!found) {
	return;
    }

    if (!strcmp(found->key, key)) {
	/*
	 * It was the head of the list.
	 */
	table->buckets[bucket] = found->next;
	to_free = found;
    }
    else {
	/*
	 * Walk the list, looking one ahead.
	 */
	while (found->next) {
	    if (!strcmp(found->next->key, key)) {
		to_free = found->next;
		found->next = found->next->next;
		break;
	    }
	    found = found->next;
	}

	if (!to_free){
	    return;
	}
    }

    if (nukefunc)
	(*nukefunc)(to_free->key, to_free->data);
    free(to_free);
    return;
}


/*
 * hash_search()
 *
 * Search the table for the given key. Then:
 *
 * 1) If you find it and there is no replacement function, just
 *    return what you found. (This is a simple search).
 * 2) If you find it and there is a replacement function, run
 *    the function on the data you found, and replace the old
 *    data with whatever is passed in datum. Return 0.
 * 3) If you don't find it and there is some datum, insert a
 *    new item into the table. Insertions go at the front of
 *    the bucket. Return 0.
 * 4) Otherwise just return 0.
 *
 */
void *
hash_search(hash_table *table, caddr_t key, void *datum,
	    void (*replace_func)())
{
    int bucket = _hash(table->size, key);
    hash_node *found = list_find(key, table->buckets[bucket]);

    if (found){
	if (!replace_func){
	    return found->data;
	}
	else{
	    (*replace_func)(found->data);
	    found->data = datum;
	}
    }
    else{
	if (datum){

	    static int assign_key();

	    hash_node *new = (hash_node *)malloc(sizeof(hash_node));

	    if (!new || !assign_key(key, new)){
		return GENERIC_NULL;
	    }
	    new->data = datum;
	    new->next = table->buckets[bucket];
	    table->buckets[bucket] = new;
	    return new;
	}
    }
    return GENERIC_NULL;
}


/*
 * assign_key()
 *
 * Set the key value of a node to be 'key'. Get some space from
 * malloc and copy it in etc. Return 1 if all is well, 0 otherwise.
 */
static int
assign_key(char *key, hash_node *node)
{
    if (!node || !key){
	return 0;
    }

    if (!(node->key = (char *)malloc(strlen(key) + 1))){
	return 0;
    }

    node->key[0] = '\0';
    strcat(node->key, key);
    return 1;
}

/*
 * hash_traverse()
 *
 * Traverse the hash table and run the function func on the
 * data found at each node and the argument we're passed for it.
 */
void
hash_traverse(hash_table *table, int (*func)(), void *arg)
{
    register int i;
    register int size = table->size;

    if (!func)
	return;

    for (i = 0; i < size; i++) {
	hash_node *n = table->buckets[i];
	while (n) {
	    if ((*func)(n->key, n->data, arg) == 0)
		return;
	    n = n->next;
	}
    }
    return;
}

/*
 * hash_purge()
 *
 * Run through the entire hash table. Call purge_func
 * on the data found at each node, and then free the node.
 * Set all the bucket pointers to 0.
 */
void
hash_purge(hash_table *table, void (*purge_func)(char *p1, void *p2))
{
    register int i;
    register int size = table->size;

    for (i = 0; i < size; i++) {
	hash_node *n = table->buckets[i];
	if (n) {
	    do {
		hash_node *to_free = n;
		if (purge_func) {
		    (*purge_func)(n->key, n->data);
		}
		n = n->next;
		free(to_free);
	    } while (n);
	    table->buckets[i] = NODE_NULL;
	}
    }
}

#undef min
#define min(a, b) (a) < (b) ? (a) : (b)

/*
 * hash_stats()
 *
 * Print statistics about the current table allocation to stdout.
 */
void
hash_stats(hash_table *table, int verbose)
{
    register int i;
    int total_elements = 0;
    int non_empty_buckets = 0;
    int max_count = 0;
    int max_repeats = 0;
    int *counts;
    int size = table->size;

    if (!(counts = (int *)malloc(size * sizeof(int)))){
	fprintf(stderr, "malloc returns 0\n");
	exit(1);
    }

    for (i = 0; i < size; i++){
	int x = 0;
	hash_node *n = table->buckets[i];
	counts[i] = 0;
	while (n){
	    if (!x){
		x = 1;
		non_empty_buckets++;
		if (verbose){
		    printf("bucket %2d: ", i);
		}
	    }
	    if (verbose){
		printf(" %s", n->key);
	    }
	    counts[i]++;
	    n = n->next;
	}

	total_elements += counts[i];
	if (counts[i] > max_count){
	    max_count = counts[i];
	    max_repeats = 1;
	}
	else if (counts[i] == max_count){
	    max_repeats++;
	}

	if (counts[i] && verbose){
	    printf(" (%d)\n", counts[i]);
	}
    }

    printf("\n");
    printf("%d element%s in storage.\n", total_elements, total_elements == 1 ? "" : "s");

    if (total_elements){
	printf("%d of %d (%.2f%%) buckets are in use\n", non_empty_buckets, size,
	       (double)100 * (double)non_empty_buckets / (double)(size));
	printf("the maximum number of elements in a bucket is %d (%d times)\n", max_count, max_repeats);
	printf("average per bucket is %f\n", (double)total_elements / (double)non_empty_buckets);
	printf("optimal would be %f\n", (double)total_elements / (double)(min(size, total_elements)));
    }
    return;
}
OpenPOWER on IntegriCloud