diff options
Diffstat (limited to 'lib/libc/db')
71 files changed, 40026 insertions, 0 deletions
diff --git a/lib/libc/db/Makefile.inc b/lib/libc/db/Makefile.inc new file mode 100644 index 0000000..77af9c5 --- /dev/null +++ b/lib/libc/db/Makefile.inc @@ -0,0 +1,10 @@ +# @(#)Makefile.inc 8.2 (Berkeley) 2/21/94 +# +CFLAGS+=-D__DBINTERFACE_PRIVATE + +.include "${.CURDIR}/db/btree/Makefile.inc" +.include "${.CURDIR}/db/db/Makefile.inc" +.include "${.CURDIR}/db/hash/Makefile.inc" +.include "${.CURDIR}/db/man/Makefile.inc" +.include "${.CURDIR}/db/mpool/Makefile.inc" +.include "${.CURDIR}/db/recno/Makefile.inc" diff --git a/lib/libc/db/VERSION b/lib/libc/db/VERSION new file mode 100644 index 0000000..be08b78 --- /dev/null +++ b/lib/libc/db/VERSION @@ -0,0 +1,97 @@ +# @(#)VERSION 8.9 (Berkeley) 4/2/94 + +This is version 1.74 of the Berkeley DB code. + +If your version of the DB code doesn't have a copy +of this version file, it's really old, please update it! + +New versions of this software are periodically made +available by anonymous ftp from ftp.cs.berkeley.edu, +in the file ucb/4bsd/db.tar.Z, or from ftp.uu.net. +If you'd like to get announcements of future releases +of this software, send email to the contact address +below. + +Email questions may be addressed to Keith Bostic at +bostic@cs.berkeley.edu. + +============================================ +Distribution contents: + +Makefile.inc Ignore this, it's the 4.4BSD subsystem Makefile. +PORT The per OS/architecture directories to use to build + libdb.a, if you're not running 4.4BSD. See the file + PORT/README for more information. +README This file. +VERSION This file. +btree B+tree routines. +db Dbopen(3) interface routine. +doc USENIX papers, formatted manual pages. +hash Extended linear hashing routines. +man Unformatted manual pages. +mpool Memory pool routines. +recno Fixed/variable length record routines. +test Test package. + +============================================ +1.73 -> 1.74 + recno: Don't put the record if rec_search fails, in rec_rdelete. + Create fixed-length intermediate records past "end" of DB + correctly. + Realloc bug when reading in fixed records. + all: First cut at port to Alpha (64-bit architecture) using + 4.4BSD basic integral types typedef's. + Cast allocation pointers to shut up old compilers. + Rework PORT directory into OS/machine directories. + +1.72 -> 1.73 + btree: If enough duplicate records were inserted and then deleted + that internal pages had references to empty pages of the + duplicate keys, the search function ended up on the wrong + page. + +1.7 -> 1.72 12 Oct 1993 + hash: Support NET/2 hash formats. + +1.7 -> 1.71 16 Sep 1993 + btree/recno: + Fix bug in internal search routines that caused + return of invalid pointers. + +1.6 -> 1.7 07 Sep 1993 + hash: Fixed big key overflow bugs. + test: Portability hacks, rewrite test script, Makefile. + btree/recno: + Stop copying non-overflow key/data pairs. + PORT: Break PORT directory up into per architecture/OS + subdirectories. + +1.5 -> 1.6 06 Jun 1993 + hash: In PAIRFITS, the first comparison should look at (P)[2]. + The hash_realloc function was walking off the end of memory. + The overflow page number was wrong when bumping splitpoint. + +1.4 -> 1.5 23 May 1993 + hash: Set hash default fill factor dynamically. + recno: Fixed bug in sorted page splits. + Add page size parameter support. + Allow recno to specify the name of the underlying btree; + used for vi recovery. + btree/recno: + Support 64K pages. + btree/hash/recno: + Provide access to an underlying file descriptor. + Change sync routines to take a flag argument, recno + uses this to sync out the underlying btree. + +1.3 -> 1.4 10 May 1993 + recno: Delete the R_CURSORLOG flag from the recno interface. + Zero-length record fix for non-mmap reads. + Try and make SIZE_T_MAX test in open portable. + +1.2 -> 1.3 01 May 1993 + btree: Ignore user byte-order setting when reading already + existing database. Fixes to byte-order conversions. + +1.1 -> 1.2 15 Apr 1993 + No bug fixes, only compatibility hacks. diff --git a/lib/libc/db/btree/Makefile.inc b/lib/libc/db/btree/Makefile.inc new file mode 100644 index 0000000..71f8dfc --- /dev/null +++ b/lib/libc/db/btree/Makefile.inc @@ -0,0 +1,7 @@ +# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93 + +.PATH: ${.CURDIR}/db/btree + +SRCS+= bt_close.c bt_conv.c bt_debug.c bt_delete.c bt_get.c bt_open.c \ + bt_overflow.c bt_page.c bt_put.c bt_search.c bt_seq.c bt_split.c \ + bt_stack.c bt_utils.c diff --git a/lib/libc/db/btree/bt_close.c b/lib/libc/db/btree/bt_close.c new file mode 100644 index 0000000..66d8fc1 --- /dev/null +++ b/lib/libc/db/btree/bt_close.c @@ -0,0 +1,204 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_close.c 8.3 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/param.h> + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <db.h> +#include "btree.h" + +static int bt_meta __P((BTREE *)); + +/* + * BT_CLOSE -- Close a btree. + * + * Parameters: + * dbp: pointer to access method + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__bt_close(dbp) + DB *dbp; +{ + BTREE *t; + int fd; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + /* + * Delete any already deleted record that we've been saving + * because the cursor pointed to it. + */ + if (ISSET(t, B_DELCRSR) && __bt_crsrdel(t, &t->bt_bcursor)) + return (RET_ERROR); + + if (__bt_sync(dbp, 0) == RET_ERROR) + return (RET_ERROR); + + if (mpool_close(t->bt_mp) == RET_ERROR) + return (RET_ERROR); + + if (t->bt_stack) + free(t->bt_stack); + if (t->bt_kbuf) + free(t->bt_kbuf); + if (t->bt_dbuf) + free(t->bt_dbuf); + + fd = t->bt_fd; + free(t); + free(dbp); + return (close(fd) ? RET_ERROR : RET_SUCCESS); +} + +/* + * BT_SYNC -- sync the btree to disk. + * + * Parameters: + * dbp: pointer to access method + * + * Returns: + * RET_SUCCESS, RET_ERROR. + */ +int +__bt_sync(dbp, flags) + const DB *dbp; + u_int flags; +{ + BTREE *t; + int status; + PAGE *h; + void *p; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + /* Sync doesn't currently take any flags. */ + if (flags != 0) { + errno = EINVAL; + return (RET_ERROR); + } + + if (ISSET(t, B_INMEM | B_RDONLY) || !ISSET(t, B_MODIFIED)) + return (RET_SUCCESS); + + if (ISSET(t, B_METADIRTY) && bt_meta(t) == RET_ERROR) + return (RET_ERROR); + + /* + * Nastiness. If the cursor has been marked for deletion, but not + * actually deleted, we have to make a copy of the page, delete the + * key/data item, sync the file, and then restore the original page + * contents. + */ + if (ISSET(t, B_DELCRSR)) { + if ((p = (void *)malloc(t->bt_psize)) == NULL) + return (RET_ERROR); + if ((h = mpool_get(t->bt_mp, t->bt_bcursor.pgno, 0)) == NULL) + return (RET_ERROR); + memmove(p, h, t->bt_psize); + if ((status = + __bt_dleaf(t, h, t->bt_bcursor.index)) == RET_ERROR) + goto ecrsr; + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + } + + if ((status = mpool_sync(t->bt_mp)) == RET_SUCCESS) + CLR(t, B_MODIFIED); + +ecrsr: if (ISSET(t, B_DELCRSR)) { + if ((h = mpool_get(t->bt_mp, t->bt_bcursor.pgno, 0)) == NULL) + return (RET_ERROR); + memmove(h, p, t->bt_psize); + free(p); + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + } + return (status); +} + +/* + * BT_META -- write the tree meta data to disk. + * + * Parameters: + * t: tree + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +static int +bt_meta(t) + BTREE *t; +{ + BTMETA m; + void *p; + + if ((p = mpool_get(t->bt_mp, P_META, 0)) == NULL) + return (RET_ERROR); + + /* Fill in metadata. */ + m.m_magic = BTREEMAGIC; + m.m_version = BTREEVERSION; + m.m_psize = t->bt_psize; + m.m_free = t->bt_free; + m.m_nrecs = t->bt_nrecs; + m.m_flags = t->bt_flags & SAVEMETA; + + memmove(p, &m, sizeof(BTMETA)); + mpool_put(t->bt_mp, p, MPOOL_DIRTY); + return (RET_SUCCESS); +} diff --git a/lib/libc/db/btree/bt_conv.c b/lib/libc/db/btree/bt_conv.c new file mode 100644 index 0000000..eb49340 --- /dev/null +++ b/lib/libc/db/btree/bt_conv.c @@ -0,0 +1,221 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_conv.c 8.2 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/param.h> + +#include <stdio.h> + +#include <db.h> +#include "btree.h" + +static void mswap __P((PAGE *)); + +/* + * __BT_BPGIN, __BT_BPGOUT -- + * Convert host-specific number layout to/from the host-independent + * format stored on disk. + * + * Parameters: + * t: tree + * pg: page number + * h: page to convert + */ +void +__bt_pgin(t, pg, pp) + void *t; + pgno_t pg; + void *pp; +{ + PAGE *h; + indx_t i, top; + u_char flags; + char *p; + + if (!ISSET(((BTREE *)t), B_NEEDSWAP)) + return; + if (pg == P_META) { + mswap(pp); + return; + } + + h = pp; + M_32_SWAP(h->pgno); + M_32_SWAP(h->prevpg); + M_32_SWAP(h->nextpg); + M_32_SWAP(h->flags); + M_16_SWAP(h->lower); + M_16_SWAP(h->upper); + + top = NEXTINDEX(h); + if ((h->flags & P_TYPE) == P_BINTERNAL) + for (i = 0; i < top; i++) { + M_16_SWAP(h->linp[i]); + p = (char *)GETBINTERNAL(h, i); + P_32_SWAP(p); + p += sizeof(size_t); + P_32_SWAP(p); + p += sizeof(pgno_t); + if (*(u_char *)p & P_BIGKEY) { + p += sizeof(u_char); + P_32_SWAP(p); + p += sizeof(pgno_t); + P_32_SWAP(p); + } + } + else if ((h->flags & P_TYPE) == P_BLEAF) + for (i = 0; i < top; i++) { + M_16_SWAP(h->linp[i]); + p = (char *)GETBLEAF(h, i); + P_32_SWAP(p); + p += sizeof(size_t); + P_32_SWAP(p); + p += sizeof(size_t); + flags = *(u_char *)p; + if (flags & (P_BIGKEY | P_BIGDATA)) { + p += sizeof(u_char); + if (flags & P_BIGKEY) { + P_32_SWAP(p); + p += sizeof(pgno_t); + P_32_SWAP(p); + } + if (flags & P_BIGDATA) { + p += sizeof(size_t); + P_32_SWAP(p); + p += sizeof(pgno_t); + P_32_SWAP(p); + } + } + } +} + +void +__bt_pgout(t, pg, pp) + void *t; + pgno_t pg; + void *pp; +{ + PAGE *h; + indx_t i, top; + u_char flags; + char *p; + + if (!ISSET(((BTREE *)t), B_NEEDSWAP)) + return; + if (pg == P_META) { + mswap(pp); + return; + } + + h = pp; + top = NEXTINDEX(h); + if ((h->flags & P_TYPE) == P_BINTERNAL) + for (i = 0; i < top; i++) { + p = (char *)GETBINTERNAL(h, i); + P_32_SWAP(p); + p += sizeof(size_t); + P_32_SWAP(p); + p += sizeof(pgno_t); + if (*(u_char *)p & P_BIGKEY) { + p += sizeof(u_char); + P_32_SWAP(p); + p += sizeof(pgno_t); + P_32_SWAP(p); + } + M_16_SWAP(h->linp[i]); + } + else if ((h->flags & P_TYPE) == P_BLEAF) + for (i = 0; i < top; i++) { + p = (char *)GETBLEAF(h, i); + P_32_SWAP(p); + p += sizeof(size_t); + P_32_SWAP(p); + p += sizeof(size_t); + flags = *(u_char *)p; + if (flags & (P_BIGKEY | P_BIGDATA)) { + p += sizeof(u_char); + if (flags & P_BIGKEY) { + P_32_SWAP(p); + p += sizeof(pgno_t); + P_32_SWAP(p); + } + if (flags & P_BIGDATA) { + p += sizeof(size_t); + P_32_SWAP(p); + p += sizeof(pgno_t); + P_32_SWAP(p); + } + } + M_16_SWAP(h->linp[i]); + } + + M_32_SWAP(h->pgno); + M_32_SWAP(h->prevpg); + M_32_SWAP(h->nextpg); + M_32_SWAP(h->flags); + M_16_SWAP(h->lower); + M_16_SWAP(h->upper); +} + +/* + * MSWAP -- Actually swap the bytes on the meta page. + * + * Parameters: + * p: page to convert + */ +static void +mswap(pg) + PAGE *pg; +{ + char *p; + + p = (char *)pg; + P_32_SWAP(p); /* m_magic */ + p += sizeof(u_int32_t); + P_32_SWAP(p); /* m_version */ + p += sizeof(u_int32_t); + P_32_SWAP(p); /* m_psize */ + p += sizeof(u_int32_t); + P_32_SWAP(p); /* m_free */ + p += sizeof(u_int32_t); + P_32_SWAP(p); /* m_nrecs */ + p += sizeof(u_int32_t); + P_32_SWAP(p); /* m_flags */ + p += sizeof(u_int32_t); +} diff --git a/lib/libc/db/btree/bt_debug.c b/lib/libc/db/btree/bt_debug.c new file mode 100644 index 0000000..5f9ac1d --- /dev/null +++ b/lib/libc/db/btree/bt_debug.c @@ -0,0 +1,331 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_debug.c 8.2 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/param.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <db.h> +#include "btree.h" + +#ifdef DEBUG +/* + * BT_DUMP -- Dump the tree + * + * Parameters: + * dbp: pointer to the DB + */ +void +__bt_dump(dbp) + DB *dbp; +{ + BTREE *t; + PAGE *h; + pgno_t i; + char *sep; + + t = dbp->internal; + (void)fprintf(stderr, "%s: pgsz %d", + ISSET(t, B_INMEM) ? "memory" : "disk", t->bt_psize); + if (ISSET(t, R_RECNO)) + (void)fprintf(stderr, " keys %lu", t->bt_nrecs); +#undef X +#define X(flag, name) \ + if (ISSET(t, flag)) { \ + (void)fprintf(stderr, "%s%s", sep, name); \ + sep = ", "; \ + } + if (t->bt_flags) { + sep = " flags ("; + X(B_DELCRSR, "DELCRSR"); + X(R_FIXLEN, "FIXLEN"); + X(B_INMEM, "INMEM"); + X(B_NODUPS, "NODUPS"); + X(B_RDONLY, "RDONLY"); + X(R_RECNO, "RECNO"); + X(B_SEQINIT, "SEQINIT"); + X(B_METADIRTY,"METADIRTY"); + (void)fprintf(stderr, ")\n"); + } +#undef X + + for (i = P_ROOT; (h = mpool_get(t->bt_mp, i, 0)) != NULL; ++i) { + __bt_dpage(h); + (void)mpool_put(t->bt_mp, h, 0); + } +} + +/* + * BT_DMPAGE -- Dump the meta page + * + * Parameters: + * h: pointer to the PAGE + */ +void +__bt_dmpage(h) + PAGE *h; +{ + BTMETA *m; + char *sep; + + m = (BTMETA *)h; + (void)fprintf(stderr, "magic %lx\n", m->m_magic); + (void)fprintf(stderr, "version %lu\n", m->m_version); + (void)fprintf(stderr, "psize %lu\n", m->m_psize); + (void)fprintf(stderr, "free %lu\n", m->m_free); + (void)fprintf(stderr, "nrecs %lu\n", m->m_nrecs); + (void)fprintf(stderr, "flags %lu", m->m_flags); +#undef X +#define X(flag, name) \ + if (m->m_flags & flag) { \ + (void)fprintf(stderr, "%s%s", sep, name); \ + sep = ", "; \ + } + if (m->m_flags) { + sep = " ("; + X(B_NODUPS, "NODUPS"); + X(R_RECNO, "RECNO"); + (void)fprintf(stderr, ")"); + } +} + +/* + * BT_DNPAGE -- Dump the page + * + * Parameters: + * n: page number to dump. + */ +void +__bt_dnpage(dbp, pgno) + DB *dbp; + pgno_t pgno; +{ + BTREE *t; + PAGE *h; + + t = dbp->internal; + if ((h = mpool_get(t->bt_mp, pgno, 0)) != NULL) { + __bt_dpage(h); + (void)mpool_put(t->bt_mp, h, 0); + } +} + +/* + * BT_DPAGE -- Dump the page + * + * Parameters: + * h: pointer to the PAGE + */ +void +__bt_dpage(h) + PAGE *h; +{ + BINTERNAL *bi; + BLEAF *bl; + RINTERNAL *ri; + RLEAF *rl; + indx_t cur, top; + char *sep; + + (void)fprintf(stderr, " page %d: (", h->pgno); +#undef X +#define X(flag, name) \ + if (h->flags & flag) { \ + (void)fprintf(stderr, "%s%s", sep, name); \ + sep = ", "; \ + } + sep = ""; + X(P_BINTERNAL, "BINTERNAL") /* types */ + X(P_BLEAF, "BLEAF") + X(P_RINTERNAL, "RINTERNAL") /* types */ + X(P_RLEAF, "RLEAF") + X(P_OVERFLOW, "OVERFLOW") + X(P_PRESERVE, "PRESERVE"); + (void)fprintf(stderr, ")\n"); +#undef X + + (void)fprintf(stderr, "\tprev %2d next %2d", h->prevpg, h->nextpg); + if (h->flags & P_OVERFLOW) + return; + + top = NEXTINDEX(h); + (void)fprintf(stderr, " lower %3d upper %3d nextind %d\n", + h->lower, h->upper, top); + for (cur = 0; cur < top; cur++) { + (void)fprintf(stderr, "\t[%03d] %4d ", cur, h->linp[cur]); + switch(h->flags & P_TYPE) { + case P_BINTERNAL: + bi = GETBINTERNAL(h, cur); + (void)fprintf(stderr, + "size %03d pgno %03d", bi->ksize, bi->pgno); + if (bi->flags & P_BIGKEY) + (void)fprintf(stderr, " (indirect)"); + else if (bi->ksize) + (void)fprintf(stderr, + " {%.*s}", (int)bi->ksize, bi->bytes); + break; + case P_RINTERNAL: + ri = GETRINTERNAL(h, cur); + (void)fprintf(stderr, "entries %03d pgno %03d", + ri->nrecs, ri->pgno); + break; + case P_BLEAF: + bl = GETBLEAF(h, cur); + if (bl->flags & P_BIGKEY) + (void)fprintf(stderr, + "big key page %lu size %u/", + *(pgno_t *)bl->bytes, + *(size_t *)(bl->bytes + sizeof(pgno_t))); + else if (bl->ksize) + (void)fprintf(stderr, "%s/", bl->bytes); + if (bl->flags & P_BIGDATA) + (void)fprintf(stderr, + "big data page %lu size %u", + *(pgno_t *)(bl->bytes + bl->ksize), + *(size_t *)(bl->bytes + bl->ksize + + sizeof(pgno_t))); + else if (bl->dsize) + (void)fprintf(stderr, "%.*s", + (int)bl->dsize, bl->bytes + bl->ksize); + break; + case P_RLEAF: + rl = GETRLEAF(h, cur); + if (rl->flags & P_BIGDATA) + (void)fprintf(stderr, + "big data page %lu size %u", + *(pgno_t *)rl->bytes, + *(size_t *)(rl->bytes + sizeof(pgno_t))); + else if (rl->dsize) + (void)fprintf(stderr, + "%.*s", (int)rl->dsize, rl->bytes); + break; + } + (void)fprintf(stderr, "\n"); + } +} +#endif + +#ifdef STATISTICS +/* + * BT_STAT -- Gather/print the tree statistics + * + * Parameters: + * dbp: pointer to the DB + */ +void +__bt_stat(dbp) + DB *dbp; +{ + extern u_long bt_cache_hit, bt_cache_miss, bt_pfxsaved, bt_rootsplit; + extern u_long bt_sortsplit, bt_split; + BTREE *t; + PAGE *h; + pgno_t i, pcont, pinternal, pleaf; + u_long ifree, lfree, nkeys; + int levels; + + t = dbp->internal; + pcont = pinternal = pleaf = 0; + nkeys = ifree = lfree = 0; + for (i = P_ROOT; (h = mpool_get(t->bt_mp, i, 0)) != NULL; ++i) { + switch(h->flags & P_TYPE) { + case P_BINTERNAL: + case P_RINTERNAL: + ++pinternal; + ifree += h->upper - h->lower; + break; + case P_BLEAF: + case P_RLEAF: + ++pleaf; + lfree += h->upper - h->lower; + nkeys += NEXTINDEX(h); + break; + case P_OVERFLOW: + ++pcont; + break; + } + (void)mpool_put(t->bt_mp, h, 0); + } + + /* Count the levels of the tree. */ + for (i = P_ROOT, levels = 0 ;; ++levels) { + h = mpool_get(t->bt_mp, i, 0); + if (h->flags & (P_BLEAF|P_RLEAF)) { + if (levels == 0) + levels = 1; + (void)mpool_put(t->bt_mp, h, 0); + break; + } + i = ISSET(t, R_RECNO) ? + GETRINTERNAL(h, 0)->pgno : + GETBINTERNAL(h, 0)->pgno; + (void)mpool_put(t->bt_mp, h, 0); + } + + (void)fprintf(stderr, "%d level%s with %ld keys", + levels, levels == 1 ? "" : "s", nkeys); + if (ISSET(t, R_RECNO)) + (void)fprintf(stderr, " (%ld header count)", t->bt_nrecs); + (void)fprintf(stderr, + "\n%lu pages (leaf %ld, internal %ld, overflow %ld)\n", + pinternal + pleaf + pcont, pleaf, pinternal, pcont); + (void)fprintf(stderr, "%ld cache hits, %ld cache misses\n", + bt_cache_hit, bt_cache_miss); + (void)fprintf(stderr, "%ld splits (%ld root splits, %ld sort splits)\n", + bt_split, bt_rootsplit, bt_sortsplit); + pleaf *= t->bt_psize - BTDATAOFF; + if (pleaf) + (void)fprintf(stderr, + "%.0f%% leaf fill (%ld bytes used, %ld bytes free)\n", + ((double)(pleaf - lfree) / pleaf) * 100, + pleaf - lfree, lfree); + pinternal *= t->bt_psize - BTDATAOFF; + if (pinternal) + (void)fprintf(stderr, + "%.0f%% internal fill (%ld bytes used, %ld bytes free\n", + ((double)(pinternal - ifree) / pinternal) * 100, + pinternal - ifree, ifree); + if (bt_pfxsaved) + (void)fprintf(stderr, "prefix checking removed %lu bytes.\n", + bt_pfxsaved); +} +#endif diff --git a/lib/libc/db/btree/bt_delete.c b/lib/libc/db/btree/bt_delete.c new file mode 100644 index 0000000..5dba534 --- /dev/null +++ b/lib/libc/db/btree/bt_delete.c @@ -0,0 +1,324 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_delete.c 8.3 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <stdio.h> +#include <string.h> + +#include <db.h> +#include "btree.h" + +static int bt_bdelete __P((BTREE *, const DBT *)); + +/* + * __BT_DELETE -- Delete the item(s) referenced by a key. + * + * Parameters: + * dbp: pointer to access method + * key: key to delete + * flags: R_CURSOR if deleting what the cursor references + * + * Returns: + * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. + */ +int +__bt_delete(dbp, key, flags) + const DB *dbp; + const DBT *key; + u_int flags; +{ + BTREE *t; + int status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + if (ISSET(t, B_RDONLY)) { + errno = EPERM; + return (RET_ERROR); + } + + switch(flags) { + case 0: + status = bt_bdelete(t, key); + break; + case R_CURSOR: + /* + * If flags is R_CURSOR, delete the cursor; must already have + * started a scan and not have already deleted the record. For + * the delete cursor bit to have been set requires that the + * scan be initialized, so no reason to check. + */ + if (!ISSET(t, B_SEQINIT)) + goto einval; + status = ISSET(t, B_DELCRSR) ? + RET_SPECIAL : __bt_crsrdel(t, &t->bt_bcursor); + break; + default: +einval: errno = EINVAL; + return (RET_ERROR); + } + if (status == RET_SUCCESS) + SET(t, B_MODIFIED); + return (status); +} + +/* + * BT_BDELETE -- Delete all key/data pairs matching the specified key. + * + * Parameters: + * tree: tree + * key: key to delete + * + * Returns: + * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. + */ +static int +bt_bdelete(t, key) + BTREE *t; + const DBT *key; +{ + EPG *e, save; + PAGE *h; + pgno_t cpgno, pg; + indx_t cindex; + int deleted, dirty1, dirty2, exact; + + /* Find any matching record; __bt_search pins the page. */ + if ((e = __bt_search(t, key, &exact)) == NULL) + return (RET_ERROR); + if (!exact) { + mpool_put(t->bt_mp, e->page, 0); + return (RET_SPECIAL); + } + + /* + * Delete forward, then delete backward, from the found key. The + * ordering is so that the deletions don't mess up the page refs. + * The first loop deletes the key from the original page, the second + * unpins the original page. In the first loop, dirty1 is set if + * the original page is modified, and dirty2 is set if any subsequent + * pages are modified. In the second loop, dirty1 starts off set if + * the original page has been modified, and is set if any subsequent + * pages are modified. + * + * If find the key referenced by the cursor, don't delete it, just + * flag it for future deletion. The cursor page number is P_INVALID + * unless the sequential scan is initialized, so no reason to check. + * A special case is when the already deleted cursor record was the + * only record found. If so, then the delete opertion fails as no + * records were deleted. + * + * Cycle in place in the current page until the current record doesn't + * match the key or the page is empty. If the latter, walk forward, + * skipping empty pages and repeating until a record doesn't match + * the key or the end of the tree is reached. + */ + cpgno = t->bt_bcursor.pgno; + cindex = t->bt_bcursor.index; + save = *e; + dirty1 = 0; + for (h = e->page, deleted = 0;;) { + dirty2 = 0; + do { + if (h->pgno == cpgno && e->index == cindex) { + if (!ISSET(t, B_DELCRSR)) { + SET(t, B_DELCRSR); + deleted = 1; + } + ++e->index; + } else { + if (__bt_dleaf(t, h, e->index)) { + if (h->pgno != save.page->pgno) + mpool_put(t->bt_mp, h, dirty2); + mpool_put(t->bt_mp, save.page, dirty1); + return (RET_ERROR); + } + if (h->pgno == save.page->pgno) + dirty1 = MPOOL_DIRTY; + else + dirty2 = MPOOL_DIRTY; + deleted = 1; + } + } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0); + + /* + * Quit if didn't find a match, no next page, or first key on + * the next page doesn't match. Don't unpin the original page + * unless an error occurs. + */ + if (e->index < NEXTINDEX(h)) + break; + for (;;) { + if ((pg = h->nextpg) == P_INVALID) + goto done1; + if (h->pgno != save.page->pgno) + mpool_put(t->bt_mp, h, dirty2); + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) { + mpool_put(t->bt_mp, save.page, dirty1); + return (RET_ERROR); + } + if (NEXTINDEX(h) != 0) { + e->page = h; + e->index = 0; + break; + } + } + + if (__bt_cmp(t, key, e) != 0) + break; + } + + /* + * Reach here with the original page and the last page referenced + * pinned (they may be the same). Release it if not the original. + */ +done1: if (h->pgno != save.page->pgno) + mpool_put(t->bt_mp, h, dirty2); + + /* + * Walk backwards from the record previous to the record returned by + * __bt_search, skipping empty pages, until a record doesn't match + * the key or reach the beginning of the tree. + */ + *e = save; + for (;;) { + if (e->index) + --e->index; + for (h = e->page; e->index; --e->index) { + if (__bt_cmp(t, key, e) != 0) + goto done2; + if (h->pgno == cpgno && e->index == cindex) { + if (!ISSET(t, B_DELCRSR)) { + SET(t, B_DELCRSR); + deleted = 1; + } + } else { + if (__bt_dleaf(t, h, e->index) == RET_ERROR) { + mpool_put(t->bt_mp, h, dirty1); + return (RET_ERROR); + } + if (h->pgno == save.page->pgno) + dirty1 = MPOOL_DIRTY; + deleted = 1; + } + } + + if ((pg = h->prevpg) == P_INVALID) + goto done2; + mpool_put(t->bt_mp, h, dirty1); + dirty1 = 0; + if ((e->page = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + e->index = NEXTINDEX(e->page); + } + + /* + * Reach here with the last page that was looked at pinned. Release + * it. + */ +done2: mpool_put(t->bt_mp, h, dirty1); + return (deleted ? RET_SUCCESS : RET_SPECIAL); +} + +/* + * __BT_DLEAF -- Delete a single record from a leaf page. + * + * Parameters: + * t: tree + * index: index on current page to delete + * + * Returns: + * RET_SUCCESS, RET_ERROR. + */ +int +__bt_dleaf(t, h, index) + BTREE *t; + PAGE *h; + indx_t index; +{ + register BLEAF *bl; + register indx_t cnt, *ip, offset; + register size_t nbytes; + char *from; + void *to; + + /* + * Delete a record from a btree leaf page. Internal records are never + * deleted from internal pages, regardless of the records that caused + * them to be added being deleted. Pages made empty by deletion are + * not reclaimed. They are, however, made available for reuse. + * + * Pack the remaining entries at the end of the page, shift the indices + * down, overwriting the deleted record and its index. If the record + * uses overflow pages, make them available for reuse. + */ + to = bl = GETBLEAF(h, index); + if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR) + return (RET_ERROR); + if (bl->flags & P_BIGDATA && + __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR) + return (RET_ERROR); + nbytes = NBLEAF(bl); + + /* + * Compress the key/data pairs. Compress and adjust the [BR]LEAF + * offsets. Reset the headers. + */ + from = (char *)h + h->upper; + memmove(from + nbytes, from, (char *)to - from); + h->upper += nbytes; + + offset = h->linp[index]; + for (cnt = index, ip = &h->linp[0]; cnt--; ++ip) + if (ip[0] < offset) + ip[0] += nbytes; + for (cnt = NEXTINDEX(h) - index; --cnt; ++ip) + ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; + h->lower -= sizeof(indx_t); + return (RET_SUCCESS); +} diff --git a/lib/libc/db/btree/bt_get.c b/lib/libc/db/btree/bt_get.c new file mode 100644 index 0000000..28b2d60 --- /dev/null +++ b/lib/libc/db/btree/bt_get.c @@ -0,0 +1,238 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_get.c 8.2 (Berkeley) 9/7/93"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <stddef.h> +#include <stdio.h> + +#include <db.h> +#include "btree.h" + +/* + * __BT_GET -- Get a record from the btree. + * + * Parameters: + * dbp: pointer to access method + * key: key to find + * data: data to return + * flag: currently unused + * + * Returns: + * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. + */ +int +__bt_get(dbp, key, data, flags) + const DB *dbp; + const DBT *key; + DBT *data; + u_int flags; +{ + BTREE *t; + EPG *e; + int exact, status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + /* Get currently doesn't take any flags. */ + if (flags) { + errno = EINVAL; + return (RET_ERROR); + } + + if ((e = __bt_search(t, key, &exact)) == NULL) + return (RET_ERROR); + if (!exact) { + mpool_put(t->bt_mp, e->page, 0); + return (RET_SPECIAL); + } + + /* + * A special case is if we found the record but it's flagged for + * deletion. In this case, we want to find another record with the + * same key, if it exists. Rather than look around the tree we call + * __bt_first and have it redo the search, as __bt_first will not + * return keys marked for deletion. Slow, but should never happen. + */ + if (ISSET(t, B_DELCRSR) && e->page->pgno == t->bt_bcursor.pgno && + e->index == t->bt_bcursor.index) { + mpool_put(t->bt_mp, e->page, 0); + if ((e = __bt_first(t, key, &exact)) == NULL) + return (RET_ERROR); + if (!exact) + return (RET_SPECIAL); + } + + status = __bt_ret(t, e, NULL, data); + /* + * If the user is doing concurrent access, we copied the + * key/data, toss the page. + */ + if (ISSET(t, B_DB_LOCK)) + mpool_put(t->bt_mp, e->page, 0); + else + t->bt_pinned = e->page; + return (status); +} + +/* + * __BT_FIRST -- Find the first entry. + * + * Parameters: + * t: the tree + * key: the key + * + * Returns: + * The first entry in the tree greater than or equal to key. + */ +EPG * +__bt_first(t, key, exactp) + BTREE *t; + const DBT *key; + int *exactp; +{ + register PAGE *h; + register EPG *e; + EPG save; + pgno_t cpgno, pg; + indx_t cindex; + int found; + + /* + * Find any matching record; __bt_search pins the page. Only exact + * matches are tricky, otherwise just return the location of the key + * if it were to be inserted into the tree. + */ + if ((e = __bt_search(t, key, exactp)) == NULL) + return (NULL); + if (!*exactp) + return (e); + + if (ISSET(t, B_DELCRSR)) { + cpgno = t->bt_bcursor.pgno; + cindex = t->bt_bcursor.index; + } else { + cpgno = P_INVALID; + cindex = 0; /* GCC thinks it's uninitialized. */ + } + + /* + * Walk backwards, skipping empty pages, as long as the entry matches + * and there are keys left in the tree. Save a copy of each match in + * case we go too far. A special case is that we don't return a match + * on records that the cursor references that have already been flagged + * for deletion. + */ + save = *e; + h = e->page; + found = 0; + do { + if (cpgno != h->pgno || cindex != e->index) { + if (save.page->pgno != e->page->pgno) { + mpool_put(t->bt_mp, save.page, 0); + save = *e; + } else + save.index = e->index; + found = 1; + } + /* + * Make a special effort not to unpin the page the last (or + * original) match was on, but also make sure it's unpinned + * if an error occurs. + */ + while (e->index == 0) { + if (h->prevpg == P_INVALID) + goto done1; + if (h->pgno != save.page->pgno) + mpool_put(t->bt_mp, h, 0); + if ((h = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL) { + if (h->pgno == save.page->pgno) + mpool_put(t->bt_mp, save.page, 0); + return (NULL); + } + e->page = h; + e->index = NEXTINDEX(h); + } + --e->index; + } while (__bt_cmp(t, key, e) == 0); + + /* + * Reach here with the last page that was looked at pinned, which may + * or may not be the same as the last (or original) match page. If + * it's not useful, release it. + */ +done1: if (h->pgno != save.page->pgno) + mpool_put(t->bt_mp, h, 0); + + /* + * If still haven't found a record, the only possibility left is the + * next one. Move forward one slot, skipping empty pages and check. + */ + if (!found) { + h = save.page; + if (++save.index == NEXTINDEX(h)) { + do { + pg = h->nextpg; + mpool_put(t->bt_mp, h, 0); + if (pg == P_INVALID) { + *exactp = 0; + return (e); + } + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (NULL); + } while ((save.index = NEXTINDEX(h)) == 0); + save.page = h; + } + if (__bt_cmp(t, key, &save) != 0) { + *exactp = 0; + return (e); + } + } + *e = save; + *exactp = 1; + return (e); +} diff --git a/lib/libc/db/btree/bt_open.c b/lib/libc/db/btree/bt_open.c new file mode 100644 index 0000000..ec79cf7 --- /dev/null +++ b/lib/libc/db/btree/bt_open.c @@ -0,0 +1,440 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_open.c 8.5 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +/* + * Implementation of btree access method for 4.4BSD. + * + * The design here was originally based on that of the btree access method + * used in the Postgres database system at UC Berkeley. This implementation + * is wholly independent of the Postgres code. + */ + +#include <sys/param.h> +#include <sys/stat.h> + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <db.h> +#include "btree.h" + +static int byteorder __P((void)); +static int nroot __P((BTREE *)); +static int tmp __P((void)); + +/* + * __BT_OPEN -- Open a btree. + * + * Creates and fills a DB struct, and calls the routine that actually + * opens the btree. + * + * Parameters: + * fname: filename (NULL for in-memory trees) + * flags: open flag bits + * mode: open permission bits + * b: BTREEINFO pointer + * + * Returns: + * NULL on failure, pointer to DB on success. + * + */ +DB * +__bt_open(fname, flags, mode, openinfo, dflags) + const char *fname; + int flags, mode, dflags; + const BTREEINFO *openinfo; +{ + struct stat sb; + BTMETA m; + BTREE *t; + BTREEINFO b; + DB *dbp; + pgno_t ncache; + ssize_t nr; + int machine_lorder; + + t = NULL; + + /* + * Intention is to make sure all of the user's selections are okay + * here and then use them without checking. Can't be complete, since + * we don't know the right page size, lorder or flags until the backing + * file is opened. Also, the file's page size can cause the cachesize + * to change. + */ + machine_lorder = byteorder(); + if (openinfo) { + b = *openinfo; + + /* Flags: R_DUP. */ + if (b.flags & ~(R_DUP)) + goto einval; + + /* + * Page size must be indx_t aligned and >= MINPSIZE. Default + * page size is set farther on, based on the underlying file + * transfer size. + */ + if (b.psize && + (b.psize < MINPSIZE || b.psize > MAX_PAGE_OFFSET + 1 || + b.psize & sizeof(indx_t) - 1)) + goto einval; + + /* Minimum number of keys per page; absolute minimum is 2. */ + if (b.minkeypage) { + if (b.minkeypage < 2) + goto einval; + } else + b.minkeypage = DEFMINKEYPAGE; + + /* If no comparison, use default comparison and prefix. */ + if (b.compare == NULL) { + b.compare = __bt_defcmp; + if (b.prefix == NULL) + b.prefix = __bt_defpfx; + } + + if (b.lorder == 0) + b.lorder = machine_lorder; + } else { + b.compare = __bt_defcmp; + b.cachesize = 0; + b.flags = 0; + b.lorder = machine_lorder; + b.minkeypage = DEFMINKEYPAGE; + b.prefix = __bt_defpfx; + b.psize = 0; + } + + /* Check for the ubiquitous PDP-11. */ + if (b.lorder != BIG_ENDIAN && b.lorder != LITTLE_ENDIAN) + goto einval; + + /* Allocate and initialize DB and BTREE structures. */ + if ((t = (BTREE *)malloc(sizeof(BTREE))) == NULL) + goto err; + memset(t, 0, sizeof(BTREE)); + t->bt_bcursor.pgno = P_INVALID; + t->bt_fd = -1; /* Don't close unopened fd on error. */ + t->bt_lorder = b.lorder; + t->bt_order = NOT; + t->bt_cmp = b.compare; + t->bt_pfx = b.prefix; + t->bt_rfd = -1; + + if ((t->bt_dbp = dbp = (DB *)malloc(sizeof(DB))) == NULL) + goto err; + t->bt_flags = 0; + if (t->bt_lorder != machine_lorder) + SET(t, B_NEEDSWAP); + + dbp->type = DB_BTREE; + dbp->internal = t; + dbp->close = __bt_close; + dbp->del = __bt_delete; + dbp->fd = __bt_fd; + dbp->get = __bt_get; + dbp->put = __bt_put; + dbp->seq = __bt_seq; + dbp->sync = __bt_sync; + + /* + * If no file name was supplied, this is an in-memory btree and we + * open a backing temporary file. Otherwise, it's a disk-based tree. + */ + if (fname) { + switch(flags & O_ACCMODE) { + case O_RDONLY: + SET(t, B_RDONLY); + break; + case O_RDWR: + break; + case O_WRONLY: + default: + goto einval; + } + + if ((t->bt_fd = open(fname, flags, mode)) < 0) + goto err; + + } else { + if ((flags & O_ACCMODE) != O_RDWR) + goto einval; + if ((t->bt_fd = tmp()) == -1) + goto err; + SET(t, B_INMEM); + } + + if (fcntl(t->bt_fd, F_SETFD, 1) == -1) + goto err; + + if (fstat(t->bt_fd, &sb)) + goto err; + if (sb.st_size) { + nr = read(t->bt_fd, &m, sizeof(BTMETA)); + if (nr < 0) + goto err; + if (nr != sizeof(BTMETA)) + goto eftype; + + /* + * Read in the meta-data. This can change the notion of what + * the lorder, page size and flags are, and, when the page size + * changes, the cachesize value can change too. If the user + * specified the wrong byte order for an existing database, we + * don't bother to return an error, we just clear the NEEDSWAP + * bit. + */ + if (m.m_magic == BTREEMAGIC) + CLR(t, B_NEEDSWAP); + else { + SET(t, B_NEEDSWAP); + M_32_SWAP(m.m_magic); + M_32_SWAP(m.m_version); + M_32_SWAP(m.m_psize); + M_32_SWAP(m.m_free); + M_32_SWAP(m.m_nrecs); + M_32_SWAP(m.m_flags); + } + if (m.m_magic != BTREEMAGIC || m.m_version != BTREEVERSION) + goto eftype; + if (m.m_psize < MINPSIZE || m.m_psize > MAX_PAGE_OFFSET + 1 || + m.m_psize & sizeof(indx_t) - 1) + goto eftype; + if (m.m_flags & ~SAVEMETA) + goto eftype; + b.psize = m.m_psize; + t->bt_flags |= m.m_flags; + t->bt_free = m.m_free; + t->bt_nrecs = m.m_nrecs; + } else { + /* + * Set the page size to the best value for I/O to this file. + * Don't overflow the page offset type. + */ + if (b.psize == 0) { + b.psize = sb.st_blksize; + if (b.psize < MINPSIZE) + b.psize = MINPSIZE; + if (b.psize > MAX_PAGE_OFFSET + 1) + b.psize = MAX_PAGE_OFFSET + 1; + } + + /* Set flag if duplicates permitted. */ + if (!(b.flags & R_DUP)) + SET(t, B_NODUPS); + + t->bt_free = P_INVALID; + t->bt_nrecs = 0; + SET(t, B_METADIRTY); + } + + t->bt_psize = b.psize; + + /* Set the cache size; must be a multiple of the page size. */ + if (b.cachesize && b.cachesize & b.psize - 1) + b.cachesize += (~b.cachesize & b.psize - 1) + 1; + if (b.cachesize < b.psize * MINCACHE) + b.cachesize = b.psize * MINCACHE; + + /* Calculate number of pages to cache. */ + ncache = (b.cachesize + t->bt_psize - 1) / t->bt_psize; + + /* + * The btree data structure requires that at least two keys can fit on + * a page, but other than that there's no fixed requirement. The user + * specified a minimum number per page, and we translated that into the + * number of bytes a key/data pair can use before being placed on an + * overflow page. This calculation includes the page header, the size + * of the index referencing the leaf item and the size of the leaf item + * structure. Also, don't let the user specify a minkeypage such that + * a key/data pair won't fit even if both key and data are on overflow + * pages. + */ + t->bt_ovflsize = (t->bt_psize - BTDATAOFF) / b.minkeypage - + (sizeof(indx_t) + NBLEAFDBT(0, 0)); + if (t->bt_ovflsize < NBLEAFDBT(NOVFLSIZE, NOVFLSIZE) + sizeof(indx_t)) + t->bt_ovflsize = + NBLEAFDBT(NOVFLSIZE, NOVFLSIZE) + sizeof(indx_t); + + /* Initialize the buffer pool. */ + if ((t->bt_mp = + mpool_open(NULL, t->bt_fd, t->bt_psize, ncache)) == NULL) + goto err; + if (!ISSET(t, B_INMEM)) + mpool_filter(t->bt_mp, __bt_pgin, __bt_pgout, t); + + /* Create a root page if new tree. */ + if (nroot(t) == RET_ERROR) + goto err; + + /* Global flags. */ + if (dflags & DB_LOCK) + SET(t, B_DB_LOCK); + if (dflags & DB_SHMEM) + SET(t, B_DB_SHMEM); + if (dflags & DB_TXN) + SET(t, B_DB_TXN); + + return (dbp); + +einval: errno = EINVAL; + goto err; + +eftype: errno = EFTYPE; + goto err; + +err: if (t) { + if (t->bt_dbp) + free(t->bt_dbp); + if (t->bt_fd != -1) + (void)close(t->bt_fd); + free(t); + } + return (NULL); +} + +/* + * NROOT -- Create the root of a new tree. + * + * Parameters: + * t: tree + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +static int +nroot(t) + BTREE *t; +{ + PAGE *meta, *root; + pgno_t npg; + + if ((meta = mpool_get(t->bt_mp, 0, 0)) != NULL) { + mpool_put(t->bt_mp, meta, 0); + return (RET_SUCCESS); + } + if (errno != EINVAL) + return (RET_ERROR); + + if ((meta = mpool_new(t->bt_mp, &npg)) == NULL) + return (RET_ERROR); + + if ((root = mpool_new(t->bt_mp, &npg)) == NULL) + return (RET_ERROR); + + if (npg != P_ROOT) + return (RET_ERROR); + root->pgno = npg; + root->prevpg = root->nextpg = P_INVALID; + root->lower = BTDATAOFF; + root->upper = t->bt_psize; + root->flags = P_BLEAF; + memset(meta, 0, t->bt_psize); + mpool_put(t->bt_mp, meta, MPOOL_DIRTY); + mpool_put(t->bt_mp, root, MPOOL_DIRTY); + return (RET_SUCCESS); +} + +static int +tmp() +{ + sigset_t set, oset; + int fd; + char *envtmp; + char path[MAXPATHLEN]; + + envtmp = getenv("TMPDIR"); + (void)snprintf(path, + sizeof(path), "%s/bt.XXXXXX", envtmp ? envtmp : "/tmp"); + + (void)sigfillset(&set); + (void)sigprocmask(SIG_BLOCK, &set, &oset); + if ((fd = mkstemp(path)) != -1) + (void)unlink(path); + (void)sigprocmask(SIG_SETMASK, &oset, NULL); + return(fd); +} + +static int +byteorder() +{ + u_int32_t x; + u_char *p; + + x = 0x01020304; + p = (u_char *)&x; + switch (*p) { + case 1: + return (BIG_ENDIAN); + case 4: + return (LITTLE_ENDIAN); + default: + return (0); + } +} + +int +__bt_fd(dbp) + const DB *dbp; +{ + BTREE *t; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + /* In-memory database can't have a file descriptor. */ + if (ISSET(t, B_INMEM)) { + errno = ENOENT; + return (-1); + } + return (t->bt_fd); +} diff --git a/lib/libc/db/btree/bt_overflow.c b/lib/libc/db/btree/bt_overflow.c new file mode 100644 index 0000000..0057a03 --- /dev/null +++ b/lib/libc/db/btree/bt_overflow.c @@ -0,0 +1,224 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_overflow.c 8.2 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/param.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <db.h> +#include "btree.h" + +/* + * Big key/data code. + * + * Big key and data entries are stored on linked lists of pages. The initial + * reference is byte string stored with the key or data and is the page number + * and size. The actual record is stored in a chain of pages linked by the + * nextpg field of the PAGE header. + * + * The first page of the chain has a special property. If the record is used + * by an internal page, it cannot be deleted and the P_PRESERVE bit will be set + * in the header. + * + * XXX + * A single DBT is written to each chain, so a lot of space on the last page + * is wasted. This is a fairly major bug for some data sets. + */ + +/* + * __OVFL_GET -- Get an overflow key/data item. + * + * Parameters: + * t: tree + * p: pointer to { pgno_t, size_t } + * buf: storage address + * bufsz: storage size + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__ovfl_get(t, p, ssz, buf, bufsz) + BTREE *t; + void *p; + size_t *ssz; + char **buf; + size_t *bufsz; +{ + PAGE *h; + pgno_t pg; + size_t nb, plen, sz; + + memmove(&pg, p, sizeof(pgno_t)); + memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(size_t)); + *ssz = sz; + +#ifdef DEBUG + if (pg == P_INVALID || sz == 0) + abort(); +#endif + /* Make the buffer bigger as necessary. */ + if (*bufsz < sz) { + if ((*buf = (char *)realloc(*buf, sz)) == NULL) + return (RET_ERROR); + *bufsz = sz; + } + + /* + * Step through the linked list of pages, copying the data on each one + * into the buffer. Never copy more than the data's length. + */ + plen = t->bt_psize - BTDATAOFF; + for (p = *buf;; p = (char *)p + nb, pg = h->nextpg) { + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + + nb = MIN(sz, plen); + memmove(p, (char *)h + BTDATAOFF, nb); + mpool_put(t->bt_mp, h, 0); + + if ((sz -= nb) == 0) + break; + } + return (RET_SUCCESS); +} + +/* + * __OVFL_PUT -- Store an overflow key/data item. + * + * Parameters: + * t: tree + * data: DBT to store + * pgno: storage page number + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__ovfl_put(t, dbt, pg) + BTREE *t; + const DBT *dbt; + pgno_t *pg; +{ + PAGE *h, *last; + void *p; + pgno_t npg; + size_t nb, plen, sz; + + /* + * Allocate pages and copy the key/data record into them. Store the + * number of the first page in the chain. + */ + plen = t->bt_psize - BTDATAOFF; + for (last = NULL, p = dbt->data, sz = dbt->size;; + p = (char *)p + plen, last = h) { + if ((h = __bt_new(t, &npg)) == NULL) + return (RET_ERROR); + + h->pgno = npg; + h->nextpg = h->prevpg = P_INVALID; + h->flags = P_OVERFLOW; + h->lower = h->upper = 0; + + nb = MIN(sz, plen); + memmove((char *)h + BTDATAOFF, p, nb); + + if (last) { + last->nextpg = h->pgno; + mpool_put(t->bt_mp, last, MPOOL_DIRTY); + } else + *pg = h->pgno; + + if ((sz -= nb) == 0) { + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + break; + } + } + return (RET_SUCCESS); +} + +/* + * __OVFL_DELETE -- Delete an overflow chain. + * + * Parameters: + * t: tree + * p: pointer to { pgno_t, size_t } + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__ovfl_delete(t, p) + BTREE *t; + void *p; +{ + PAGE *h; + pgno_t pg; + size_t plen, sz; + + memmove(&pg, p, sizeof(pgno_t)); + memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(size_t)); + +#ifdef DEBUG + if (pg == P_INVALID || sz == 0) + abort(); +#endif + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + + /* Don't delete chains used by internal pages. */ + if (h->flags & P_PRESERVE) { + mpool_put(t->bt_mp, h, 0); + return (RET_SUCCESS); + } + + /* Step through the chain, calling the free routine for each page. */ + for (plen = t->bt_psize - BTDATAOFF;; sz -= plen) { + pg = h->nextpg; + __bt_free(t, h); + if (sz <= plen) + break; + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + } + return (RET_SUCCESS); +} diff --git a/lib/libc/db/btree/bt_page.c b/lib/libc/db/btree/bt_page.c new file mode 100644 index 0000000..f71a40d --- /dev/null +++ b/lib/libc/db/btree/bt_page.c @@ -0,0 +1,93 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_page.c 8.2 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <stdio.h> + +#include <db.h> +#include "btree.h" + +/* + * __BT_FREE -- Put a page on the freelist. + * + * Parameters: + * t: tree + * h: page to free + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__bt_free(t, h) + BTREE *t; + PAGE *h; +{ + /* Insert the page at the start of the free list. */ + h->prevpg = P_INVALID; + h->nextpg = t->bt_free; + t->bt_free = h->pgno; + + /* Make sure the page gets written back. */ + return (mpool_put(t->bt_mp, h, MPOOL_DIRTY)); +} + +/* + * __BT_NEW -- Get a new page, preferably from the freelist. + * + * Parameters: + * t: tree + * npg: storage for page number. + * + * Returns: + * Pointer to a page, NULL on error. + */ +PAGE * +__bt_new(t, npg) + BTREE *t; + pgno_t *npg; +{ + PAGE *h; + + if (t->bt_free != P_INVALID && + (h = mpool_get(t->bt_mp, t->bt_free, 0)) != NULL) { + *npg = t->bt_free; + t->bt_free = h->nextpg; + return (h); + } + return (mpool_new(t->bt_mp, npg)); +} diff --git a/lib/libc/db/btree/bt_put.c b/lib/libc/db/btree/bt_put.c new file mode 100644 index 0000000..11a211b --- /dev/null +++ b/lib/libc/db/btree/bt_put.c @@ -0,0 +1,318 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_put.c 8.3 (Berkeley) 9/16/93"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <db.h> +#include "btree.h" + +static EPG *bt_fast __P((BTREE *, const DBT *, const DBT *, int *)); + +/* + * __BT_PUT -- Add a btree item to the tree. + * + * Parameters: + * dbp: pointer to access method + * key: key + * data: data + * flag: R_NOOVERWRITE + * + * Returns: + * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is already in the + * tree and R_NOOVERWRITE specified. + */ +int +__bt_put(dbp, key, data, flags) + const DB *dbp; + DBT *key; + const DBT *data; + u_int flags; +{ + BTREE *t; + DBT tkey, tdata; + EPG *e; + PAGE *h; + indx_t index, nxtindex; + pgno_t pg; + size_t nbytes; + int dflags, exact, status; + char *dest, db[NOVFLSIZE], kb[NOVFLSIZE]; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + switch (flags) { + case R_CURSOR: + if (!ISSET(t, B_SEQINIT)) + goto einval; + if (ISSET(t, B_DELCRSR)) + goto einval; + break; + case 0: + case R_NOOVERWRITE: + break; + default: +einval: errno = EINVAL; + return (RET_ERROR); + } + + if (ISSET(t, B_RDONLY)) { + errno = EPERM; + return (RET_ERROR); + } + + /* + * If the key/data won't fit on a page, store it on indirect pages. + * Only store the key on the overflow page if it's too big after the + * data is on an overflow page. + * + * XXX + * If the insert fails later on, these pages aren't recovered. + */ + dflags = 0; + if (key->size + data->size > t->bt_ovflsize) { + if (key->size > t->bt_ovflsize) { +storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR) + return (RET_ERROR); + tkey.data = kb; + tkey.size = NOVFLSIZE; + memmove(kb, &pg, sizeof(pgno_t)); + memmove(kb + sizeof(pgno_t), + &key->size, sizeof(size_t)); + dflags |= P_BIGKEY; + key = &tkey; + } + if (key->size + data->size > t->bt_ovflsize) { + if (__ovfl_put(t, data, &pg) == RET_ERROR) + return (RET_ERROR); + tdata.data = db; + tdata.size = NOVFLSIZE; + memmove(db, &pg, sizeof(pgno_t)); + memmove(db + sizeof(pgno_t), + &data->size, sizeof(size_t)); + dflags |= P_BIGDATA; + data = &tdata; + } + if (key->size + data->size > t->bt_ovflsize) + goto storekey; + } + + /* Replace the cursor. */ + if (flags == R_CURSOR) { + if ((h = mpool_get(t->bt_mp, t->bt_bcursor.pgno, 0)) == NULL) + return (RET_ERROR); + index = t->bt_bcursor.index; + goto delete; + } + + /* + * Find the key to delete, or, the location at which to insert. Bt_fast + * and __bt_search pin the returned page. + */ + if (t->bt_order == NOT || (e = bt_fast(t, key, data, &exact)) == NULL) + if ((e = __bt_search(t, key, &exact)) == NULL) + return (RET_ERROR); + h = e->page; + index = e->index; + + /* + * Add the specified key/data pair to the tree. If an identical key + * is already in the tree, and R_NOOVERWRITE is set, an error is + * returned. If R_NOOVERWRITE is not set, the key is either added (if + * duplicates are permitted) or an error is returned. + * + * Pages are split as required. + */ + switch (flags) { + case R_NOOVERWRITE: + if (!exact) + break; + /* + * One special case is if the cursor references the record and + * it's been flagged for deletion. Then, we delete the record, + * leaving the cursor there -- this means that the inserted + * record will not be seen in a cursor scan. + */ + if (ISSET(t, B_DELCRSR) && t->bt_bcursor.pgno == h->pgno && + t->bt_bcursor.index == index) { + CLR(t, B_DELCRSR); + goto delete; + } + mpool_put(t->bt_mp, h, 0); + return (RET_SPECIAL); + default: + if (!exact || !ISSET(t, B_NODUPS)) + break; +delete: if (__bt_dleaf(t, h, index) == RET_ERROR) { + mpool_put(t->bt_mp, h, 0); + return (RET_ERROR); + } + break; + } + + /* + * If not enough room, or the user has put a ceiling on the number of + * keys permitted in the page, split the page. The split code will + * insert the key and data and unpin the current page. If inserting + * into the offset array, shift the pointers up. + */ + nbytes = NBLEAFDBT(key->size, data->size); + if (h->upper - h->lower < nbytes + sizeof(indx_t)) { + if ((status = __bt_split(t, h, key, + data, dflags, nbytes, index)) != RET_SUCCESS) + return (status); + goto success; + } + + if (index < (nxtindex = NEXTINDEX(h))) + memmove(h->linp + index + 1, h->linp + index, + (nxtindex - index) * sizeof(indx_t)); + h->lower += sizeof(indx_t); + + h->linp[index] = h->upper -= nbytes; + dest = (char *)h + h->upper; + WR_BLEAF(dest, key, data, dflags); + + if (t->bt_order == NOT) + if (h->nextpg == P_INVALID) { + if (index == NEXTINDEX(h) - 1) { + t->bt_order = FORWARD; + t->bt_last.index = index; + t->bt_last.pgno = h->pgno; + } + } else if (h->prevpg == P_INVALID) { + if (index == 0) { + t->bt_order = BACK; + t->bt_last.index = 0; + t->bt_last.pgno = h->pgno; + } + } + + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + +success: + if (flags == R_SETCURSOR) { + t->bt_bcursor.pgno = e->page->pgno; + t->bt_bcursor.index = e->index; + } + SET(t, B_MODIFIED); + return (RET_SUCCESS); +} + +#ifdef STATISTICS +u_long bt_cache_hit, bt_cache_miss; +#endif + +/* + * BT_FAST -- Do a quick check for sorted data. + * + * Parameters: + * t: tree + * key: key to insert + * + * Returns: + * EPG for new record or NULL if not found. + */ +static EPG * +bt_fast(t, key, data, exactp) + BTREE *t; + const DBT *key, *data; + int *exactp; +{ + PAGE *h; + size_t nbytes; + int cmp; + + if ((h = mpool_get(t->bt_mp, t->bt_last.pgno, 0)) == NULL) { + t->bt_order = NOT; + return (NULL); + } + t->bt_cur.page = h; + t->bt_cur.index = t->bt_last.index; + + /* + * If won't fit in this page or have too many keys in this page, have + * to search to get split stack. + */ + nbytes = NBLEAFDBT(key->size, data->size); + if (h->upper - h->lower < nbytes + sizeof(indx_t)) + goto miss; + + if (t->bt_order == FORWARD) { + if (t->bt_cur.page->nextpg != P_INVALID) + goto miss; + if (t->bt_cur.index != NEXTINDEX(h) - 1) + goto miss; + if ((cmp = __bt_cmp(t, key, &t->bt_cur)) < 0) + goto miss; + t->bt_last.index = cmp ? ++t->bt_cur.index : t->bt_cur.index; + } else { + if (t->bt_cur.page->prevpg != P_INVALID) + goto miss; + if (t->bt_cur.index != 0) + goto miss; + if ((cmp = __bt_cmp(t, key, &t->bt_cur)) > 0) + goto miss; + t->bt_last.index = 0; + } + *exactp = cmp == 0; +#ifdef STATISTICS + ++bt_cache_hit; +#endif + return (&t->bt_cur); + +miss: +#ifdef STATISTICS + ++bt_cache_miss; +#endif + t->bt_order = NOT; + mpool_put(t->bt_mp, h, 0); + return (NULL); +} diff --git a/lib/libc/db/btree/bt_search.c b/lib/libc/db/btree/bt_search.c new file mode 100644 index 0000000..ff33421 --- /dev/null +++ b/lib/libc/db/btree/bt_search.c @@ -0,0 +1,235 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_search.c 8.6 (Berkeley) 3/15/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <stdio.h> + +#include <db.h> +#include "btree.h" + +static int bt_snext __P((BTREE *, PAGE *, const DBT *, int *)); +static int bt_sprev __P((BTREE *, PAGE *, const DBT *, int *)); + +/* + * __BT_SEARCH -- Search a btree for a key. + * + * Parameters: + * t: tree to search + * key: key to find + * exactp: pointer to exact match flag + * + * Returns: + * The EPG for matching record, if any, or the EPG for the location + * of the key, if it were inserted into the tree, is entered into + * the bt_cur field of the tree. A pointer to the field is returned. + */ +EPG * +__bt_search(t, key, exactp) + BTREE *t; + const DBT *key; + int *exactp; +{ + PAGE *h; + indx_t base, index, lim; + pgno_t pg; + int cmp; + + BT_CLR(t); + for (pg = P_ROOT;;) { + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (NULL); + + /* Do a binary search on the current page. */ + t->bt_cur.page = h; + for (base = 0, lim = NEXTINDEX(h); lim; lim >>= 1) { + t->bt_cur.index = index = base + (lim >> 1); + if ((cmp = __bt_cmp(t, key, &t->bt_cur)) == 0) { + if (h->flags & P_BLEAF) { + *exactp = 1; + return (&t->bt_cur); + } + goto next; + } + if (cmp > 0) { + base = index + 1; + --lim; + } + } + + /* + * If it's a leaf page, and duplicates aren't allowed, we're + * done. If duplicates are allowed, it's possible that there + * were duplicate keys on duplicate pages, and they were later + * deleted, so we could be on a page with no matches while + * there are matches on other pages. If we're at the start or + * end of a page, check on both sides. + */ + if (h->flags & P_BLEAF) { + t->bt_cur.index = base; + *exactp = 0; + if (!ISSET(t, B_NODUPS)) { + if (base == 0 && + bt_sprev(t, h, key, exactp)) + return (&t->bt_cur); + if (base == NEXTINDEX(h) && + bt_snext(t, h, key, exactp)) + return (&t->bt_cur); + } + return (&t->bt_cur); + } + + /* + * No match found. Base is the smallest index greater than + * key and may be zero or a last + 1 index. If it's non-zero, + * decrement by one, and record the internal page which should + * be a parent page for the key. If a split later occurs, the + * inserted page will be to the right of the saved page. + */ + index = base ? base - 1 : base; + +next: if (__bt_push(t, h->pgno, index) == RET_ERROR) + return (NULL); + pg = GETBINTERNAL(h, index)->pgno; + mpool_put(t->bt_mp, h, 0); + } +} + +/* + * BT_SNEXT -- Check for an exact match after the key. + * + * Parameters: + * t: tree to search + * h: current page. + * key: key to find + * exactp: pointer to exact match flag + * + * Returns: + * If an exact match found. + */ +static int +bt_snext(t, h, key, exactp) + BTREE *t; + PAGE *h; + const DBT *key; + int *exactp; +{ + EPG e; + PAGE *tp; + pgno_t pg; + + /* Skip until reach the end of the tree or a key. */ + for (pg = h->nextpg; pg != P_INVALID;) { + if ((tp = mpool_get(t->bt_mp, pg, 0)) == NULL) { + mpool_put(t->bt_mp, h, 0); + return (NULL); + } + if (NEXTINDEX(tp) != 0) + break; + pg = tp->prevpg; + mpool_put(t->bt_mp, tp, 0); + } + /* + * The key is either an exact match, or not as good as + * the one we already have. + */ + if (pg != P_INVALID) { + e.page = tp; + e.index = NEXTINDEX(tp) - 1; + if (__bt_cmp(t, key, &e) == 0) { + mpool_put(t->bt_mp, h, 0); + t->bt_cur = e; + *exactp = 1; + return (1); + } + } + return (0); +} + +/* + * BT_SPREV -- Check for an exact match before the key. + * + * Parameters: + * t: tree to search + * h: current page. + * key: key to find + * exactp: pointer to exact match flag + * + * Returns: + * If an exact match found. + */ +static int +bt_sprev(t, h, key, exactp) + BTREE *t; + PAGE *h; + const DBT *key; + int *exactp; +{ + EPG e; + PAGE *tp; + pgno_t pg; + + /* Skip until reach the beginning of the tree or a key. */ + for (pg = h->prevpg; pg != P_INVALID;) { + if ((tp = mpool_get(t->bt_mp, pg, 0)) == NULL) { + mpool_put(t->bt_mp, h, 0); + return (NULL); + } + if (NEXTINDEX(tp) != 0) + break; + pg = tp->prevpg; + mpool_put(t->bt_mp, tp, 0); + } + /* + * The key is either an exact match, or not as good as + * the one we already have. + */ + if (pg != P_INVALID) { + e.page = tp; + e.index = NEXTINDEX(tp) - 1; + if (__bt_cmp(t, key, &e) == 0) { + mpool_put(t->bt_mp, h, 0); + t->bt_cur = e; + *exactp = 1; + return (1); + } + } + return (0); +} diff --git a/lib/libc/db/btree/bt_seq.c b/lib/libc/db/btree/bt_seq.c new file mode 100644 index 0000000..182ef70 --- /dev/null +++ b/lib/libc/db/btree/bt_seq.c @@ -0,0 +1,380 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_seq.c 8.2 (Berkeley) 9/7/93"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> + +#include <db.h> +#include "btree.h" + +static int bt_seqadv __P((BTREE *, EPG *, int)); +static int bt_seqset __P((BTREE *, EPG *, DBT *, int)); + +/* + * Sequential scan support. + * + * The tree can be scanned sequentially, starting from either end of the tree + * or from any specific key. A scan request before any scanning is done is + * initialized as starting from the least node. + * + * Each tree has an EPGNO which has the current position of the cursor. The + * cursor has to survive deletions/insertions in the tree without losing its + * position. This is done by noting deletions without doing them, and then + * doing them when the cursor moves (or the tree is closed). + */ + +/* + * __BT_SEQ -- Btree sequential scan interface. + * + * Parameters: + * dbp: pointer to access method + * key: key for positioning and return value + * data: data return value + * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. + * + * Returns: + * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. + */ +int +__bt_seq(dbp, key, data, flags) + const DB *dbp; + DBT *key, *data; + u_int flags; +{ + BTREE *t; + EPG e; + int status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + /* + * If scan unitialized as yet, or starting at a specific record, set + * the scan to a specific key. Both bt_seqset and bt_seqadv pin the + * page the cursor references if they're successful. + */ + switch(flags) { + case R_NEXT: + case R_PREV: + if (ISSET(t, B_SEQINIT)) { + status = bt_seqadv(t, &e, flags); + break; + } + /* FALLTHROUGH */ + case R_CURSOR: + case R_FIRST: + case R_LAST: + status = bt_seqset(t, &e, key, flags); + break; + default: + errno = EINVAL; + return (RET_ERROR); + } + + if (status == RET_SUCCESS) { + status = __bt_ret(t, &e, key, data); + + /* Update the actual cursor. */ + t->bt_bcursor.pgno = e.page->pgno; + t->bt_bcursor.index = e.index; + + /* + * If the user is doing concurrent access, we copied the + * key/data, toss the page. + */ + if (ISSET(t, B_DB_LOCK)) + mpool_put(t->bt_mp, e.page, 0); + else + t->bt_pinned = e.page; + SET(t, B_SEQINIT); + } + return (status); +} + +/* + * BT_SEQSET -- Set the sequential scan to a specific key. + * + * Parameters: + * t: tree + * ep: storage for returned key + * key: key for initial scan position + * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV + * + * Side effects: + * Pins the page the cursor references. + * + * Returns: + * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. + */ +static int +bt_seqset(t, ep, key, flags) + BTREE *t; + EPG *ep; + DBT *key; + int flags; +{ + EPG *e; + PAGE *h; + pgno_t pg; + int exact; + + /* + * Delete any already deleted record that we've been saving because + * the cursor pointed to it. Since going to a specific key, should + * delete any logically deleted records so they aren't found. + */ + if (ISSET(t, B_DELCRSR) && __bt_crsrdel(t, &t->bt_bcursor)) + return (RET_ERROR); + + /* + * Find the first, last or specific key in the tree and point the cursor + * at it. The cursor may not be moved until a new key has been found. + */ + switch(flags) { + case R_CURSOR: /* Keyed scan. */ + /* + * Find the first instance of the key or the smallest key which + * is greater than or equal to the specified key. If run out + * of keys, return RET_SPECIAL. + */ + if (key->data == NULL || key->size == 0) { + errno = EINVAL; + return (RET_ERROR); + } + e = __bt_first(t, key, &exact); /* Returns pinned page. */ + if (e == NULL) + return (RET_ERROR); + /* + * If at the end of a page, skip any empty pages and find the + * next entry. + */ + if (e->index == NEXTINDEX(e->page)) { + h = e->page; + do { + pg = h->nextpg; + mpool_put(t->bt_mp, h, 0); + if (pg == P_INVALID) + return (RET_SPECIAL); + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + } while (NEXTINDEX(h) == 0); + e->index = 0; + e->page = h; + } + *ep = *e; + break; + case R_FIRST: /* First record. */ + case R_NEXT: + /* Walk down the left-hand side of the tree. */ + for (pg = P_ROOT;;) { + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + if (h->flags & (P_BLEAF | P_RLEAF)) + break; + pg = GETBINTERNAL(h, 0)->pgno; + mpool_put(t->bt_mp, h, 0); + } + + /* Skip any empty pages. */ + while (NEXTINDEX(h) == 0 && h->nextpg != P_INVALID) { + pg = h->nextpg; + mpool_put(t->bt_mp, h, 0); + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + } + + if (NEXTINDEX(h) == 0) { + mpool_put(t->bt_mp, h, 0); + return (RET_SPECIAL); + } + + ep->page = h; + ep->index = 0; + break; + case R_LAST: /* Last record. */ + case R_PREV: + /* Walk down the right-hand side of the tree. */ + for (pg = P_ROOT;;) { + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + if (h->flags & (P_BLEAF | P_RLEAF)) + break; + pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno; + mpool_put(t->bt_mp, h, 0); + } + + /* Skip any empty pages. */ + while (NEXTINDEX(h) == 0 && h->prevpg != P_INVALID) { + pg = h->prevpg; + mpool_put(t->bt_mp, h, 0); + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + } + + if (NEXTINDEX(h) == 0) { + mpool_put(t->bt_mp, h, 0); + return (RET_SPECIAL); + } + + ep->page = h; + ep->index = NEXTINDEX(h) - 1; + break; + } + return (RET_SUCCESS); +} + +/* + * BT_SEQADVANCE -- Advance the sequential scan. + * + * Parameters: + * t: tree + * flags: R_NEXT, R_PREV + * + * Side effects: + * Pins the page the new key/data record is on. + * + * Returns: + * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. + */ +static int +bt_seqadv(t, e, flags) + BTREE *t; + EPG *e; + int flags; +{ + EPGNO *c, delc; + PAGE *h; + indx_t index; + pgno_t pg; + + /* Save the current cursor if going to delete it. */ + c = &t->bt_bcursor; + if (ISSET(t, B_DELCRSR)) + delc = *c; + + if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL) + return (RET_ERROR); + + /* + * Find the next/previous record in the tree and point the cursor at it. + * The cursor may not be moved until a new key has been found. + */ + index = c->index; + switch(flags) { + case R_NEXT: /* Next record. */ + if (++index == NEXTINDEX(h)) { + do { + pg = h->nextpg; + mpool_put(t->bt_mp, h, 0); + if (pg == P_INVALID) + return (RET_SPECIAL); + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + } while (NEXTINDEX(h) == 0); + index = 0; + } + break; + case R_PREV: /* Previous record. */ + if (index-- == 0) { + do { + pg = h->prevpg; + mpool_put(t->bt_mp, h, 0); + if (pg == P_INVALID) + return (RET_SPECIAL); + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + } while (NEXTINDEX(h) == 0); + index = NEXTINDEX(h) - 1; + } + break; + } + + e->page = h; + e->index = index; + + /* + * Delete any already deleted record that we've been saving because the + * cursor pointed to it. This could cause the new index to be shifted + * down by one if the record we're deleting is on the same page and has + * a larger index. + */ + if (ISSET(t, B_DELCRSR)) { + CLR(t, B_DELCRSR); /* Don't try twice. */ + if (c->pgno == delc.pgno && c->index > delc.index) + --c->index; + if (__bt_crsrdel(t, &delc)) + return (RET_ERROR); + } + return (RET_SUCCESS); +} + +/* + * __BT_CRSRDEL -- Delete the record referenced by the cursor. + * + * Parameters: + * t: tree + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__bt_crsrdel(t, c) + BTREE *t; + EPGNO *c; +{ + PAGE *h; + int status; + + CLR(t, B_DELCRSR); /* Don't try twice. */ + if ((h = mpool_get(t->bt_mp, c->pgno, 0)) == NULL) + return (RET_ERROR); + status = __bt_dleaf(t, h, c->index); + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + return (status); +} diff --git a/lib/libc/db/btree/bt_split.c b/lib/libc/db/btree/bt_split.c new file mode 100644 index 0000000..4a572c0 --- /dev/null +++ b/lib/libc/db/btree/bt_split.c @@ -0,0 +1,825 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_split.c 8.3 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <db.h> +#include "btree.h" + +static int bt_broot __P((BTREE *, PAGE *, PAGE *, PAGE *)); +static PAGE *bt_page + __P((BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t)); +static int bt_preserve __P((BTREE *, pgno_t)); +static PAGE *bt_psplit + __P((BTREE *, PAGE *, PAGE *, PAGE *, indx_t *, size_t)); +static PAGE *bt_root + __P((BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t)); +static int bt_rroot __P((BTREE *, PAGE *, PAGE *, PAGE *)); +static recno_t rec_total __P((PAGE *)); + +#ifdef STATISTICS +u_long bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved; +#endif + +/* + * __BT_SPLIT -- Split the tree. + * + * Parameters: + * t: tree + * sp: page to split + * key: key to insert + * data: data to insert + * flags: BIGKEY/BIGDATA flags + * ilen: insert length + * skip: index to leave open + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__bt_split(t, sp, key, data, flags, ilen, skip) + BTREE *t; + PAGE *sp; + const DBT *key, *data; + int flags; + size_t ilen; + indx_t skip; +{ + BINTERNAL *bi; + BLEAF *bl, *tbl; + DBT a, b; + EPGNO *parent; + PAGE *h, *l, *r, *lchild, *rchild; + indx_t nxtindex; + size_t n, nbytes, nksize; + int parentsplit; + char *dest; + + /* + * Split the page into two pages, l and r. The split routines return + * a pointer to the page into which the key should be inserted and with + * skip set to the offset which should be used. Additionally, l and r + * are pinned. + */ + h = sp->pgno == P_ROOT ? + bt_root(t, sp, &l, &r, &skip, ilen) : + bt_page(t, sp, &l, &r, &skip, ilen); + if (h == NULL) + return (RET_ERROR); + + /* + * Insert the new key/data pair into the leaf page. (Key inserts + * always cause a leaf page to split first.) + */ + h->linp[skip] = h->upper -= ilen; + dest = (char *)h + h->upper; + if (ISSET(t, R_RECNO)) + WR_RLEAF(dest, data, flags) + else + WR_BLEAF(dest, key, data, flags) + + /* If the root page was split, make it look right. */ + if (sp->pgno == P_ROOT && + (ISSET(t, R_RECNO) ? + bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR) + goto err2; + + /* + * Now we walk the parent page stack -- a LIFO stack of the pages that + * were traversed when we searched for the page that split. Each stack + * entry is a page number and a page index offset. The offset is for + * the page traversed on the search. We've just split a page, so we + * have to insert a new key into the parent page. + * + * If the insert into the parent page causes it to split, may have to + * continue splitting all the way up the tree. We stop if the root + * splits or the page inserted into didn't have to split to hold the + * new key. Some algorithms replace the key for the old page as well + * as the new page. We don't, as there's no reason to believe that the + * first key on the old page is any better than the key we have, and, + * in the case of a key being placed at index 0 causing the split, the + * key is unavailable. + * + * There are a maximum of 5 pages pinned at any time. We keep the left + * and right pages pinned while working on the parent. The 5 are the + * two children, left parent and right parent (when the parent splits) + * and the root page or the overflow key page when calling bt_preserve. + * This code must make sure that all pins are released other than the + * root page or overflow page which is unlocked elsewhere. + */ + while ((parent = BT_POP(t)) != NULL) { + lchild = l; + rchild = r; + + /* Get the parent page. */ + if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) + goto err2; + + /* + * The new key goes ONE AFTER the index, because the split + * was to the right. + */ + skip = parent->index + 1; + + /* + * Calculate the space needed on the parent page. + * + * Prefix trees: space hack when inserting into BINTERNAL + * pages. Retain only what's needed to distinguish between + * the new entry and the LAST entry on the page to its left. + * If the keys compare equal, retain the entire key. Note, + * we don't touch overflow keys, and the entire key must be + * retained for the next-to-left most key on the leftmost + * page of each level, or the search will fail. Applicable + * ONLY to internal pages that have leaf pages as children. + * Further reduction of the key between pairs of internal + * pages loses too much information. + */ + switch (rchild->flags & P_TYPE) { + case P_BINTERNAL: + bi = GETBINTERNAL(rchild, 0); + nbytes = NBINTERNAL(bi->ksize); + break; + case P_BLEAF: + bl = GETBLEAF(rchild, 0); + nbytes = NBINTERNAL(bl->ksize); + if (t->bt_pfx && !(bl->flags & P_BIGKEY) && + (h->prevpg != P_INVALID || skip > 1)) { + tbl = GETBLEAF(lchild, NEXTINDEX(lchild) - 1); + a.size = tbl->ksize; + a.data = tbl->bytes; + b.size = bl->ksize; + b.data = bl->bytes; + nksize = t->bt_pfx(&a, &b); + n = NBINTERNAL(nksize); + if (n < nbytes) { +#ifdef STATISTICS + bt_pfxsaved += nbytes - n; +#endif + nbytes = n; + } else + nksize = 0; + } else + nksize = 0; + break; + case P_RINTERNAL: + case P_RLEAF: + nbytes = NRINTERNAL; + break; + default: + abort(); + } + + /* Split the parent page if necessary or shift the indices. */ + if (h->upper - h->lower < nbytes + sizeof(indx_t)) { + sp = h; + h = h->pgno == P_ROOT ? + bt_root(t, h, &l, &r, &skip, nbytes) : + bt_page(t, h, &l, &r, &skip, nbytes); + if (h == NULL) + goto err1; + parentsplit = 1; + } else { + if (skip < (nxtindex = NEXTINDEX(h))) + memmove(h->linp + skip + 1, h->linp + skip, + (nxtindex - skip) * sizeof(indx_t)); + h->lower += sizeof(indx_t); + parentsplit = 0; + } + + /* Insert the key into the parent page. */ + switch(rchild->flags & P_TYPE) { + case P_BINTERNAL: + h->linp[skip] = h->upper -= nbytes; + dest = (char *)h + h->linp[skip]; + memmove(dest, bi, nbytes); + ((BINTERNAL *)dest)->pgno = rchild->pgno; + break; + case P_BLEAF: + h->linp[skip] = h->upper -= nbytes; + dest = (char *)h + h->linp[skip]; + WR_BINTERNAL(dest, nksize ? nksize : bl->ksize, + rchild->pgno, bl->flags & P_BIGKEY); + memmove(dest, bl->bytes, nksize ? nksize : bl->ksize); + if (bl->flags & P_BIGKEY && + bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR) + goto err1; + break; + case P_RINTERNAL: + /* + * Update the left page count. If split + * added at index 0, fix the correct page. + */ + if (skip > 0) + dest = (char *)h + h->linp[skip - 1]; + else + dest = (char *)l + l->linp[NEXTINDEX(l) - 1]; + ((RINTERNAL *)dest)->nrecs = rec_total(lchild); + ((RINTERNAL *)dest)->pgno = lchild->pgno; + + /* Update the right page count. */ + h->linp[skip] = h->upper -= nbytes; + dest = (char *)h + h->linp[skip]; + ((RINTERNAL *)dest)->nrecs = rec_total(rchild); + ((RINTERNAL *)dest)->pgno = rchild->pgno; + break; + case P_RLEAF: + /* + * Update the left page count. If split + * added at index 0, fix the correct page. + */ + if (skip > 0) + dest = (char *)h + h->linp[skip - 1]; + else + dest = (char *)l + l->linp[NEXTINDEX(l) - 1]; + ((RINTERNAL *)dest)->nrecs = NEXTINDEX(lchild); + ((RINTERNAL *)dest)->pgno = lchild->pgno; + + /* Update the right page count. */ + h->linp[skip] = h->upper -= nbytes; + dest = (char *)h + h->linp[skip]; + ((RINTERNAL *)dest)->nrecs = NEXTINDEX(rchild); + ((RINTERNAL *)dest)->pgno = rchild->pgno; + break; + default: + abort(); + } + + /* Unpin the held pages. */ + if (!parentsplit) { + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + break; + } + + /* If the root page was split, make it look right. */ + if (sp->pgno == P_ROOT && + (ISSET(t, R_RECNO) ? + bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR) + goto err1; + + mpool_put(t->bt_mp, lchild, MPOOL_DIRTY); + mpool_put(t->bt_mp, rchild, MPOOL_DIRTY); + } + + /* Unpin the held pages. */ + mpool_put(t->bt_mp, l, MPOOL_DIRTY); + mpool_put(t->bt_mp, r, MPOOL_DIRTY); + + /* Clear any pages left on the stack. */ + return (RET_SUCCESS); + + /* + * If something fails in the above loop we were already walking back + * up the tree and the tree is now inconsistent. Nothing much we can + * do about it but release any memory we're holding. + */ +err1: mpool_put(t->bt_mp, lchild, MPOOL_DIRTY); + mpool_put(t->bt_mp, rchild, MPOOL_DIRTY); + +err2: mpool_put(t->bt_mp, l, 0); + mpool_put(t->bt_mp, r, 0); + __dbpanic(t->bt_dbp); + return (RET_ERROR); +} + +/* + * BT_PAGE -- Split a non-root page of a btree. + * + * Parameters: + * t: tree + * h: root page + * lp: pointer to left page pointer + * rp: pointer to right page pointer + * skip: pointer to index to leave open + * ilen: insert length + * + * Returns: + * Pointer to page in which to insert or NULL on error. + */ +static PAGE * +bt_page(t, h, lp, rp, skip, ilen) + BTREE *t; + PAGE *h, **lp, **rp; + indx_t *skip; + size_t ilen; +{ + PAGE *l, *r, *tp; + pgno_t npg; + +#ifdef STATISTICS + ++bt_split; +#endif + /* Put the new right page for the split into place. */ + if ((r = __bt_new(t, &npg)) == NULL) + return (NULL); + r->pgno = npg; + r->lower = BTDATAOFF; + r->upper = t->bt_psize; + r->nextpg = h->nextpg; + r->prevpg = h->pgno; + r->flags = h->flags & P_TYPE; + + /* + * If we're splitting the last page on a level because we're appending + * a key to it (skip is NEXTINDEX()), it's likely that the data is + * sorted. Adding an empty page on the side of the level is less work + * and can push the fill factor much higher than normal. If we're + * wrong it's no big deal, we'll just do the split the right way next + * time. It may look like it's equally easy to do a similar hack for + * reverse sorted data, that is, split the tree left, but it's not. + * Don't even try. + */ + if (h->nextpg == P_INVALID && *skip == NEXTINDEX(h)) { +#ifdef STATISTICS + ++bt_sortsplit; +#endif + h->nextpg = r->pgno; + r->lower = BTDATAOFF + sizeof(indx_t); + *skip = 0; + *lp = h; + *rp = r; + return (r); + } + + /* Put the new left page for the split into place. */ + if ((l = (PAGE *)malloc(t->bt_psize)) == NULL) { + mpool_put(t->bt_mp, r, 0); + return (NULL); + } + l->pgno = h->pgno; + l->nextpg = r->pgno; + l->prevpg = h->prevpg; + l->lower = BTDATAOFF; + l->upper = t->bt_psize; + l->flags = h->flags & P_TYPE; + + /* Fix up the previous pointer of the page after the split page. */ + if (h->nextpg != P_INVALID) { + if ((tp = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) { + free(l); + /* XXX mpool_free(t->bt_mp, r->pgno); */ + return (NULL); + } + tp->prevpg = r->pgno; + mpool_put(t->bt_mp, tp, 0); + } + + /* + * Split right. The key/data pairs aren't sorted in the btree page so + * it's simpler to copy the data from the split page onto two new pages + * instead of copying half the data to the right page and compacting + * the left page in place. Since the left page can't change, we have + * to swap the original and the allocated left page after the split. + */ + tp = bt_psplit(t, h, l, r, skip, ilen); + + /* Move the new left page onto the old left page. */ + memmove(h, l, t->bt_psize); + if (tp == l) + tp = h; + free(l); + + *lp = h; + *rp = r; + return (tp); +} + +/* + * BT_ROOT -- Split the root page of a btree. + * + * Parameters: + * t: tree + * h: root page + * lp: pointer to left page pointer + * rp: pointer to right page pointer + * skip: pointer to index to leave open + * ilen: insert length + * + * Returns: + * Pointer to page in which to insert or NULL on error. + */ +static PAGE * +bt_root(t, h, lp, rp, skip, ilen) + BTREE *t; + PAGE *h, **lp, **rp; + indx_t *skip; + size_t ilen; +{ + PAGE *l, *r, *tp; + pgno_t lnpg, rnpg; + +#ifdef STATISTICS + ++bt_split; + ++bt_rootsplit; +#endif + /* Put the new left and right pages for the split into place. */ + if ((l = __bt_new(t, &lnpg)) == NULL || + (r = __bt_new(t, &rnpg)) == NULL) + return (NULL); + l->pgno = lnpg; + r->pgno = rnpg; + l->nextpg = r->pgno; + r->prevpg = l->pgno; + l->prevpg = r->nextpg = P_INVALID; + l->lower = r->lower = BTDATAOFF; + l->upper = r->upper = t->bt_psize; + l->flags = r->flags = h->flags & P_TYPE; + + /* Split the root page. */ + tp = bt_psplit(t, h, l, r, skip, ilen); + + *lp = l; + *rp = r; + return (tp); +} + +/* + * BT_RROOT -- Fix up the recno root page after it has been split. + * + * Parameters: + * t: tree + * h: root page + * l: left page + * r: right page + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +static int +bt_rroot(t, h, l, r) + BTREE *t; + PAGE *h, *l, *r; +{ + char *dest; + + /* Insert the left and right keys, set the header information. */ + h->linp[0] = h->upper = t->bt_psize - NRINTERNAL; + dest = (char *)h + h->upper; + WR_RINTERNAL(dest, + l->flags & P_RLEAF ? NEXTINDEX(l) : rec_total(l), l->pgno); + + h->linp[1] = h->upper -= NRINTERNAL; + dest = (char *)h + h->upper; + WR_RINTERNAL(dest, + r->flags & P_RLEAF ? NEXTINDEX(r) : rec_total(r), r->pgno); + + h->lower = BTDATAOFF + 2 * sizeof(indx_t); + + /* Unpin the root page, set to recno internal page. */ + h->flags &= ~P_TYPE; + h->flags |= P_RINTERNAL; + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + + return (RET_SUCCESS); +} + +/* + * BT_BROOT -- Fix up the btree root page after it has been split. + * + * Parameters: + * t: tree + * h: root page + * l: left page + * r: right page + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +static int +bt_broot(t, h, l, r) + BTREE *t; + PAGE *h, *l, *r; +{ + BINTERNAL *bi; + BLEAF *bl; + size_t nbytes; + char *dest; + + /* + * If the root page was a leaf page, change it into an internal page. + * We copy the key we split on (but not the key's data, in the case of + * a leaf page) to the new root page. + * + * The btree comparison code guarantees that the left-most key on any + * level of the tree is never used, so it doesn't need to be filled in. + */ + nbytes = NBINTERNAL(0); + h->linp[0] = h->upper = t->bt_psize - nbytes; + dest = (char *)h + h->upper; + WR_BINTERNAL(dest, 0, l->pgno, 0); + + switch(h->flags & P_TYPE) { + case P_BLEAF: + bl = GETBLEAF(r, 0); + nbytes = NBINTERNAL(bl->ksize); + h->linp[1] = h->upper -= nbytes; + dest = (char *)h + h->upper; + WR_BINTERNAL(dest, bl->ksize, r->pgno, 0); + memmove(dest, bl->bytes, bl->ksize); + + /* + * If the key is on an overflow page, mark the overflow chain + * so it isn't deleted when the leaf copy of the key is deleted. + */ + if (bl->flags & P_BIGKEY && + bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR) + return (RET_ERROR); + break; + case P_BINTERNAL: + bi = GETBINTERNAL(r, 0); + nbytes = NBINTERNAL(bi->ksize); + h->linp[1] = h->upper -= nbytes; + dest = (char *)h + h->upper; + memmove(dest, bi, nbytes); + ((BINTERNAL *)dest)->pgno = r->pgno; + break; + default: + abort(); + } + + /* There are two keys on the page. */ + h->lower = BTDATAOFF + 2 * sizeof(indx_t); + + /* Unpin the root page, set to btree internal page. */ + h->flags &= ~P_TYPE; + h->flags |= P_BINTERNAL; + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + + return (RET_SUCCESS); +} + +/* + * BT_PSPLIT -- Do the real work of splitting the page. + * + * Parameters: + * t: tree + * h: page to be split + * l: page to put lower half of data + * r: page to put upper half of data + * pskip: pointer to index to leave open + * ilen: insert length + * + * Returns: + * Pointer to page in which to insert. + */ +static PAGE * +bt_psplit(t, h, l, r, pskip, ilen) + BTREE *t; + PAGE *h, *l, *r; + indx_t *pskip; + size_t ilen; +{ + BINTERNAL *bi; + BLEAF *bl; + RLEAF *rl; + EPGNO *c; + PAGE *rval; + void *src; + indx_t full, half, nxt, off, skip, top, used; + size_t nbytes; + int bigkeycnt, isbigkey; + + /* + * Split the data to the left and right pages. Leave the skip index + * open. Additionally, make some effort not to split on an overflow + * key. This makes internal page processing faster and can save + * space as overflow keys used by internal pages are never deleted. + */ + bigkeycnt = 0; + skip = *pskip; + full = t->bt_psize - BTDATAOFF; + half = full / 2; + used = 0; + for (nxt = off = 0, top = NEXTINDEX(h); nxt < top; ++off) { + if (skip == off) { + nbytes = ilen; + isbigkey = 0; /* XXX: not really known. */ + } else + switch (h->flags & P_TYPE) { + case P_BINTERNAL: + src = bi = GETBINTERNAL(h, nxt); + nbytes = NBINTERNAL(bi->ksize); + isbigkey = bi->flags & P_BIGKEY; + break; + case P_BLEAF: + src = bl = GETBLEAF(h, nxt); + nbytes = NBLEAF(bl); + isbigkey = bl->flags & P_BIGKEY; + break; + case P_RINTERNAL: + src = GETRINTERNAL(h, nxt); + nbytes = NRINTERNAL; + isbigkey = 0; + break; + case P_RLEAF: + src = rl = GETRLEAF(h, nxt); + nbytes = NRLEAF(rl); + isbigkey = 0; + break; + default: + abort(); + } + + /* + * If the key/data pairs are substantial fractions of the max + * possible size for the page, it's possible to get situations + * where we decide to try and copy too much onto the left page. + * Make sure that doesn't happen. + */ + if (skip <= off && used + nbytes >= full) { + --off; + break; + } + + /* Copy the key/data pair, if not the skipped index. */ + if (skip != off) { + ++nxt; + + l->linp[off] = l->upper -= nbytes; + memmove((char *)l + l->upper, src, nbytes); + } + + used += nbytes; + if (used >= half) { + if (!isbigkey || bigkeycnt == 3) + break; + else + ++bigkeycnt; + } + } + + /* + * Off is the last offset that's valid for the left page. + * Nxt is the first offset to be placed on the right page. + */ + l->lower += (off + 1) * sizeof(indx_t); + + /* + * If splitting the page that the cursor was on, the cursor has to be + * adjusted to point to the same record as before the split. If the + * cursor is at or past the skipped slot, the cursor is incremented by + * one. If the cursor is on the right page, it is decremented by the + * number of records split to the left page. + * + * Don't bother checking for the B_SEQINIT flag, the page number will + * be P_INVALID. + */ + c = &t->bt_bcursor; + if (c->pgno == h->pgno) { + if (c->index >= skip) + ++c->index; + if (c->index < nxt) /* Left page. */ + c->pgno = l->pgno; + else { /* Right page. */ + c->pgno = r->pgno; + c->index -= nxt; + } + } + + /* + * If the skipped index was on the left page, just return that page. + * Otherwise, adjust the skip index to reflect the new position on + * the right page. + */ + if (skip <= off) { + skip = 0; + rval = l; + } else { + rval = r; + *pskip -= nxt; + } + + for (off = 0; nxt < top; ++off) { + if (skip == nxt) { + ++off; + skip = 0; + } + switch (h->flags & P_TYPE) { + case P_BINTERNAL: + src = bi = GETBINTERNAL(h, nxt); + nbytes = NBINTERNAL(bi->ksize); + break; + case P_BLEAF: + src = bl = GETBLEAF(h, nxt); + nbytes = NBLEAF(bl); + break; + case P_RINTERNAL: + src = GETRINTERNAL(h, nxt); + nbytes = NRINTERNAL; + break; + case P_RLEAF: + src = rl = GETRLEAF(h, nxt); + nbytes = NRLEAF(rl); + break; + default: + abort(); + } + ++nxt; + r->linp[off] = r->upper -= nbytes; + memmove((char *)r + r->upper, src, nbytes); + } + r->lower += off * sizeof(indx_t); + + /* If the key is being appended to the page, adjust the index. */ + if (skip == top) + r->lower += sizeof(indx_t); + + return (rval); +} + +/* + * BT_PRESERVE -- Mark a chain of pages as used by an internal node. + * + * Chains of indirect blocks pointed to by leaf nodes get reclaimed when the + * record that references them gets deleted. Chains pointed to by internal + * pages never get deleted. This routine marks a chain as pointed to by an + * internal page. + * + * Parameters: + * t: tree + * pg: page number of first page in the chain. + * + * Returns: + * RET_SUCCESS, RET_ERROR. + */ +static int +bt_preserve(t, pg) + BTREE *t; + pgno_t pg; +{ + PAGE *h; + + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + return (RET_ERROR); + h->flags |= P_PRESERVE; + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + return (RET_SUCCESS); +} + +/* + * REC_TOTAL -- Return the number of recno entries below a page. + * + * Parameters: + * h: page + * + * Returns: + * The number of recno entries below a page. + * + * XXX + * These values could be set by the bt_psplit routine. The problem is that the + * entry has to be popped off of the stack etc. or the values have to be passed + * all the way back to bt_split/bt_rroot and it's not very clean. + */ +static recno_t +rec_total(h) + PAGE *h; +{ + recno_t recs; + indx_t nxt, top; + + for (recs = 0, nxt = 0, top = NEXTINDEX(h); nxt < top; ++nxt) + recs += GETRINTERNAL(h, nxt)->nrecs; + return (recs); +} diff --git a/lib/libc/db/btree/bt_stack.c b/lib/libc/db/btree/bt_stack.c new file mode 100644 index 0000000..5c7fab9 --- /dev/null +++ b/lib/libc/db/btree/bt_stack.c @@ -0,0 +1,92 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_stack.c 8.3 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> + +#include <db.h> +#include "btree.h" + +/* + * When a page splits, a new record has to be inserted into its parent page. + * This page may have to split as well, all the way up to the root. Since + * parent pointers in each page would be expensive, we maintain a stack of + * parent pages as we descend the tree. + * + * XXX + * This is a concurrency problem -- if user a builds a stack, then user b + * splits the tree, then user a tries to split the tree, there's a new level + * in the tree that user a doesn't know about. + */ + +/* + * __BT_PUSH -- Push parent page info onto the stack (LIFO). + * + * Parameters: + * t: tree + * pgno: page + * index: page index + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__bt_push(t, pgno, index) + BTREE *t; + pgno_t pgno; + indx_t index; +{ + if (t->bt_sp == t->bt_maxstack) { + t->bt_maxstack += 50; + if ((t->bt_stack = (EPGNO *)realloc(t->bt_stack, + t->bt_maxstack * sizeof(EPGNO))) == NULL) { + t->bt_maxstack -= 50; + return (RET_ERROR); + } + } + + t->bt_stack[t->bt_sp].pgno = pgno; + t->bt_stack[t->bt_sp].index = index; + ++t->bt_sp; + return (RET_SUCCESS); +} diff --git a/lib/libc/db/btree/bt_utils.c b/lib/libc/db/btree/bt_utils.c new file mode 100644 index 0000000..d2d1f73 --- /dev/null +++ b/lib/libc/db/btree/bt_utils.c @@ -0,0 +1,247 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)bt_utils.c 8.4 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/param.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <db.h> +#include "btree.h" + +/* + * __BT_RET -- Build return key/data pair as a result of search or scan. + * + * Parameters: + * t: tree + * d: LEAF to be returned to the user. + * key: user's key structure (NULL if not to be filled in) + * data: user's data structure + * + * Returns: + * RET_SUCCESS, RET_ERROR. + */ +int +__bt_ret(t, e, key, data) + BTREE *t; + EPG *e; + DBT *key, *data; +{ + register BLEAF *bl; + register void *p; + + bl = GETBLEAF(e->page, e->index); + + /* + * We always copy big keys/data to make them contigous. Otherwise, + * we leave the page pinned and don't copy unless the user specified + * concurrent access. + */ + if (bl->flags & P_BIGDATA) { + if (__ovfl_get(t, bl->bytes + bl->ksize, + &data->size, &t->bt_dbuf, &t->bt_dbufsz)) + return (RET_ERROR); + data->data = t->bt_dbuf; + } else if (ISSET(t, B_DB_LOCK)) { + /* Use +1 in case the first record retrieved is 0 length. */ + if (bl->dsize + 1 > t->bt_dbufsz) { + if ((p = + (void *)realloc(t->bt_dbuf, bl->dsize + 1)) == NULL) + return (RET_ERROR); + t->bt_dbuf = p; + t->bt_dbufsz = bl->dsize + 1; + } + memmove(t->bt_dbuf, bl->bytes + bl->ksize, bl->dsize); + data->size = bl->dsize; + data->data = t->bt_dbuf; + } else { + data->size = bl->dsize; + data->data = bl->bytes + bl->ksize; + } + + if (key == NULL) + return (RET_SUCCESS); + + if (bl->flags & P_BIGKEY) { + if (__ovfl_get(t, bl->bytes, + &key->size, &t->bt_kbuf, &t->bt_kbufsz)) + return (RET_ERROR); + key->data = t->bt_kbuf; + } else if (ISSET(t, B_DB_LOCK)) { + if (bl->ksize > t->bt_kbufsz) { + if ((p = + (void *)realloc(t->bt_kbuf, bl->ksize)) == NULL) + return (RET_ERROR); + t->bt_kbuf = p; + t->bt_kbufsz = bl->ksize; + } + memmove(t->bt_kbuf, bl->bytes, bl->ksize); + key->size = bl->ksize; + key->data = t->bt_kbuf; + } else { + key->size = bl->ksize; + key->data = bl->bytes; + } + return (RET_SUCCESS); +} + +/* + * __BT_CMP -- Compare a key to a given record. + * + * Parameters: + * t: tree + * k1: DBT pointer of first arg to comparison + * e: pointer to EPG for comparison + * + * Returns: + * < 0 if k1 is < record + * = 0 if k1 is = record + * > 0 if k1 is > record + */ +int +__bt_cmp(t, k1, e) + BTREE *t; + const DBT *k1; + EPG *e; +{ + BINTERNAL *bi; + BLEAF *bl; + DBT k2; + PAGE *h; + void *bigkey; + + /* + * The left-most key on internal pages, at any level of the tree, is + * guaranteed by the following code to be less than any user key. + * This saves us from having to update the leftmost key on an internal + * page when the user inserts a new key in the tree smaller than + * anything we've yet seen. + */ + h = e->page; + if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & P_BLEAF)) + return (1); + + bigkey = NULL; + if (h->flags & P_BLEAF) { + bl = GETBLEAF(h, e->index); + if (bl->flags & P_BIGKEY) + bigkey = bl->bytes; + else { + k2.data = bl->bytes; + k2.size = bl->ksize; + } + } else { + bi = GETBINTERNAL(h, e->index); + if (bi->flags & P_BIGKEY) + bigkey = bi->bytes; + else { + k2.data = bi->bytes; + k2.size = bi->ksize; + } + } + + if (bigkey) { + if (__ovfl_get(t, bigkey, + &k2.size, &t->bt_dbuf, &t->bt_dbufsz)) + return (RET_ERROR); + k2.data = t->bt_dbuf; + } + return ((*t->bt_cmp)(k1, &k2)); +} + +/* + * __BT_DEFCMP -- Default comparison routine. + * + * Parameters: + * a: DBT #1 + * b: DBT #2 + * + * Returns: + * < 0 if a is < b + * = 0 if a is = b + * > 0 if a is > b + */ +int +__bt_defcmp(a, b) + const DBT *a, *b; +{ + register size_t len; + register u_char *p1, *p2; + + /* + * XXX + * If a size_t doesn't fit in an int, this routine can lose. + * What we need is a integral type which is guaranteed to be + * larger than a size_t, and there is no such thing. + */ + len = MIN(a->size, b->size); + for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2) + if (*p1 != *p2) + return ((int)*p1 - (int)*p2); + return ((int)a->size - (int)b->size); +} + +/* + * __BT_DEFPFX -- Default prefix routine. + * + * Parameters: + * a: DBT #1 + * b: DBT #2 + * + * Returns: + * Number of bytes needed to distinguish b from a. + */ +size_t +__bt_defpfx(a, b) + const DBT *a, *b; +{ + register u_char *p1, *p2; + register size_t cnt, len; + + cnt = 1; + len = MIN(a->size, b->size); + for (p1 = a->data, p2 = b->data; len--; ++p1, ++p2, ++cnt) + if (*p1 != *p2) + return (cnt); + + /* a->size must be <= b->size, or they wouldn't be in this order. */ + return (a->size < b->size ? a->size + 1 : a->size); +} diff --git a/lib/libc/db/btree/btree.h b/lib/libc/db/btree/btree.h new file mode 100644 index 0000000..dd798ec --- /dev/null +++ b/lib/libc/db/btree/btree.h @@ -0,0 +1,353 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)btree.h 8.5 (Berkeley) 2/21/94 + */ + +#include <mpool.h> + +#define DEFMINKEYPAGE (2) /* Minimum keys per page */ +#define MINCACHE (5) /* Minimum cached pages */ +#define MINPSIZE (512) /* Minimum page size */ + +/* + * Page 0 of a btree file contains a copy of the meta-data. This page is also + * used as an out-of-band page, i.e. page pointers that point to nowhere point + * to page 0. Page 1 is the root of the btree. + */ +#define P_INVALID 0 /* Invalid tree page number. */ +#define P_META 0 /* Tree metadata page number. */ +#define P_ROOT 1 /* Tree root page number. */ + +/* + * There are five page layouts in the btree: btree internal pages (BINTERNAL), + * btree leaf pages (BLEAF), recno internal pages (RINTERNAL), recno leaf pages + * (RLEAF) and overflow pages. All five page types have a page header (PAGE). + * This implementation requires that values within structures NOT be padded. + * (ANSI C permits random padding.) If your compiler pads randomly you'll have + * to do some work to get this package to run. + */ +typedef struct _page { + pgno_t pgno; /* this page's page number */ + pgno_t prevpg; /* left sibling */ + pgno_t nextpg; /* right sibling */ + +#define P_BINTERNAL 0x01 /* btree internal page */ +#define P_BLEAF 0x02 /* leaf page */ +#define P_OVERFLOW 0x04 /* overflow page */ +#define P_RINTERNAL 0x08 /* recno internal page */ +#define P_RLEAF 0x10 /* leaf page */ +#define P_TYPE 0x1f /* type mask */ +#define P_PRESERVE 0x20 /* never delete this chain of pages */ + u_int32_t flags; + + indx_t lower; /* lower bound of free space on page */ + indx_t upper; /* upper bound of free space on page */ + indx_t linp[1]; /* indx_t-aligned VAR. LENGTH DATA */ +} PAGE; + +/* First and next index. */ +#define BTDATAOFF (sizeof(pgno_t) + sizeof(pgno_t) + sizeof(pgno_t) + \ + sizeof(u_int32_t) + sizeof(indx_t) + sizeof(indx_t)) +#define NEXTINDEX(p) (((p)->lower - BTDATAOFF) / sizeof(indx_t)) + +/* + * For pages other than overflow pages, there is an array of offsets into the + * rest of the page immediately following the page header. Each offset is to + * an item which is unique to the type of page. The h_lower offset is just + * past the last filled-in index. The h_upper offset is the first item on the + * page. Offsets are from the beginning of the page. + * + * If an item is too big to store on a single page, a flag is set and the item + * is a { page, size } pair such that the page is the first page of an overflow + * chain with size bytes of item. Overflow pages are simply bytes without any + * external structure. + * + * The page number and size fields in the items are pgno_t-aligned so they can + * be manipulated without copying. (This presumes that 32 bit items can be + * manipulated on this system.) + */ +#define LALIGN(n) \ + (((n) + sizeof(pgno_t) - 1) & ~(sizeof(pgno_t) - 1)) +#define NOVFLSIZE (sizeof(pgno_t) + sizeof(size_t)) + +/* + * For the btree internal pages, the item is a key. BINTERNALs are {key, pgno} + * pairs, such that the key compares less than or equal to all of the records + * on that page. For a tree without duplicate keys, an internal page with two + * consecutive keys, a and b, will have all records greater than or equal to a + * and less than b stored on the page associated with a. Duplicate keys are + * somewhat special and can cause duplicate internal and leaf page records and + * some minor modifications of the above rule. + */ +typedef struct _binternal { + size_t ksize; /* key size */ + pgno_t pgno; /* page number stored on */ +#define P_BIGDATA 0x01 /* overflow data */ +#define P_BIGKEY 0x02 /* overflow key */ + u_char flags; + char bytes[1]; /* data */ +} BINTERNAL; + +/* Get the page's BINTERNAL structure at index indx. */ +#define GETBINTERNAL(pg, indx) \ + ((BINTERNAL *)((char *)(pg) + (pg)->linp[indx])) + +/* Get the number of bytes in the entry. */ +#define NBINTERNAL(len) \ + LALIGN(sizeof(size_t) + sizeof(pgno_t) + sizeof(u_char) + (len)) + +/* Copy a BINTERNAL entry to the page. */ +#define WR_BINTERNAL(p, size, pgno, flags) { \ + *(size_t *)p = size; \ + p += sizeof(size_t); \ + *(pgno_t *)p = pgno; \ + p += sizeof(pgno_t); \ + *(u_char *)p = flags; \ + p += sizeof(u_char); \ +} + +/* + * For the recno internal pages, the item is a page number with the number of + * keys found on that page and below. + */ +typedef struct _rinternal { + recno_t nrecs; /* number of records */ + pgno_t pgno; /* page number stored below */ +} RINTERNAL; + +/* Get the page's RINTERNAL structure at index indx. */ +#define GETRINTERNAL(pg, indx) \ + ((RINTERNAL *)((char *)(pg) + (pg)->linp[indx])) + +/* Get the number of bytes in the entry. */ +#define NRINTERNAL \ + LALIGN(sizeof(recno_t) + sizeof(pgno_t)) + +/* Copy a RINTERAL entry to the page. */ +#define WR_RINTERNAL(p, nrecs, pgno) { \ + *(recno_t *)p = nrecs; \ + p += sizeof(recno_t); \ + *(pgno_t *)p = pgno; \ +} + +/* For the btree leaf pages, the item is a key and data pair. */ +typedef struct _bleaf { + size_t ksize; /* size of key */ + size_t dsize; /* size of data */ + u_char flags; /* P_BIGDATA, P_BIGKEY */ + char bytes[1]; /* data */ +} BLEAF; + +/* Get the page's BLEAF structure at index indx. */ +#define GETBLEAF(pg, indx) \ + ((BLEAF *)((char *)(pg) + (pg)->linp[indx])) + +/* Get the number of bytes in the entry. */ +#define NBLEAF(p) NBLEAFDBT((p)->ksize, (p)->dsize) + +/* Get the number of bytes in the user's key/data pair. */ +#define NBLEAFDBT(ksize, dsize) \ + LALIGN(sizeof(size_t) + sizeof(size_t) + sizeof(u_char) + \ + (ksize) + (dsize)) + +/* Copy a BLEAF entry to the page. */ +#define WR_BLEAF(p, key, data, flags) { \ + *(size_t *)p = key->size; \ + p += sizeof(size_t); \ + *(size_t *)p = data->size; \ + p += sizeof(size_t); \ + *(u_char *)p = flags; \ + p += sizeof(u_char); \ + memmove(p, key->data, key->size); \ + p += key->size; \ + memmove(p, data->data, data->size); \ +} + +/* For the recno leaf pages, the item is a data entry. */ +typedef struct _rleaf { + size_t dsize; /* size of data */ + u_char flags; /* P_BIGDATA */ + char bytes[1]; +} RLEAF; + +/* Get the page's RLEAF structure at index indx. */ +#define GETRLEAF(pg, indx) \ + ((RLEAF *)((char *)(pg) + (pg)->linp[indx])) + +/* Get the number of bytes in the entry. */ +#define NRLEAF(p) NRLEAFDBT((p)->dsize) + +/* Get the number of bytes from the user's data. */ +#define NRLEAFDBT(dsize) \ + LALIGN(sizeof(size_t) + sizeof(u_char) + (dsize)) + +/* Copy a RLEAF entry to the page. */ +#define WR_RLEAF(p, data, flags) { \ + *(size_t *)p = data->size; \ + p += sizeof(size_t); \ + *(u_char *)p = flags; \ + p += sizeof(u_char); \ + memmove(p, data->data, data->size); \ +} + +/* + * A record in the tree is either a pointer to a page and an index in the page + * or a page number and an index. These structures are used as a cursor, stack + * entry and search returns as well as to pass records to other routines. + * + * One comment about searches. Internal page searches must find the largest + * record less than key in the tree so that descents work. Leaf page searches + * must find the smallest record greater than key so that the returned index + * is the record's correct position for insertion. + * + * One comment about cursors. The cursor key is never removed from the tree, + * even if deleted. This is because it is quite difficult to decide where the + * cursor should be when other keys have been inserted/deleted in the tree; + * duplicate keys make it impossible. This scheme does require extra work + * though, to make sure that we don't perform an operation on a deleted key. + */ +typedef struct _epgno { + pgno_t pgno; /* the page number */ + indx_t index; /* the index on the page */ +} EPGNO; + +typedef struct _epg { + PAGE *page; /* the (pinned) page */ + indx_t index; /* the index on the page */ +} EPG; + +/* + * The metadata of the tree. The m_nrecs field is used only by the RECNO code. + * This is because the btree doesn't really need it and it requires that every + * put or delete call modify the metadata. + */ +typedef struct _btmeta { + u_int32_t m_magic; /* magic number */ + u_int32_t m_version; /* version */ + u_int32_t m_psize; /* page size */ + u_int32_t m_free; /* page number of first free page */ + u_int32_t m_nrecs; /* R: number of records */ +#define SAVEMETA (B_NODUPS | R_RECNO) + u_int32_t m_flags; /* bt_flags & SAVEMETA */ + u_int32_t m_unused; /* unused */ +} BTMETA; + +/* The in-memory btree/recno data structure. */ +typedef struct _btree { + MPOOL *bt_mp; /* memory pool cookie */ + + DB *bt_dbp; /* pointer to enclosing DB */ + + EPG bt_cur; /* current (pinned) page */ + PAGE *bt_pinned; /* page pinned across calls */ + + EPGNO bt_bcursor; /* B: btree cursor */ + recno_t bt_rcursor; /* R: recno cursor (1-based) */ + +#define BT_POP(t) (t->bt_sp ? t->bt_stack + --t->bt_sp : NULL) +#define BT_CLR(t) (t->bt_sp = 0) + EPGNO *bt_stack; /* stack of parent pages */ + u_int bt_sp; /* current stack pointer */ + u_int bt_maxstack; /* largest stack */ + + char *bt_kbuf; /* key buffer */ + size_t bt_kbufsz; /* key buffer size */ + char *bt_dbuf; /* data buffer */ + size_t bt_dbufsz; /* data buffer size */ + + int bt_fd; /* tree file descriptor */ + + pgno_t bt_free; /* next free page */ + u_int32_t bt_psize; /* page size */ + indx_t bt_ovflsize; /* cut-off for key/data overflow */ + int bt_lorder; /* byte order */ + /* sorted order */ + enum { NOT, BACK, FORWARD } bt_order; + EPGNO bt_last; /* last insert */ + + /* B: key comparison function */ + int (*bt_cmp) __P((const DBT *, const DBT *)); + /* B: prefix comparison function */ + size_t (*bt_pfx) __P((const DBT *, const DBT *)); + /* R: recno input function */ + int (*bt_irec) __P((struct _btree *, recno_t)); + + FILE *bt_rfp; /* R: record FILE pointer */ + int bt_rfd; /* R: record file descriptor */ + + caddr_t bt_cmap; /* R: current point in mapped space */ + caddr_t bt_smap; /* R: start of mapped space */ + caddr_t bt_emap; /* R: end of mapped space */ + size_t bt_msize; /* R: size of mapped region. */ + + recno_t bt_nrecs; /* R: number of records */ + size_t bt_reclen; /* R: fixed record length */ + u_char bt_bval; /* R: delimiting byte/pad character */ + +/* + * NB: + * B_NODUPS and R_RECNO are stored on disk, and may not be changed. + */ +#define B_DELCRSR 0x00001 /* cursor has been deleted */ +#define B_INMEM 0x00002 /* in-memory tree */ +#define B_METADIRTY 0x00004 /* need to write metadata */ +#define B_MODIFIED 0x00008 /* tree modified */ +#define B_NEEDSWAP 0x00010 /* if byte order requires swapping */ +#define B_NODUPS 0x00020 /* no duplicate keys permitted */ +#define B_RDONLY 0x00040 /* read-only tree */ +#define R_RECNO 0x00080 /* record oriented tree */ +#define B_SEQINIT 0x00100 /* sequential scan initialized */ + +#define R_CLOSEFP 0x00200 /* opened a file pointer */ +#define R_EOF 0x00400 /* end of input file reached. */ +#define R_FIXLEN 0x00800 /* fixed length records */ +#define R_MEMMAPPED 0x01000 /* memory mapped file. */ +#define R_INMEM 0x02000 /* in-memory file */ +#define R_MODIFIED 0x04000 /* modified file */ +#define R_RDONLY 0x08000 /* read-only file */ + +#define B_DB_LOCK 0x10000 /* DB_LOCK specified. */ +#define B_DB_SHMEM 0x20000 /* DB_SHMEM specified. */ +#define B_DB_TXN 0x40000 /* DB_TXN specified. */ + + u_int32_t bt_flags; /* btree state */ +} BTREE; + +#define SET(t, f) ((t)->bt_flags |= (f)) +#define CLR(t, f) ((t)->bt_flags &= ~(f)) +#define ISSET(t, f) ((t)->bt_flags & (f)) + +#include "extern.h" diff --git a/lib/libc/db/btree/extern.h b/lib/libc/db/btree/extern.h new file mode 100644 index 0000000..4007bc7 --- /dev/null +++ b/lib/libc/db/btree/extern.h @@ -0,0 +1,70 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)extern.h 8.3 (Berkeley) 2/21/94 + */ + +int __bt_close __P((DB *)); +int __bt_cmp __P((BTREE *, const DBT *, EPG *)); +int __bt_crsrdel __P((BTREE *, EPGNO *)); +int __bt_defcmp __P((const DBT *, const DBT *)); +size_t __bt_defpfx __P((const DBT *, const DBT *)); +int __bt_delete __P((const DB *, const DBT *, u_int)); +int __bt_dleaf __P((BTREE *, PAGE *, int)); +int __bt_fd __P((const DB *)); +EPG *__bt_first __P((BTREE *, const DBT *, int *)); +int __bt_free __P((BTREE *, PAGE *)); +int __bt_get __P((const DB *, const DBT *, DBT *, u_int)); +PAGE *__bt_new __P((BTREE *, pgno_t *)); +void __bt_pgin __P((void *, pgno_t, void *)); +void __bt_pgout __P((void *, pgno_t, void *)); +int __bt_push __P((BTREE *, pgno_t, int)); +int __bt_put __P((const DB *dbp, DBT *, const DBT *, u_int)); +int __bt_ret __P((BTREE *, EPG *, DBT *, DBT *)); +EPG *__bt_search __P((BTREE *, const DBT *, int *)); +int __bt_seq __P((const DB *, DBT *, DBT *, u_int)); +int __bt_split __P((BTREE *, PAGE *, + const DBT *, const DBT *, int, size_t, indx_t)); +int __bt_sync __P((const DB *, u_int)); + +int __ovfl_delete __P((BTREE *, void *)); +int __ovfl_get __P((BTREE *, void *, size_t *, char **, size_t *)); +int __ovfl_put __P((BTREE *, const DBT *, pgno_t *)); + +#ifdef DEBUG +void __bt_dnpage __P((DB *, pgno_t)); +void __bt_dpage __P((PAGE *)); +void __bt_dump __P((DB *)); +#endif +#ifdef STATISTICS +void __bt_stat __P((DB *)); +#endif diff --git a/lib/libc/db/db/Makefile.inc b/lib/libc/db/db/Makefile.inc new file mode 100644 index 0000000..59478ba1 --- /dev/null +++ b/lib/libc/db/db/Makefile.inc @@ -0,0 +1,5 @@ +# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93 + +.PATH: ${.CURDIR}/db/db + +SRCS+= db.c diff --git a/lib/libc/db/db/db.c b/lib/libc/db/db/db.c new file mode 100644 index 0000000..a18f056 --- /dev/null +++ b/lib/libc/db/db/db.c @@ -0,0 +1,99 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)db.c 8.4 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdio.h> + +#include <db.h> + +DB * +dbopen(fname, flags, mode, type, openinfo) + const char *fname; + int flags, mode; + DBTYPE type; + const void *openinfo; +{ + +#define DB_FLAGS (DB_LOCK | DB_SHMEM | DB_TXN) +#define USE_OPEN_FLAGS \ + (O_CREAT | O_EXCL | O_EXLOCK | O_NONBLOCK | O_RDONLY | \ + O_RDWR | O_SHLOCK | O_TRUNC) + + if ((flags & ~(USE_OPEN_FLAGS | DB_FLAGS)) == 0) + switch (type) { + case DB_BTREE: + return (__bt_open(fname, flags & USE_OPEN_FLAGS, + mode, openinfo, flags & DB_FLAGS)); + case DB_HASH: + return (__hash_open(fname, flags & USE_OPEN_FLAGS, + mode, openinfo, flags & DB_FLAGS)); + case DB_RECNO: + return (__rec_open(fname, flags & USE_OPEN_FLAGS, + mode, openinfo, flags & DB_FLAGS)); + } + errno = EINVAL; + return (NULL); +} + +static int +__dberr() +{ + return (RET_ERROR); +} + +/* + * __DBPANIC -- Stop. + * + * Parameters: + * dbp: pointer to the DB structure. + */ +void +__dbpanic(dbp) + DB *dbp; +{ + /* The only thing that can succeed is a close. */ + dbp->del = (int (*)())__dberr; + dbp->fd = (int (*)())__dberr; + dbp->get = (int (*)())__dberr; + dbp->put = (int (*)())__dberr; + dbp->seq = (int (*)())__dberr; + dbp->sync = (int (*)())__dberr; +} diff --git a/lib/libc/db/doc/hash.usenix.ps b/lib/libc/db/doc/hash.usenix.ps new file mode 100644 index 0000000..c884778 --- /dev/null +++ b/lib/libc/db/doc/hash.usenix.ps @@ -0,0 +1,12209 @@ +%!PS-Adobe-1.0 +%%Creator: utopia:margo (& Seltzer,608-13E,8072,) +%%Title: stdin (ditroff) +%%CreationDate: Tue Dec 11 15:06:45 1990 +%%EndComments +% @(#)psdit.pro 1.3 4/15/88 +% lib/psdit.pro -- prolog for psdit (ditroff) files +% Copyright (c) 1984, 1985 Adobe Systems Incorporated. All Rights Reserved. +% last edit: shore Sat Nov 23 20:28:03 1985 +% RCSID: $Header: psdit.pro,v 2.1 85/11/24 12:19:43 shore Rel $ + +% Changed by Edward Wang (edward@ucbarpa.berkeley.edu) to handle graphics, +% 17 Feb, 87. + +/$DITroff 140 dict def $DITroff begin +/fontnum 1 def /fontsize 10 def /fontheight 10 def /fontslant 0 def +/xi{0 72 11 mul translate 72 resolution div dup neg scale 0 0 moveto + /fontnum 1 def /fontsize 10 def /fontheight 10 def /fontslant 0 def F + /pagesave save def}def +/PB{save /psv exch def currentpoint translate + resolution 72 div dup neg scale 0 0 moveto}def +/PE{psv restore}def +/arctoobig 90 def /arctoosmall .05 def +/m1 matrix def /m2 matrix def /m3 matrix def /oldmat matrix def +/tan{dup sin exch cos div}def +/point{resolution 72 div mul}def +/dround {transform round exch round exch itransform}def +/xT{/devname exch def}def +/xr{/mh exch def /my exch def /resolution exch def}def +/xp{}def +/xs{docsave restore end}def +/xt{}def +/xf{/fontname exch def /slotno exch def fontnames slotno get fontname eq not + {fonts slotno fontname findfont put fontnames slotno fontname put}if}def +/xH{/fontheight exch def F}def +/xS{/fontslant exch def F}def +/s{/fontsize exch def /fontheight fontsize def F}def +/f{/fontnum exch def F}def +/F{fontheight 0 le{/fontheight fontsize def}if + fonts fontnum get fontsize point 0 0 fontheight point neg 0 0 m1 astore + fontslant 0 ne{1 0 fontslant tan 1 0 0 m2 astore m3 concatmatrix}if + makefont setfont .04 fontsize point mul 0 dround pop setlinewidth}def +/X{exch currentpoint exch pop moveto show}def +/N{3 1 roll moveto show}def +/Y{exch currentpoint pop exch moveto show}def +/S{show}def +/ditpush{}def/ditpop{}def +/AX{3 -1 roll currentpoint exch pop moveto 0 exch ashow}def +/AN{4 2 roll moveto 0 exch ashow}def +/AY{3 -1 roll currentpoint pop exch moveto 0 exch ashow}def +/AS{0 exch ashow}def +/MX{currentpoint exch pop moveto}def +/MY{currentpoint pop exch moveto}def +/MXY{moveto}def +/cb{pop}def % action on unknown char -- nothing for now +/n{}def/w{}def +/p{pop showpage pagesave restore /pagesave save def}def +/Dt{/Dlinewidth exch def}def 1 Dt +/Ds{/Ddash exch def}def -1 Ds +/Di{/Dstipple exch def}def 1 Di +/Dsetlinewidth{2 Dlinewidth mul setlinewidth}def +/Dsetdash{Ddash 4 eq{[8 12]}{Ddash 16 eq{[32 36]} + {Ddash 20 eq{[32 12 8 12]}{[]}ifelse}ifelse}ifelse 0 setdash}def +/Dstroke{gsave Dsetlinewidth Dsetdash 1 setlinecap stroke grestore + currentpoint newpath moveto}def +/Dl{rlineto Dstroke}def +/arcellipse{/diamv exch def /diamh exch def oldmat currentmatrix pop + currentpoint translate 1 diamv diamh div scale /rad diamh 2 div def + currentpoint exch rad add exch rad -180 180 arc oldmat setmatrix}def +/Dc{dup arcellipse Dstroke}def +/De{arcellipse Dstroke}def +/Da{/endv exch def /endh exch def /centerv exch def /centerh exch def + /cradius centerv centerv mul centerh centerh mul add sqrt def + /eradius endv endv mul endh endh mul add sqrt def + /endang endv endh atan def + /startang centerv neg centerh neg atan def + /sweep startang endang sub dup 0 lt{360 add}if def + sweep arctoobig gt + {/midang startang sweep 2 div sub def /midrad cradius eradius add 2 div def + /midh midang cos midrad mul def /midv midang sin midrad mul def + midh neg midv neg endh endv centerh centerv midh midv Da + Da} + {sweep arctoosmall ge + {/controldelt 1 sweep 2 div cos sub 3 sweep 2 div sin mul div 4 mul def + centerv neg controldelt mul centerh controldelt mul + endv neg controldelt mul centerh add endh add + endh controldelt mul centerv add endv add + centerh endh add centerv endv add rcurveto Dstroke} + {centerh endh add centerv endv add rlineto Dstroke} + ifelse} + ifelse}def +/Dpatterns[ +[%cf[widthbits] +[8<0000000000000010>] +[8<0411040040114000>] +[8<0204081020408001>] +[8<0000103810000000>] +[8<6699996666999966>] +[8<0000800100001008>] +[8<81c36666c3810000>] +[8<0f0e0c0800000000>] +[8<0000000000000010>] +[8<0411040040114000>] +[8<0204081020408001>] +[8<0000001038100000>] +[8<6699996666999966>] +[8<0000800100001008>] +[8<81c36666c3810000>] +[8<0f0e0c0800000000>] +[8<0042660000246600>] +[8<0000990000990000>] +[8<0804020180402010>] +[8<2418814242811824>] +[8<6699996666999966>] +[8<8000000008000000>] +[8<00001c3e363e1c00>] +[8<0000000000000000>] +[32<00000040000000c00000004000000040000000e0000000000000000000000000>] +[32<00000000000060000000900000002000000040000000f0000000000000000000>] +[32<000000000000000000e0000000100000006000000010000000e0000000000000>] +[32<00000000000000002000000060000000a0000000f00000002000000000000000>] +[32<0000000e0000000000000000000000000000000f000000080000000e00000001>] +[32<0000090000000600000000000000000000000000000007000000080000000e00>] +[32<00010000000200000004000000040000000000000000000000000000000f0000>] +[32<0900000006000000090000000600000000000000000000000000000006000000>]] +[%ug +[8<0000020000000000>] +[8<0000020000002000>] +[8<0004020000002000>] +[8<0004020000402000>] +[8<0004060000402000>] +[8<0004060000406000>] +[8<0006060000406000>] +[8<0006060000606000>] +[8<00060e0000606000>] +[8<00060e000060e000>] +[8<00070e000060e000>] +[8<00070e000070e000>] +[8<00070e020070e000>] +[8<00070e020070e020>] +[8<04070e020070e020>] +[8<04070e024070e020>] +[8<04070e064070e020>] +[8<04070e064070e060>] +[8<06070e064070e060>] +[8<06070e066070e060>] +[8<06070f066070e060>] +[8<06070f066070f060>] +[8<060f0f066070f060>] +[8<060f0f0660f0f060>] +[8<060f0f0760f0f060>] +[8<060f0f0760f0f070>] +[8<0e0f0f0760f0f070>] +[8<0e0f0f07e0f0f070>] +[8<0e0f0f0fe0f0f070>] +[8<0e0f0f0fe0f0f0f0>] +[8<0f0f0f0fe0f0f0f0>] +[8<0f0f0f0ff0f0f0f0>] +[8<1f0f0f0ff0f0f0f0>] +[8<1f0f0f0ff1f0f0f0>] +[8<1f0f0f8ff1f0f0f0>] +[8<1f0f0f8ff1f0f0f8>] +[8<9f0f0f8ff1f0f0f8>] +[8<9f0f0f8ff9f0f0f8>] +[8<9f0f0f9ff9f0f0f8>] +[8<9f0f0f9ff9f0f0f9>] +[8<9f8f0f9ff9f0f0f9>] +[8<9f8f0f9ff9f8f0f9>] +[8<9f8f1f9ff9f8f0f9>] +[8<9f8f1f9ff9f8f1f9>] +[8<bf8f1f9ff9f8f1f9>] +[8<bf8f1f9ffbf8f1f9>] +[8<bf8f1fdffbf8f1f9>] +[8<bf8f1fdffbf8f1fd>] +[8<ff8f1fdffbf8f1fd>] +[8<ff8f1fdffff8f1fd>] +[8<ff8f1ffffff8f1fd>] +[8<ff8f1ffffff8f1ff>] +[8<ff9f1ffffff8f1ff>] +[8<ff9f1ffffff9f1ff>] +[8<ff9f9ffffff9f1ff>] +[8<ff9f9ffffff9f9ff>] +[8<ffbf9ffffff9f9ff>] +[8<ffbf9ffffffbf9ff>] +[8<ffbfdffffffbf9ff>] +[8<ffbfdffffffbfdff>] +[8<ffffdffffffbfdff>] +[8<ffffdffffffffdff>] +[8<fffffffffffffdff>] +[8<ffffffffffffffff>]] +[%mg +[8<8000000000000000>] +[8<0822080080228000>] +[8<0204081020408001>] +[8<40e0400000000000>] +[8<66999966>] +[8<8001000010080000>] +[8<81c36666c3810000>] +[8<f0e0c08000000000>] +[16<07c00f801f003e007c00f800f001e003c007800f001f003e007c00f801f003e0>] +[16<1f000f8007c003e001f000f8007c003e001f800fc007e003f001f8007c003e00>] +[8<c3c300000000c3c3>] +[16<0040008001000200040008001000200040008000000100020004000800100020>] +[16<0040002000100008000400020001800040002000100008000400020001000080>] +[16<1fc03fe07df0f8f8f07de03fc01f800fc01fe03ff07df8f87df03fe01fc00f80>] +[8<80>] +[8<8040201000000000>] +[8<84cc000048cc0000>] +[8<9900009900000000>] +[8<08040201804020100800020180002010>] +[8<2418814242811824>] +[8<66999966>] +[8<8000000008000000>] +[8<70f8d8f870000000>] +[8<0814224180402010>] +[8<aa00440a11a04400>] +[8<018245aa45820100>] +[8<221c224180808041>] +[8<88000000>] +[8<0855800080550800>] +[8<2844004482440044>] +[8<0810204080412214>] +[8<00>]]]def +/Dfill{ + transform /maxy exch def /maxx exch def + transform /miny exch def /minx exch def + minx maxx gt{/minx maxx /maxx minx def def}if + miny maxy gt{/miny maxy /maxy miny def def}if + Dpatterns Dstipple 1 sub get exch 1 sub get + aload pop /stip exch def /stipw exch def /stiph 128 def + /imatrix[stipw 0 0 stiph 0 0]def + /tmatrix[stipw 0 0 stiph 0 0]def + /minx minx cvi stiph idiv stiph mul def + /miny miny cvi stipw idiv stipw mul def + gsave eoclip 0 setgray + miny stiph maxy{ + tmatrix exch 5 exch put + minx stipw maxx{ + tmatrix exch 4 exch put tmatrix setmatrix + stipw stiph true imatrix {stip} imagemask + }for + }for + grestore +}def +/Dp{Dfill Dstroke}def +/DP{Dfill currentpoint newpath moveto}def +end + +/ditstart{$DITroff begin + /nfonts 60 def % NFONTS makedev/ditroff dependent! + /fonts[nfonts{0}repeat]def + /fontnames[nfonts{()}repeat]def +/docsave save def +}def + +% character outcalls +/oc{ + /pswid exch def /cc exch def /name exch def + /ditwid pswid fontsize mul resolution mul 72000 div def + /ditsiz fontsize resolution mul 72 div def + ocprocs name known{ocprocs name get exec}{name cb}ifelse +}def +/fractm [.65 0 0 .6 0 0] def +/fraction{ + /fden exch def /fnum exch def gsave /cf currentfont def + cf fractm makefont setfont 0 .3 dm 2 copy neg rmoveto + fnum show rmoveto currentfont cf setfont(\244)show setfont fden show + grestore ditwid 0 rmoveto +}def +/oce{grestore ditwid 0 rmoveto}def +/dm{ditsiz mul}def +/ocprocs 50 dict def ocprocs begin +(14){(1)(4)fraction}def +(12){(1)(2)fraction}def +(34){(3)(4)fraction}def +(13){(1)(3)fraction}def +(23){(2)(3)fraction}def +(18){(1)(8)fraction}def +(38){(3)(8)fraction}def +(58){(5)(8)fraction}def +(78){(7)(8)fraction}def +(sr){gsave 0 .06 dm rmoveto(\326)show oce}def +(is){gsave 0 .15 dm rmoveto(\362)show oce}def +(->){gsave 0 .02 dm rmoveto(\256)show oce}def +(<-){gsave 0 .02 dm rmoveto(\254)show oce}def +(==){gsave 0 .05 dm rmoveto(\272)show oce}def +(uc){gsave currentpoint 400 .009 dm mul add translate + 8 -8 scale ucseal oce}def +end + +% an attempt at a PostScript FONT to implement ditroff special chars +% this will enable us to +% cache the little buggers +% generate faster, more compact PS out of psdit +% confuse everyone (including myself)! +50 dict dup begin +/FontType 3 def +/FontName /DIThacks def +/FontMatrix [.001 0 0 .001 0 0] def +/FontBBox [-260 -260 900 900] def% a lie but ... +/Encoding 256 array def +0 1 255{Encoding exch /.notdef put}for +Encoding + dup 8#040/space put %space + dup 8#110/rc put %right ceil + dup 8#111/lt put %left top curl + dup 8#112/bv put %bold vert + dup 8#113/lk put %left mid curl + dup 8#114/lb put %left bot curl + dup 8#115/rt put %right top curl + dup 8#116/rk put %right mid curl + dup 8#117/rb put %right bot curl + dup 8#120/rf put %right floor + dup 8#121/lf put %left floor + dup 8#122/lc put %left ceil + dup 8#140/sq put %square + dup 8#141/bx put %box + dup 8#142/ci put %circle + dup 8#143/br put %box rule + dup 8#144/rn put %root extender + dup 8#145/vr put %vertical rule + dup 8#146/ob put %outline bullet + dup 8#147/bu put %bullet + dup 8#150/ru put %rule + dup 8#151/ul put %underline + pop +/DITfd 100 dict def +/BuildChar{0 begin + /cc exch def /fd exch def + /charname fd /Encoding get cc get def + /charwid fd /Metrics get charname get def + /charproc fd /CharProcs get charname get def + charwid 0 fd /FontBBox get aload pop setcachedevice + 2 setlinejoin 40 setlinewidth + newpath 0 0 moveto gsave charproc grestore + end}def +/BuildChar load 0 DITfd put +/CharProcs 50 dict def +CharProcs begin +/space{}def +/.notdef{}def +/ru{500 0 rls}def +/rn{0 840 moveto 500 0 rls}def +/vr{0 800 moveto 0 -770 rls}def +/bv{0 800 moveto 0 -1000 rls}def +/br{0 840 moveto 0 -1000 rls}def +/ul{0 -140 moveto 500 0 rls}def +/ob{200 250 rmoveto currentpoint newpath 200 0 360 arc closepath stroke}def +/bu{200 250 rmoveto currentpoint newpath 200 0 360 arc closepath fill}def +/sq{80 0 rmoveto currentpoint dround newpath moveto + 640 0 rlineto 0 640 rlineto -640 0 rlineto closepath stroke}def +/bx{80 0 rmoveto currentpoint dround newpath moveto + 640 0 rlineto 0 640 rlineto -640 0 rlineto closepath fill}def +/ci{500 360 rmoveto currentpoint newpath 333 0 360 arc + 50 setlinewidth stroke}def + +/lt{0 -200 moveto 0 550 rlineto currx 800 2cx s4 add exch s4 a4p stroke}def +/lb{0 800 moveto 0 -550 rlineto currx -200 2cx s4 add exch s4 a4p stroke}def +/rt{0 -200 moveto 0 550 rlineto currx 800 2cx s4 sub exch s4 a4p stroke}def +/rb{0 800 moveto 0 -500 rlineto currx -200 2cx s4 sub exch s4 a4p stroke}def +/lk{0 800 moveto 0 300 -300 300 s4 arcto pop pop 1000 sub + 0 300 4 2 roll s4 a4p 0 -200 lineto stroke}def +/rk{0 800 moveto 0 300 s2 300 s4 arcto pop pop 1000 sub + 0 300 4 2 roll s4 a4p 0 -200 lineto stroke}def +/lf{0 800 moveto 0 -1000 rlineto s4 0 rls}def +/rf{0 800 moveto 0 -1000 rlineto s4 neg 0 rls}def +/lc{0 -200 moveto 0 1000 rlineto s4 0 rls}def +/rc{0 -200 moveto 0 1000 rlineto s4 neg 0 rls}def +end + +/Metrics 50 dict def Metrics begin +/.notdef 0 def +/space 500 def +/ru 500 def +/br 0 def +/lt 416 def +/lb 416 def +/rt 416 def +/rb 416 def +/lk 416 def +/rk 416 def +/rc 416 def +/lc 416 def +/rf 416 def +/lf 416 def +/bv 416 def +/ob 350 def +/bu 350 def +/ci 750 def +/bx 750 def +/sq 750 def +/rn 500 def +/ul 500 def +/vr 0 def +end + +DITfd begin +/s2 500 def /s4 250 def /s3 333 def +/a4p{arcto pop pop pop pop}def +/2cx{2 copy exch}def +/rls{rlineto stroke}def +/currx{currentpoint pop}def +/dround{transform round exch round exch itransform} def +end +end +/DIThacks exch definefont pop +ditstart +(psc)xT +576 1 1 xr +1(Times-Roman)xf 1 f +2(Times-Italic)xf 2 f +3(Times-Bold)xf 3 f +4(Times-BoldItalic)xf 4 f +5(Helvetica)xf 5 f +6(Helvetica-Bold)xf 6 f +7(Courier)xf 7 f +8(Courier-Bold)xf 8 f +9(Symbol)xf 9 f +10(DIThacks)xf 10 f +10 s +1 f +xi +%%EndProlog + +%%Page: 1 1 +10 s 10 xH 0 xS 1 f +3 f +22 s +1249 626(A)N +1420(N)X +1547(ew)X +1796(H)X +1933(ashing)X +2467(P)X +2574(ackage)X +3136(for)X +3405(U)X +3532(N)X +3659(IX)X +2 f +20 s +3855 562(1)N +1 f +12 s +1607 779(Margo)N +1887(Seltzer)X +9 f +2179(-)X +1 f +2256(University)X +2686(of)X +2790(California,)X +3229(Berkeley)X +2015 875(Ozan)N +2242(Yigit)X +9 f +2464(-)X +1 f +2541(York)X +2762(University)X +3 f +2331 1086(ABSTRACT)N +1 f +10 s +1152 1222(UNIX)N +1385(support)X +1657(of)X +1756(disk)X +1921(oriented)X +2216(hashing)X +2497(was)X +2654(originally)X +2997(provided)X +3314(by)X +2 f +3426(dbm)X +1 f +3595([ATT79])X +3916(and)X +1152 1310(subsequently)N +1595(improved)X +1927(upon)X +2112(in)X +2 f +2199(ndbm)X +1 f +2402([BSD86].)X +2735(In)X +2826(AT&T)X +3068(System)X +3327(V,)X +3429(in-memory)X +3809(hashed)X +1152 1398(storage)N +1420(and)X +1572(access)X +1814(support)X +2090(was)X +2251(added)X +2479(in)X +2577(the)X +2 f +2711(hsearch)X +1 f +3000(library)X +3249(routines)X +3542([ATT85].)X +3907(The)X +1152 1486(result)N +1367(is)X +1457(a)X +1530(system)X +1789(with)X +1968(two)X +2125(incompatible)X +2580(hashing)X +2865(schemes,)X +3193(each)X +3377(with)X +3555(its)X +3666(own)X +3840(set)X +3965(of)X +1152 1574(shortcomings.)N +1152 1688(This)N +1316(paper)X +1517(presents)X +1802(the)X +1922(design)X +2152(and)X +2289(performance)X +2717(characteristics)X +3198(of)X +3286(a)X +3343(new)X +3498(hashing)X +3768(package)X +1152 1776(providing)N +1483(a)X +1539(superset)X +1822(of)X +1909(the)X +2027(functionality)X +2456(provided)X +2761(by)X +2 f +2861(dbm)X +1 f +3019(and)X +2 f +3155(hsearch)X +1 f +3409(.)X +3469(The)X +3614(new)X +3768(package)X +1152 1864(uses)N +1322(linear)X +1537(hashing)X +1818(to)X +1912(provide)X +2189(ef\256cient)X +2484(support)X +2755(of)X +2853(both)X +3026(memory)X +3324(based)X +3538(and)X +3685(disk)X +3849(based)X +1152 1952(hash)N +1319(tables)X +1526(with)X +1688(performance)X +2115(superior)X +2398(to)X +2480(both)X +2 f +2642(dbm)X +1 f +2800(and)X +2 f +2936(hsearch)X +1 f +3210(under)X +3413(most)X +3588(conditions.)X +3 f +1380 2128(Introduction)N +1 f +892 2260(Current)N +1196(UNIX)X +1456(systems)X +1768(offer)X +1984(two)X +2163(forms)X +2409(of)X +720 2348(hashed)N +973(data)X +1137(access.)X +2 f +1413(Dbm)X +1 f +1599(and)X +1745(its)X +1850(derivatives)X +2231(provide)X +720 2436(keyed)N +939(access)X +1171(to)X +1259(disk)X +1418(resident)X +1698(data)X +1858(while)X +2 f +2062(hsearch)X +1 f +2342(pro-)X +720 2524(vides)N +929(access)X +1175(for)X +1309(memory)X +1616(resident)X +1910(data.)X +2124(These)X +2356(two)X +720 2612(access)N +979(methods)X +1302(are)X +1453(incompatible)X +1923(in)X +2037(that)X +2209(memory)X +720 2700(resident)N +1011(hash)X +1195(tables)X +1419(may)X +1593(not)X +1731(be)X +1843(stored)X +2075(on)X +2191(disk)X +2360(and)X +720 2788(disk)N +884(resident)X +1169(tables)X +1387(cannot)X +1632(be)X +1739(read)X +1909(into)X +2063(memory)X +2360(and)X +720 2876(accessed)N +1022(using)X +1215(the)X +1333(in-memory)X +1709(routines.)X +2 f +892 2990(Dbm)N +1 f +1091(has)X +1241(several)X +1512(shortcomings.)X +2026(Since)X +2247(data)X +2423(is)X +720 3078(assumed)N +1032(to)X +1130(be)X +1242(disk)X +1411(resident,)X +1721(each)X +1905(access)X +2146(requires)X +2440(a)X +720 3166(system)N +963(call,)X +1120(and)X +1257(almost)X +1491(certainly,)X +1813(a)X +1869(disk)X +2022(operation.)X +2365(For)X +720 3254(extremely)N +1072(large)X +1264(databases,)X +1623(where)X +1851(caching)X +2131(is)X +2214(unlikely)X +720 3342(to)N +810(be)X +914(effective,)X +1244(this)X +1386(is)X +1466(acceptable,)X +1853(however,)X +2177(when)X +2378(the)X +720 3430(database)N +1022(is)X +1100(small)X +1298(\(i.e.)X +1447(the)X +1569(password)X +1896(\256le\),)X +2069(performance)X +720 3518(improvements)N +1204(can)X +1342(be)X +1443(obtained)X +1744(through)X +2018(caching)X +2293(pages)X +720 3606(of)N +818(the)X +947(database)X +1255(in)X +1348(memory.)X +1685(In)X +1782(addition,)X +2 f +2094(dbm)X +1 f +2262(cannot)X +720 3694(store)N +902(data)X +1062(items)X +1261(whose)X +1492(total)X +1660(key)X +1802(and)X +1943(data)X +2102(size)X +2252(exceed)X +720 3782(the)N +850(page)X +1034(size)X +1191(of)X +1290(the)X +1420(hash)X +1599(table.)X +1827(Similarly,)X +2176(if)X +2257(two)X +2409(or)X +720 3870(more)N +907(keys)X +1076(produce)X +1357(the)X +1477(same)X +1664(hash)X +1833(value)X +2029(and)X +2166(their)X +2334(total)X +720 3958(size)N +876(exceeds)X +1162(the)X +1291(page)X +1474(size,)X +1650(the)X +1779(table)X +1966(cannot)X +2210(store)X +2396(all)X +720 4046(the)N +838(colliding)X +1142(keys.)X +892 4160(The)N +1050(in-memory)X +2 f +1439(hsearch)X +1 f +1725(routines)X +2015(have)X +2199(different)X +720 4248(shortcomings.)N +1219(First,)X +1413(the)X +1539(notion)X +1771(of)X +1865(a)X +1928(single)X +2146(hash)X +2320(table)X +720 4336(is)N +807(embedded)X +1171(in)X +1266(the)X +1397(interface,)X +1732(preventing)X +2108(an)X +2217(applica-)X +720 4424(tion)N +902(from)X +1116(accessing)X +1482(multiple)X +1806(tables)X +2050(concurrently.)X +720 4512(Secondly,)N +1063(the)X +1186(routine)X +1438(to)X +1525(create)X +1743(a)X +1804(hash)X +1976(table)X +2157(requires)X +2440(a)X +720 4600(parameter)N +1066(which)X +1286(declares)X +1573(the)X +1694(size)X +1842(of)X +1932(the)X +2053(hash)X +2223(table.)X +2422(If)X +720 4688(this)N +856(size)X +1001(is)X +1074(set)X +1183(too)X +1305(low,)X +1465(performance)X +1892(degradation)X +2291(or)X +2378(the)X +720 4776(inability)N +1008(to)X +1092(add)X +1230(items)X +1425(to)X +1509(the)X +1628(table)X +1805(may)X +1964(result.)X +2223(In)X +2311(addi-)X +720 4864(tion,)N +2 f +910(hsearch)X +1 f +1210(requires)X +1515(that)X +1681(the)X +1825(application)X +2226(allocate)X +720 4952(memory)N +1037(for)X +1181(the)X +1329(key)X +1495(and)X +1661(data)X +1845(items.)X +2108(Lastly,)X +2378(the)X +2 f +720 5040(hsearch)N +1 f +1013(routines)X +1310(provide)X +1594(no)X +1713(interface)X +2034(to)X +2135(store)X +2329(hash)X +720 5128(tables)N +927(on)X +1027(disk.)X +16 s +720 5593 MXY +864 0 Dl +2 f +8 s +760 5648(1)N +1 f +9 s +5673(UNIX)Y +990(is)X +1056(a)X +1106(registered)X +1408(trademark)X +1718(of)X +1796(AT&T.)X +10 s +2878 2128(The)N +3032(goal)X +3199(of)X +3295(our)X +3431(work)X +3625(was)X +3779(to)X +3870(design)X +4108(and)X +4253(imple-)X +2706 2216(ment)N +2900(a)X +2970(new)X +3138(package)X +3436(that)X +3590(provides)X +3899(a)X +3968(superset)X +4264(of)X +4364(the)X +2706 2304(functionality)N +3144(of)X +3240(both)X +2 f +3411(dbm)X +1 f +3578(and)X +2 f +3723(hsearch)X +1 f +3977(.)X +4045(The)X +4198(package)X +2706 2392(had)N +2871(to)X +2982(overcome)X +3348(the)X +3495(interface)X +3826(shortcomings)X +4306(cited)X +2706 2480(above)N +2930(and)X +3078(its)X +3185(implementation)X +3719(had)X +3867(to)X +3961(provide)X +4238(perfor-)X +2706 2568(mance)N +2942(equal)X +3142(or)X +3235(superior)X +3524(to)X +3612(that)X +3758(of)X +3851(the)X +3975(existing)X +4253(imple-)X +2706 2656(mentations.)N +3152(In)X +3274(order)X +3498(to)X +3614(provide)X +3913(a)X +4003(compact)X +4329(disk)X +2706 2744(representation,)N +3224(graceful)X +3531(table)X +3729(growth,)X +4018(and)X +4176(expected)X +2706 2832(constant)N +3033(time)X +3234(performance,)X +3720(we)X +3873(selected)X +4191(Litwin's)X +2706 2920(linear)N +2923(hashing)X +3206(algorithm)X +3551([LAR88,)X +3872(LIT80].)X +4178(We)X +4324(then)X +2706 3008(enhanced)N +3037(the)X +3161(algorithm)X +3498(to)X +3586(handle)X +3826(page)X +4004(over\257ows)X +4346(and)X +2706 3096(large)N +2900(key)X +3049(handling)X +3362(with)X +3537(a)X +3606(single)X +3830(mechanism,)X +4248(named)X +2706 3184(buddy-in-waiting.)N +3 f +2975 3338(Existing)N +3274(UNIX)X +3499(Hashing)X +3802(Techniques)X +1 f +2878 3470(Over)N +3076(the)X +3210(last)X +3357(decade,)X +3637(several)X +3901(dynamic)X +4213(hashing)X +2706 3558(schemes)N +3000(have)X +3174(been)X +3348(developed)X +3700(for)X +3816(the)X +3936(UNIX)X +4159(timeshar-)X +2706 3646(ing)N +2856(system,)X +3146(starting)X +3433(with)X +3622(the)X +3767(inclusion)X +4107(of)X +2 f +4221(dbm)X +1 f +4359(,)X +4426(a)X +2706 3734(minimal)N +3008(database)X +3321(library)X +3571(written)X +3834(by)X +3950(Ken)X +4120(Thompson)X +2706 3822([THOM90],)N +3141(in)X +3248(the)X +3391(Seventh)X +3694(Edition)X +3974(UNIX)X +4220(system.)X +2706 3910(Since)N +2916(then,)X +3106(an)X +3214(extended)X +3536(version)X +3804(of)X +3903(the)X +4032(same)X +4228(library,)X +2 f +2706 3998(ndbm)N +1 f +2884(,)X +2933(and)X +3078(a)X +3142(public-domain)X +3637(clone)X +3839(of)X +3934(the)X +4060(latter,)X +2 f +4273(sdbm)X +1 f +4442(,)X +2706 4086(have)N +2902(been)X +3098(developed.)X +3491(Another)X +3797 0.1645(interface-compatible)AX +2706 4174(library)N +2 f +2950(gdbm)X +1 f +3128(,)X +3178(was)X +3333(recently)X +3622(made)X +3826(available)X +4145(as)X +4241(part)X +4395(of)X +2706 4262(the)N +2829(Free)X +2997(Software)X +3312(Foundation's)X +3759(\(FSF\))X +3970(software)X +4271(distri-)X +2706 4350(bution.)N +2878 4464(All)N +3017(of)X +3121(these)X +3323(implementations)X +3893(are)X +4029(based)X +4248(on)X +4364(the)X +2706 4552(idea)N +2871(of)X +2969(revealing)X +3299(just)X +3445(enough)X +3711(bits)X +3856(of)X +3953(a)X +4019(hash)X +4196(value)X +4400(to)X +2706 4640(locate)N +2920(a)X +2978(page)X +3151(in)X +3234(a)X +3291(single)X +3503(access.)X +3770(While)X +2 f +3987(dbm/ndbm)X +1 f +4346(and)X +2 f +2706 4728(sdbm)N +1 f +2908(map)X +3079(the)X +3210(hash)X +3390(value)X +3597(directly)X +3874(to)X +3968(a)X +4036(disk)X +4201(address,)X +2 f +2706 4816(gdbm)N +1 f +2921(uses)X +3096(the)X +3231(hash)X +3414(value)X +3624(to)X +3722(index)X +3936(into)X +4096(a)X +2 f +4168(directory)X +1 f +2706 4904([ENB88])N +3020(containing)X +3378(disk)X +3531(addresses.)X +2878 5018(The)N +2 f +3033(hsearch)X +1 f +3317(routines)X +3605(in)X +3697(System)X +3962(V)X +4049(are)X +4177(designed)X +2706 5106(to)N +2804(provide)X +3085(memory-resident)X +3669(hash)X +3852(tables.)X +4115(Since)X +4328(data)X +2706 5194(access)N +2948(does)X +3131(not)X +3269(require)X +3533(disk)X +3702(access,)X +3964(simple)X +4213(hashing)X +2706 5282(schemes)N +3010(which)X +3238(may)X +3408(require)X +3667(multiple)X +3964(probes)X +4209(into)X +4364(the)X +2706 5370(table)N +2889(are)X +3015(used.)X +3209(A)X +3294(more)X +3486(interesting)X +3851(version)X +4114(of)X +2 f +4208(hsearch)X +1 f +2706 5458(is)N +2784(a)X +2845(public)X +3070(domain)X +3335(library,)X +2 f +3594(dynahash)X +1 f +3901(,)X +3945(that)X +4089(implements)X +2706 5546(Larson's)N +3036(in-memory)X +3440(adaptation)X +3822([LAR88])X +4164(of)X +4279(linear)X +2706 5634(hashing)N +2975([LIT80].)X +3 f +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +1 f +4424(1)X + +2 p +%%Page: 2 2 +10 s 10 xH 0 xS 1 f +3 f +432 258(A)N +510(New)X +682(Hashing)X +985(Package)X +1290(for)X +1413(UNIX)X +3663(Seltzer)X +3920(&)X +4007(Yigit)X +2 f +1074 538(dbm)N +1 f +1232(and)X +2 f +1368(ndbm)X +1 f +604 670(The)N +2 f +760(dbm)X +1 f +928(and)X +2 f +1074(ndbm)X +1 f +1282(library)X +1526(implementations)X +2089(are)X +432 758(based)N +667(on)X +799(the)X +949(same)X +1166(algorithm)X +1529(by)X +1661(Ken)X +1846(Thompson)X +432 846([THOM90,)N +824(TOR88,)X +1113(WAL84],)X +1452(but)X +1582(differ)X +1789(in)X +1879(their)X +2054(pro-)X +432 934(grammatic)N +801(interfaces.)X +1160(The)X +1311(latter)X +1502(is)X +1581(a)X +1643(modi\256ed)X +1952(version)X +432 1022(of)N +533(the)X +665(former)X +918(which)X +1148(adds)X +1328(support)X +1601(for)X +1728(multiple)X +2027(data-)X +432 1110(bases)N +634(to)X +724(be)X +828(open)X +1011(concurrently.)X +1484(The)X +1636(discussion)X +1996(of)X +2090(the)X +432 1198(algorithm)N +774(that)X +925(follows)X +1196(is)X +1280(applicable)X +1640(to)X +1732(both)X +2 f +1904(dbm)X +1 f +2072(and)X +2 f +432 1286(ndbm)N +1 f +610(.)X +604 1400(The)N +760(basic)X +956(structure)X +1268(of)X +2 f +1366(dbm)X +1 f +1535(calls)X +1712(for)X +1836(\256xed-sized)X +432 1488(disk)N +612(blocks)X +868(\(buckets\))X +1214(and)X +1377(an)X +2 f +1499(access)X +1 f +1755(function)X +2068(that)X +432 1576(maps)N +623(a)X +681(key)X +819(to)X +902(a)X +959(bucket.)X +1234(The)X +1380(interface)X +1683(routines)X +1962(use)X +2090(the)X +2 f +432 1664(access)N +1 f +673(function)X +970(to)X +1062(obtain)X +1292(the)X +1420(appropriate)X +1816(bucket)X +2060(in)X +2152(a)X +432 1752(single)N +643(disk)X +796(access.)X +604 1866(Within)N +869(the)X +2 f +1010(access)X +1 f +1263(function,)X +1593(a)X +1672(bit-randomizing)X +432 1954(hash)N +610(function)X +2 f +8 s +877 1929(2)N +1 f +10 s +940 1954(is)N +1024(used)X +1202(to)X +1294(convert)X +1565(a)X +1631(key)X +1777(into)X +1931(a)X +1997(32-bit)X +432 2042(hash)N +605(value.)X +825(Out)X +971(of)X +1064(these)X +1254(32)X +1359(bits,)X +1519(only)X +1686(as)X +1778(many)X +1981(bits)X +2121(as)X +432 2130(necessary)N +773(are)X +900(used)X +1075(to)X +1165(determine)X +1514(the)X +1639(particular)X +1974(bucket)X +432 2218(on)N +533(which)X +750(a)X +807(key)X +944(resides.)X +1228(An)X +1347(in-memory)X +1724(bitmap)X +1967(is)X +2041(used)X +432 2306(to)N +533(determine)X +893(how)X +1070(many)X +1287(bits)X +1441(are)X +1579(required.)X +1905(Each)X +2104(bit)X +432 2394(indicates)N +746(whether)X +1033(its)X +1136(associated)X +1494(bucket)X +1736(has)X +1871(been)X +2051(split)X +432 2482(yet)N +562(\(a)X +657(0)X +728(indicating)X +1079(that)X +1230(the)X +1359(bucket)X +1604(has)X +1742(not)X +1875(yet)X +2004(split\).)X +432 2570(The)N +590(use)X +730(of)X +830(the)X +961(hash)X +1141(function)X +1441(and)X +1590(the)X +1720(bitmap)X +1974(is)X +2059(best)X +432 2658(described)N +769(by)X +878(stepping)X +1177(through)X +1454(database)X +1759(creation)X +2046(with)X +432 2746(multiple)N +718(invocations)X +1107(of)X +1194(a)X +2 f +1250(store)X +1 f +1430(operation.)X +604 2860(Initially,)N +906(the)X +1033(hash)X +1209(table)X +1394(contains)X +1690(a)X +1755(single)X +1974(bucket)X +432 2948(\(bucket)N +711(0\),)X +836(the)X +972(bit)X +1094(map)X +1270(contains)X +1575(a)X +1649(single)X +1878(bit)X +2000(\(bit)X +2148(0)X +432 3036(corresponding)N +913(to)X +997(bucket)X +1233(0\),)X +1342(and)X +1480(0)X +1542(bits)X +1699(of)X +1788(a)X +1846(hash)X +2014(value)X +432 3124(are)N +560(examined)X +901(to)X +992(determine)X +1342(where)X +1568(a)X +1633(key)X +1778(is)X +1860(placed)X +2099(\(in)X +432 3212(bucket)N +670(0\).)X +801(When)X +1017(bucket)X +1255(0)X +1319(is)X +1396(full,)X +1551(its)X +1650(bit)X +1758(in)X +1844(the)X +1966(bitmap)X +432 3300(\(bit)N +564(0\))X +652(is)X +726(set,)X +856(and)X +993(its)X +1089(contents)X +1377(are)X +1497(split)X +1655(between)X +1943(buckets)X +432 3388(0)N +499(and)X +641(1,)X +727(by)X +833(considering)X +1233(the)X +1357(0)X +2 f +7 s +3356(th)Y +10 s +1 f +1480 3388(bit)N +1590(\(the)X +1741(lowest)X +1976(bit)X +2086(not)X +432 3476(previously)N +800(examined\))X +1169(of)X +1266(the)X +1393(hash)X +1569(value)X +1772(for)X +1895(each)X +2072(key)X +432 3564(within)N +668(the)X +798(bucket.)X +1064(Given)X +1292(a)X +1359(well-designed)X +1840(hash)X +2018(func-)X +432 3652(tion,)N +613(approximately)X +1112(half)X +1273(of)X +1376(the)X +1510(keys)X +1693(will)X +1853(have)X +2041(hash)X +432 3740(values)N +666(with)X +837(the)X +964(0)X +2 f +7 s +3708(th)Y +10 s +1 f +1090 3740(bit)N +1203(set.)X +1341(All)X +1471(such)X +1646(keys)X +1821(and)X +1965(associ-)X +432 3828(ated)N +586(data)X +740(are)X +859(moved)X +1097(to)X +1179(bucket)X +1413(1,)X +1493(and)X +1629(the)X +1747(rest)X +1883(remain)X +2126(in)X +432 3916(bucket)N +666(0.)X +604 4030(After)N +804(this)X +949(split,)X +1135(the)X +1262(\256le)X +1393(now)X +1560(contains)X +1856(two)X +2005(buck-)X +432 4118(ets,)N +562(and)X +699(the)X +818(bitmap)X +1061(contains)X +1349(three)X +1530(bits:)X +1687(the)X +1805(0)X +2 f +7 s +4086(th)Y +10 s +1 f +1922 4118(bit)N +2026(is)X +2099(set)X +432 4206(to)N +525(indicate)X +810(a)X +876(bucket)X +1120(0)X +1190(split)X +1357(when)X +1561(no)X +1671(bits)X +1816(of)X +1913(the)X +2041(hash)X +432 4294(value)N +648(are)X +789(considered,)X +1199(and)X +1357(two)X +1519(more)X +1726(unset)X +1937(bits)X +2094(for)X +432 4382(buckets)N +706(0)X +775(and)X +920(1.)X +1029(The)X +1183(placement)X +1542(of)X +1638(an)X +1742(incoming)X +2072(key)X +432 4470(now)N +604(requires)X +897(examination)X +1327(of)X +1428(the)X +1560(0)X +2 f +7 s +4438(th)Y +10 s +1 f +1691 4470(bit)N +1809(of)X +1910(the)X +2041(hash)X +432 4558(value,)N +667(and)X +824(the)X +963(key)X +1119(is)X +1212(placed)X +1462(either)X +1685(in)X +1787(bucket)X +2041(0)X +2121(or)X +432 4646(bucket)N +674(1.)X +782(If)X +864(either)X +1075(bucket)X +1317(0)X +1385(or)X +1480(bucket)X +1722(1)X +1790(\256lls)X +1937(up,)X +2064(it)X +2135(is)X +432 4734(split)N +598(as)X +693(before,)X +947(its)X +1050(bit)X +1162(is)X +1243(set)X +1360(in)X +1450(the)X +1576(bitmap,)X +1846(and)X +1990(a)X +2054(new)X +432 4822(set)N +541(of)X +628(unset)X +817(bits)X +952(are)X +1071(added)X +1283(to)X +1365(the)X +1483(bitmap.)X +604 4936(Each)N +791(time)X +959(we)X +1079(consider)X +1376(a)X +1437(new)X +1596(bit)X +1705(\(bit)X +1841(n\),)X +1953(we)X +2072(add)X +432 5024(2)N +2 f +7 s +4992(n)Y +9 f +509(+)X +1 f +540(1)X +10 s +595 5024(bits)N +737(to)X +826(the)X +951(bitmap)X +1199(and)X +1341(obtain)X +1567(2)X +2 f +7 s +4992(n)Y +9 f +1644(+)X +1 f +1675(1)X +10 s +1729 5024(more)N +1920(address-)X +432 5112(able)N +595(buckets)X +869(in)X +960(the)X +1087(\256le.)X +1258(As)X +1376(a)X +1441(result,)X +1668(the)X +1795(bitmap)X +2045(con-)X +432 5200(tains)N +618(the)X +751(previous)X +1062(2)X +2 f +7 s +5168(n)Y +9 f +1139(+)X +1 f +1170(1)X +2 f +10 s +9 f +5200(-)Y +1 f +1242(1)X +1317(bits)X +1467(\(1)X +2 f +9 f +1534(+)X +1 f +1578(2)X +2 f +9 f +(+)S +1 f +1662(4)X +2 f +9 f +(+)S +1 f +1746(...)X +2 f +9 f +(+)S +1 f +1850(2)X +2 f +7 s +5168(n)Y +10 s +1 f +1931 5200(\))N +1992(which)X +432 5288(trace)N +649(the)X +807(entire)X +2 f +1050(split)X +1247(history)X +1 f +1529(of)X +1656(the)X +1813(addressable)X +16 s +432 5433 MXY +864 0 Dl +2 f +8 s +472 5488(2)N +1 f +9 s +523 5513(This)N +670(bit-randomizing)X +1153(property)X +1416(is)X +1482(important)X +1780(to)X +1854(obtain)X +2052(radi-)X +432 5593(cally)N +599(different)X +874(hash)X +1033(values)X +1244(for)X +1355(nearly)X +1562(identical)X +1836(keys,)X +2012(which)X +432 5673(in)N +506(turn)X +640(avoids)X +846(clustering)X +1148(of)X +1226(such)X +1376(keys)X +1526(in)X +1600(a)X +1650(single)X +1840(bucket.)X +10 s +2418 538(buckets.)N +2590 652(Given)N +2809(a)X +2868(key)X +3007(and)X +3146(the)X +3267(bitmap)X +3512(created)X +3768(by)X +3871(this)X +4009(algo-)X +2418 740(rithm,)N +2638(we)X +2759(\256rst)X +2910(examine)X +3209(bit)X +3320(0)X +3386(of)X +3479(the)X +3603(bitmap)X +3851(\(the)X +4002(bit)X +4112(to)X +2418 828(consult)N +2673(when)X +2871(0)X +2934(bits)X +3072(of)X +3162(the)X +3283(hash)X +3453(value)X +3650(are)X +3772(being)X +3973(exam-)X +2418 916(ined\).)N +2631(If)X +2713(it)X +2785(is)X +2866(set)X +2982(\(indicating)X +3356(that)X +3503(the)X +3628(bucket)X +3869(split\),)X +4080(we)X +2418 1004(begin)N +2617(considering)X +3012(the)X +3131(bits)X +3267(of)X +3355(the)X +3473(32-bit)X +3684(hash)X +3851(value.)X +4085(As)X +2418 1092(bit)N +2525(n)X +2587(is)X +2662(revealed,)X +2977(a)X +3035(mask)X +3226(equal)X +3422(to)X +3506(2)X +2 f +7 s +1060(n)Y +9 f +3583(+)X +1 f +3614(1)X +2 f +10 s +9 f +1092(-)Y +1 f +3686(1)X +3748(will)X +3894(yield)X +4076(the)X +2418 1180(current)N +2675(bucket)X +2918(address.)X +3228(Adding)X +3496(2)X +2 f +7 s +1148(n)Y +9 f +3573(+)X +1 f +3604(1)X +2 f +10 s +9 f +1180(-)Y +1 f +3676(1)X +3744(to)X +3834(the)X +3960(bucket)X +2418 1268(address)N +2701(identi\256es)X +3035(which)X +3272(bit)X +3397(in)X +3500(the)X +3639(bitmap)X +3902(must)X +4098(be)X +2418 1356(checked.)N +2743(We)X +2876(continue)X +3173(revealing)X +3493(bits)X +3628(of)X +3715(the)X +3833(hash)X +4000(value)X +2418 1444(until)N +2591(all)X +2698(set)X +2814(bits)X +2955(in)X +3043(the)X +3167(bitmap)X +3415(are)X +3540(exhausted.)X +3907(The)X +4058(fol-)X +2418 1532(lowing)N +2682(algorithm,)X +3055(a)X +3133(simpli\256cation)X +3614(of)X +3723(the)X +3863(algorithm)X +2418 1620(due)N +2565(to)X +2658(Ken)X +2823(Thompson)X +3196([THOM90,)X +3590(TOR88],)X +3908(uses)X +4076(the)X +2418 1708(hash)N +2625(value)X +2839(and)X +2995(the)X +3133(bitmap)X +3395(to)X +3497(calculate)X +3823(the)X +3960(bucket)X +2418 1796(address)N +2679(as)X +2766(discussed)X +3093(above.)X +0(Courier)xf 0 f +1 f +0 f +8 s +2418 2095(hash)N +2608(=)X +2684 -0.4038(calchash\(key\);)AX +2418 2183(mask)N +2608(=)X +2684(0;)X +2418 2271(while)N +2646 -0.4018(\(isbitset\(\(hash)AX +3254(&)X +3330(mask\))X +3558(+)X +3634(mask\)\))X +2706 2359(mask)N +2896(=)X +2972(\(mask)X +3200(<<)X +3314(1\))X +3428(+)X +3504(1;)X +2418 2447(bucket)N +2684(=)X +2760(hash)X +2950(&)X +3026(mask;)X +2 f +10 s +3211 2812(sdbm)N +1 f +2590 2944(The)N +2 f +2738(sdbm)X +1 f +2930(library)X +3167(is)X +3243(a)X +3302(public-domain)X +3791(clone)X +3987(of)X +4076(the)X +2 f +2418 3032(ndbm)N +1 f +2638(library,)X +2914(developed)X +3286(by)X +3408(Ozan)X +3620(Yigit)X +3826(to)X +3929(provide)X +2 f +2418 3120(ndbm)N +1 f +2596('s)X +2692(functionality)X +3139(under)X +3359(some)X +3565(versions)X +3869(of)X +3973(UNIX)X +2418 3208(that)N +2559(exclude)X +2830(it)X +2894(for)X +3008(licensing)X +3317(reasons)X +3578([YIG89].)X +3895(The)X +4040(pro-)X +2418 3296(grammer)N +2735(interface,)X +3064(and)X +3207(the)X +3332(basic)X +3524(structure)X +3832(of)X +2 f +3926(sdbm)X +1 f +4121(is)X +2418 3384(identical)N +2733(to)X +2 f +2834(ndbm)X +1 f +3051(but)X +3192(internal)X +3476(details)X +3723(of)X +3828(the)X +2 f +3964(access)X +1 f +2418 3472(function,)N +2726(such)X +2894(as)X +2982(the)X +3101(calculation)X +3474(of)X +3561(the)X +3679(bucket)X +3913(address,)X +2418 3560(and)N +2563(the)X +2690(use)X +2825(of)X +2920(different)X +3225(hash)X +3400(functions)X +3726(make)X +3928(the)X +4054(two)X +2418 3648(incompatible)N +2856(at)X +2934(the)X +3052(database)X +3349(level.)X +2590 3762(The)N +2 f +2740(sdbm)X +1 f +2934(library)X +3173(is)X +3251(based)X +3458(on)X +3562(a)X +3622(simpli\256ed)X +3965(imple-)X +2418 3850(mentation)N +2778(of)X +2885(Larson's)X +3206(1978)X +2 f +3406(dynamic)X +3717(hashing)X +1 f +4009(algo-)X +2418 3938(rithm)N +2616(including)X +2943(the)X +2 f +3066(re\256nements)X +3461(and)X +3605(variations)X +1 f +3953(of)X +4044(sec-)X +2418 4026(tion)N +2562(5)X +2622([LAR78].)X +2956(Larson's)X +3257(original)X +3526(algorithm)X +3857(calls)X +4024(for)X +4138(a)X +2418 4114(forest)N +2635(of)X +2736(binary)X +2975(hash)X +3156(trees)X +3341(that)X +3494(are)X +3626(accessed)X +3941(by)X +4054(two)X +2418 4202(hash)N +2586(functions.)X +2925(The)X +3071(\256rst)X +3216(hash)X +3384(function)X +3672(selects)X +3907(a)X +3964(partic-)X +2418 4290(ular)N +2571(tree)X +2720(within)X +2952(the)X +3078(forest.)X +3309(The)X +3462(second)X +3713(hash)X +3887(function,)X +2418 4378(which)N +2659(is)X +2757(required)X +3070(to)X +3177(be)X +3297(a)X +3377(boolean)X +3675(pseudo-random)X +2418 4466(number)N +2687(generator)X +3015(that)X +3159(is)X +3236(seeded)X +3479(by)X +3583(the)X +3705(key,)X +3865(is)X +3942(used)X +4112(to)X +2418 4554(traverse)N +2733(the)X +2890(tree)X +3070(until)X +3275(internal)X +3579(\(split\))X +3829(nodes)X +4075(are)X +2418 4642(exhausted)N +2763(and)X +2903(an)X +3003(external)X +3286(\(non-split\))X +3648(node)X +3827(is)X +3903(reached.)X +2418 4730(The)N +2571(bucket)X +2813(addresses)X +3149(are)X +3276(stored)X +3500(directly)X +3772(in)X +3861(the)X +3986(exter-)X +2418 4818(nal)N +2536(nodes.)X +2590 4932(Larson's)N +2903(re\256nements)X +3309(are)X +3440(based)X +3655(on)X +3767(the)X +3897(observa-)X +2418 5020(tion)N +2570(that)X +2718(the)X +2844(nodes)X +3059(can)X +3199(be)X +3303(represented)X +3702(by)X +3809(a)X +3872(single)X +4090(bit)X +2418 5108(that)N +2569(is)X +2653(set)X +2773(for)X +2898(internal)X +3174(nodes)X +3392(and)X +3539(not)X +3672(set)X +3791(for)X +3915(external)X +2418 5196(nodes,)N +2652(resulting)X +2959(in)X +3048(a)X +3111(radix)X +3303(search)X +3536(trie.)X +3709(Figure)X +3944(1)X +4010(illus-)X +2418 5284(trates)N +2621(this.)X +2804(Nodes)X +3037(A)X +3123(and)X +3267(B)X +3348(are)X +3475(internal)X +3748(\(split\))X +3967(nodes,)X +2418 5372(thus)N +2573(having)X +2813(no)X +2915(bucket)X +3151(addresses)X +3480(associated)X +3831(with)X +3994(them.)X +2418 5460(Instead,)N +2693(the)X +2814(external)X +3096(nodes)X +3306(\(C,)X +3429(D,)X +3530(and)X +3669(E\))X +3768(each)X +3938(need)X +4112(to)X +2418 5548(refer)N +2594(to)X +2679(a)X +2738(bucket)X +2975(address.)X +3279(These)X +3494(bucket)X +3731(addresses)X +4062(can)X +2418 5636(be)N +2529(stored)X +2760(in)X +2857(the)X +2990(trie)X +3132(itself)X +3327(where)X +3559(the)X +3691(subtries)X +3974(would)X +3 f +432 5960(2)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +3 p +%%Page: 3 3 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +720 258(Seltzer)N +977(&)X +1064(Yigit)X +3278(A)X +3356(New)X +3528(Hashing)X +3831(Package)X +4136(for)X +4259(UNIX)X +1 f +720 538(live)N +862(if)X +933(they)X +1092(existed)X +1340([KNU68].)X +1709(For)X +1841(example,)X +2154(if)X +2224(nodes)X +2432(F)X +720 626(and)N +858(G)X +938(were)X +1117(the)X +1237(children)X +1522(of)X +1610(node)X +1787(C,)X +1881(the)X +2000(bucket)X +2235(address)X +720 714(L00)N +886(could)X +1101(reside)X +1330(in)X +1429(the)X +1563(bits)X +1714(that)X +1870(will)X +2030(eventually)X +2400(be)X +720 802(used)N +887(to)X +969(store)X +1145(nodes)X +1352(F)X +1416(and)X +1552(G)X +1630(and)X +1766(all)X +1866(their)X +2033(children.)X +10 f +720 890 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +1894 2247(L1)N +784 1925(A)N +1431(E)X +1106 2247(D)N +1428 1281(C)N +1109 1603(B)N +1884 1930(L01)N +1879 1286(L00)N +1221 1814(1)N +903 2131(1)N +1221 1402(0)N +903 1714(0)N +1 Dt +1397 1821 MXY +-8 -32 Dl +-5 19 Dl +-20 6 Dl +33 7 Dl +-187 -182 Dl +1397 1322 MXY +-33 7 Dl +20 6 Dl +5 19 Dl +8 -32 Dl +-187 182 Dl +1069 1639 MXY +-32 7 Dl +20 6 Dl +5 19 Dl +7 -32 Dl +-186 182 Dl +1374 1891 MXY +185 Dc +1779 2133 MXY +0 161 Dl +322 0 Dl +0 -161 Dl +-322 0 Dl +1811 MY +0 161 Dl +322 0 Dl +0 -161 Dl +-322 0 Dl +1166 MY +0 161 Dl +322 0 Dl +0 -161 Dl +-322 0 Dl +1052 2213 MXY +185 Dc +1569 MY +185 Dc +720 1881 MXY +185 Dc +1779 2213 MXY +-28 -17 Dl +10 17 Dl +-10 18 Dl +28 -18 Dl +-543 0 Dl +1769 1891 MXY +-28 -18 Dl +10 18 Dl +-10 18 Dl +28 -18 Dl +-201 0 Dl +1364 1247 MXY +185 Dc +1769 MX +-28 -18 Dl +10 18 Dl +-10 18 Dl +28 -18 Dl +-201 0 Dl +1064 2143 MXY +-7 -32 Dl +-5 19 Dl +-20 6 Dl +32 7 Dl +-181 -181 Dl +3 Dt +-1 Ds +8 s +720 2482(Figure)N +925(1:)X +1 f +1002(Radix)X +1179(search)X +1365(trie)X +1474(with)X +1612(internal)X +1831(nodes)X +2004(A)X +2074(and)X +2189(B,)X +2271(external)X +720 2570(nodes)N +891(C,)X +972(D,)X +1056(and)X +1170(E,)X +1247(and)X +1361(bucket)X +1553(addresses)X +1819(stored)X +1997(in)X +2069(the)X +2168(unused)X +2370(por-)X +720 2658(tion)N +836(of)X +905(the)X +999(trie.)X +10 s +10 f +720 2922 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +1 f +892 3124(Further)N +1153(simpli\256cations)X +1647(of)X +1738(the)X +1860(above)X +2076([YIG89])X +2377(are)X +720 3212(possible.)N +1038(Using)X +1265(a)X +1337(single)X +1564(radix)X +1765(trie)X +1908(to)X +2006(avoid)X +2219(the)X +2352(\256rst)X +720 3300(hash)N +904(function,)X +1227(replacing)X +1562(the)X +1696(pseudo-random)X +2231(number)X +720 3388(generator)N +1052(with)X +1222(a)X +1286(well)X +1452(designed,)X +1785(bit-randomizing)X +2329(hash)X +720 3476(function,)N +1053(and)X +1215(using)X +1434(the)X +1578(portion)X +1855(of)X +1967(the)X +2110(hash)X +2302(value)X +720 3564(exposed)N +1021(during)X +1268(the)X +1404(trie)X +1549(traversal)X +1864(as)X +1969(a)X +2042(direct)X +2262(bucket)X +720 3652(address)N +990(results)X +1228(in)X +1319(an)X +2 f +1424(access)X +1 f +1663(function)X +1959(that)X +2108(works)X +2333(very)X +720 3740(similar)N +974(to)X +1068(Thompson's)X +1499(algorithm)X +1841(above.)X +2084(The)X +2240(follow-)X +720 3828(ing)N +847(algorithm)X +1183(uses)X +1346(the)X +1469(hash)X +1641(value)X +1840(to)X +1927(traverse)X +2206(a)X +2266(linear-)X +720 3916(ized)N +874(radix)X +1059(trie)X +2 f +8 s +1166 3891(3)N +1 f +10 s +1218 3916(starting)N +1478(at)X +1556(the)X +1674(0)X +2 f +7 s +3884(th)Y +10 s +1 f +1791 3916(bit.)N +0 f +8 s +720 4215(tbit)N +910(=)X +986(0;)X +1296(/*)X +1410(radix)X +1638(trie)X +1828(index)X +2056(*/)X +720 4303(hbit)N +910(=)X +986(0;)X +1296(/*)X +1410(hash)X +1600(bit)X +1752(index)X +2056(*/)X +720 4391(mask)N +910(=)X +986(0;)X +720 4479(hash)N +910(=)X +986 -0.4038(calchash\(key\);)AX +720 4655(for)N +872(\(mask)X +1100(=)X +1176(0;)X +910 4743 -0.4018(isbitset\(tbit\);)AN +910 4831(mask)N +1100(=)X +1176(\(mask)X +1404(<<)X +1518(1\))X +1632(+)X +1708(1\))X +1008 4919(if)N +1122(\(hash)X +1350(&)X +1426(\(1)X +1540(<<)X +1654 -0.4219(hbit++\)\)\))AX +1160 5007(/*)N +1274(right)X +1502(son)X +1692(*/)X +1160 5095(tbit)N +1350(=)X +1426(2)X +1502(*)X +1578(tbit)X +1768(+)X +1844(2;)X +1008 5183(else)N +1 f +16 s +720 5353 MXY +864 0 Dl +2 f +8 s +760 5408(3)N +1 f +9 s +818 5433(A)N +896(linearized)X +1206(radix)X +1380(trie)X +1502(is)X +1576(merely)X +1802(an)X +1895(array)X +2068(representation)X +720 5513(of)N +800(the)X +908(radix)X +1076(search)X +1280(trie)X +1396(described)X +1692(above.)X +1920(The)X +2052(children)X +2308(of)X +2388(the)X +720 5593(node)N +885(with)X +1038(index)X +1223(i)X +1267(can)X +1391(be)X +1483(found)X +1675(at)X +1751(the)X +1863(nodes)X +2055(indexed)X +2307(2*i+1)X +720 5673(and)N +842(2*i+2.)X +0 f +8 s +3146 538(/*)N +3260(left)X +3450(son)X +3678(*/)X +3146 626(tbit)N +3336(=)X +3412(2)X +3488(*)X +3564(tbit)X +3754(+)X +3830(1;)X +2706 802(bucket)N +2972(=)X +3048(hash)X +3238(&)X +3314(mask;)X +2 f +10 s +3495 1167(gdbm)N +1 f +2878 1299(The)N +3027(gdbm)X +3233(\(GNU)X +3458(data)X +3616(base)X +3783(manager\))X +4111(library)X +4349(is)X +4426(a)X +2706 1387(UNIX)N +2933(database)X +3236(manager)X +3539(written)X +3792(by)X +3897(Philip)X +4112(A.)X +4215(Nelson,)X +2706 1475(and)N +2848(made)X +3048(available)X +3364(as)X +3457(a)X +3518(part)X +3668(of)X +3760(the)X +3883(FSF)X +4040(software)X +4342(dis-)X +2706 1563(tribution.)N +3052(The)X +3207(gdbm)X +3419(library)X +3663(provides)X +3969(the)X +4097(same)X +4292(func-)X +2706 1651(tionality)N +3028(of)X +3151(the)X +2 f +3304(dbm)X +1 f +3442(/)X +2 f +3464(ndbm)X +1 f +3697(libraries)X +4015([NEL90])X +4360(but)X +2706 1739(attempts)N +3018(to)X +3121(avoid)X +3340(some)X +3550(of)X +3658(their)X +3846(shortcomings.)X +4337(The)X +2706 1827(gdbm)N +2918(library)X +3162(allows)X +3401(for)X +3525(arbitrary-length)X +4059(data,)X +4242(and)X +4387(its)X +2706 1915(database)N +3027(is)X +3124(a)X +3203(singular,)X +3524(non-sparse)X +2 f +8 s +3872 1890(4)N +1 f +10 s +3947 1915(\256le.)N +4112(The)X +4280(gdbm)X +2706 2003(library)N +2947(also)X +3103(includes)X +2 f +3396(dbm)X +1 f +3560(and)X +2 f +3702(ndbm)X +1 f +3906(compatible)X +4288(inter-)X +2706 2091(faces.)N +2878 2205(The)N +3025(gdbm)X +3229(library)X +3465(is)X +3540(based)X +3745(on)X +2 f +3847(extensible)X +4189(hashing)X +1 f +4442(,)X +2706 2293(a)N +2766(dynamic)X +3066(hashing)X +3339(algorithm)X +3674(by)X +3778(Fagin)X +3984(et)X +4066(al)X +4148([FAG79].)X +2706 2381(This)N +2881(algorithm)X +3225(differs)X +3467(from)X +3655(the)X +3785(previously)X +4155(discussed)X +2706 2469(algorithms)N +3069(in)X +3152(that)X +3293(it)X +3358(uses)X +3517(a)X +2 f +3574(directory)X +1 f +3889(that)X +4030(is)X +4103(a)X +4159(collapsed)X +2706 2557(representation)N +3192([ENB88])X +3517(of)X +3615(the)X +3744(radix)X +3940(search)X +4177(trie)X +4315(used)X +2706 2645(by)N +2 f +2806(sdbm)X +1 f +2975(.)X +10 f +2706 2733 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +7 s +3572 3761(L1)N +1 Dt +3485 3738 MXY +-20 -13 Dl +7 13 Dl +-7 13 Dl +20 -13 Dl +-400 0 Dl +3180 3027 MXY +136 Dc +2706 3494 MXY +136 Dc +2950 3264 MXY +136 Dc +3738 MY +136 Dc +3485 2968 MXY +0 118 Dl +238 0 Dl +0 -118 Dl +-238 0 Dl +3442 MY +0 119 Dl +238 0 Dl +0 -119 Dl +-238 0 Dl +3679 MY +0 119 Dl +238 0 Dl +0 -119 Dl +-238 0 Dl +3187 3501 MXY +136 Dc +2963 3316 MXY +-24 5 Dl +15 4 Dl +4 15 Dl +5 -24 Dl +-137 134 Dl +3204 3083 MXY +-24 5 Dl +15 4 Dl +3 14 Dl +6 -23 Dl +-137 133 Dl +3204 3450 MXY +-6 -24 Dl +-3 14 Dl +-15 5 Dl +24 5 Dl +-137 -134 Dl +2842 3369(0)N +3075 3139(0)N +2842 3676(1)N +3075 3443(1)N +3562 3054(L00)N +3565 3528(L01)N +4197 2968 MXY +0 118 Dl +237 0 Dl +0 -118 Dl +-237 0 Dl +3205 MY +0 119 Dl +237 0 Dl +0 -119 Dl +-237 0 Dl +3561 MY +0 118 Dl +237 0 Dl +0 -118 Dl +-237 0 Dl +3960 2909 MXY +0 237 Dl +118 0 Dl +0 -237 Dl +-118 0 Dl +3146 MY +0 237 Dl +118 0 Dl +0 -237 Dl +-118 0 Dl +3383 MY +0 237 Dl +118 0 Dl +0 -237 Dl +-118 0 Dl +3620 MY +0 237 Dl +118 0 Dl +0 -237 Dl +-118 0 Dl +4197 3027 MXY +-21 -13 Dl +8 13 Dl +-8 13 Dl +21 -13 Dl +-119 0 Dl +4197 3264 MXY +-21 -13 Dl +8 13 Dl +-8 13 Dl +21 -13 Dl +-119 0 Dl +3501 MY +59 0 Dl +0 89 Dl +4078 3738 MXY +59 0 Dl +0 -88 Dl +4197 3590 MXY +-21 -13 Dl +8 13 Dl +-8 13 Dl +21 -13 Dl +-60 0 Dl +4197 3650 MXY +-21 -13 Dl +8 13 Dl +-8 13 Dl +21 -13 Dl +-60 0 Dl +3991 3050(00)N +3991 3287(01)N +3991 3524(10)N +3991 3761(11)N +4269 3050(L00)N +4269 3287(L01)N +4283 3643(L1)N +3485 3501 MXY +-20 -13 Dl +7 13 Dl +-7 13 Dl +20 -13 Dl +-155 0 Dl +3485 3027 MXY +-20 -13 Dl +7 13 Dl +-7 13 Dl +20 -13 Dl +-163 0 Dl +2967 3687 MXY +-5 -24 Dl +-4 14 Dl +-15 4 Dl +24 6 Dl +-141 -141 Dl +3 Dt +-1 Ds +8 s +2706 4033(Figure)N +2903(2:)X +1 f +2972(A)X +3034(radix)X +3181(search)X +3359(trie)X +3460(and)X +3568(a)X +3612(directory)X +3858(representing)X +4189(the)X +4283(trie.)X +10 s +10 f +2706 4209 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +1 f +2878 4411(In)N +2968(this)X +3106(algorithm,)X +3460(a)X +3519(directory)X +3832(consists)X +4108(of)X +4198(a)X +4256(search)X +2706 4499(trie)N +2847(of)X +2947(depth)X +2 f +3158(n)X +1 f +3211(,)X +3264(containing)X +3635(2)X +2 f +7 s +4467(n)Y +10 s +1 f +3749 4499(bucket)N +3996(addresses)X +4337(\(i.e.)X +2706 4587(each)N +2897(element)X +3194(of)X +3304(the)X +3445(trie)X +3594(is)X +3689(a)X +3767(bucket)X +4023(address\).)X +4373(To)X +2706 4675(access)N +2935(the)X +3056(hash)X +3226(table,)X +3425(a)X +3483(32-bit)X +3696(hash)X +3865(value)X +4061(is)X +4136(calculated)X +2706 4763(and)N +2 f +2861(n)X +1 f +2953(bits)X +3107(of)X +3213(the)X +3350(value)X +3563(are)X +3701(used)X +3886(to)X +3986(index)X +4202(into)X +4364(the)X +2706 4851(directory)N +3018(to)X +3102(obtain)X +3324(a)X +3382(bucket)X +3618(address.)X +3921(It)X +3992(is)X +4067(important)X +4400(to)X +2706 4939(note)N +2866(that)X +3008(multiple)X +3296(entries)X +3532(of)X +3620(this)X +3756(directory)X +4067(may)X +4226(contain)X +2706 5027(the)N +2833(same)X +3026(bucket)X +3268(address)X +3537(as)X +3632(a)X +3696(result)X +3902(of)X +3997(directory)X +4315(dou-)X +2706 5115(bling)N +2903(during)X +3145(bucket)X +3392(splitting.)X +3706(Figure)X +3948(2)X +4021(illustrates)X +4364(the)X +2706 5203(relationship)N +3126(between)X +3436(a)X +3513(typical)X +3772(\(skewed\))X +4108(search)X +4355(trie)X +2706 5291(and)N +2850(its)X +2953(directory)X +3271(representation.)X +3774(The)X +3927(formation)X +4270(of)X +4364(the)X +2706 5379(directory)N +3016(shown)X +3245(in)X +3327(the)X +3445(\256gure)X +3652(is)X +3725(as)X +3812(follows.)X +16 s +2706 5593 MXY +864 0 Dl +2 f +8 s +2746 5648(4)N +1 f +9 s +2796 5673(It)N +2858(does)X +3008(not)X +3118(contain)X +3348(holes.)X +3 f +10 s +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +4424(3)X + +4 p +%%Page: 4 4 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +432 258(A)N +510(New)X +682(Hashing)X +985(Package)X +1290(for)X +1413(UNIX)X +3663(Seltzer)X +3920(&)X +4007(Yigit)X +1 f +604 538(Initially,)N +937(there)X +1158(is)X +1271(one)X +1446(slot)X +1620(in)X +1741(the)X +1898(directory)X +432 626(addressing)N +802(a)X +865(single)X +1083(bucket.)X +1364(The)X +1515(depth)X +1719(of)X +1812(the)X +1936(trie)X +2069(is)X +2148(0)X +432 714(and)N +577(0)X +646(bits)X +790(of)X +886(each)X +1063(hash)X +1239(value)X +1442(are)X +1570(examined)X +1910(to)X +2000(deter-)X +432 802(mine)N +624(in)X +718(which)X +946(bucket)X +1192(to)X +1286(place)X +1488(a)X +1556(key;)X +1726(all)X +1837(keys)X +2015(go)X +2126(in)X +432 890(bucket)N +682(0.)X +797(When)X +1024(this)X +1174(bucket)X +1423(is)X +1511(full,)X +1677(its)X +1787(contents)X +2089(are)X +432 978(divided)N +698(between)X +992(L0)X +1107(and)X +1249(L1)X +1363(as)X +1455(was)X +1605(done)X +1786(in)X +1873(the)X +1996(previ-)X +432 1066(ously)N +664(discussed)X +1030(algorithms.)X +1471(After)X +1700(this)X +1874(split,)X +2090(the)X +432 1154(address)N +710(of)X +814(the)X +948(second)X +1207(bucket)X +1457(must)X +1648(be)X +1760(stored)X +1992(in)X +2090(the)X +432 1242(directory.)N +796(To)X +939(accommodate)X +1438(the)X +1589(new)X +1776(address,)X +2090(the)X +432 1330(directory)N +752(is)X +835(split)X +2 f +8 s +972 1305(5)N +1 f +10 s +1330(,)Y +1054(by)X +1163(doubling)X +1476(it,)X +1569(thus)X +1731(increasing)X +2090(the)X +432 1418(depth)N +630(of)X +717(the)X +835(directory)X +1145(by)X +1245(one.)X +604 1532(After)N +813(this)X +967(split,)X +1163(a)X +1237(single)X +1466(bit)X +1588(of)X +1693(the)X +1829(hash)X +2014(value)X +432 1620(needs)N +663(to)X +773(be)X +896(examined)X +1255(to)X +1364(decide)X +1621(whether)X +1927(the)X +2072(key)X +432 1708(belongs)N +711(to)X +803(L0)X +922(or)X +1019(L1.)X +1158(Once)X +1358(one)X +1504(of)X +1601(these)X +1795(buckets)X +2069(\256lls)X +432 1796(\(L0)N +578(for)X +702(example\),)X +1051(it)X +1125(is)X +1208(split)X +1375(as)X +1472(before,)X +1728(and)X +1873(the)X +2000(direc-)X +432 1884(tory)N +585(is)X +662(split)X +823(again)X +1021(to)X +1107(make)X +1305(room)X +1498(for)X +1615(the)X +1736(address)X +2000(of)X +2090(the)X +432 1972(third)N +618(bucket.)X +927(This)X +1104(splitting)X +1400(causes)X +1645(the)X +1778(addresses)X +2121(of)X +432 2060(the)N +567(non-splitting)X +1012(bucket)X +1263(\(L1\))X +1443(to)X +1541(be)X +1653(duplicated.)X +2063(The)X +432 2148(directory)N +766(now)X +948(has)X +1099(four)X +1277(entries,)X +1555(a)X +1635(depth)X +1857(of)X +1968(2,)X +2072(and)X +432 2236(indexes)N +700(the)X +821(buckets)X +1089(L00,)X +1261(L01)X +1413(and)X +1552(L1,)X +1684(as)X +1774(shown)X +2006(in)X +2090(the)X +432 2324(Figure)N +661(2.)X +604 2438(The)N +756(crucial)X +1002(part)X +1154(of)X +1247(the)X +1371(algorithm)X +1708(is)X +1787(the)X +1911(observa-)X +432 2526(tion)N +580(that)X +724(L1)X +837(is)X +914(addressed)X +1255(twice)X +1453(in)X +1539(the)X +1661(directory.)X +1995(If)X +2073(this)X +432 2614(bucket)N +679(were)X +869(to)X +964(split)X +1134(now,)X +1324(the)X +1454(directory)X +1776(already)X +2045(con-)X +432 2702(tains)N +611(room)X +808(to)X +898(hold)X +1067(the)X +1192(address)X +1460(of)X +1554(the)X +1679(new)X +1840(bucket.)X +2121(In)X +432 2790(general,)N +711(the)X +831(relationship)X +1231(between)X +1521(the)X +1641(directory)X +1953(and)X +2090(the)X +432 2878(number)N +704(of)X +798(bucket)X +1039(addresses)X +1374(contained)X +1713(therein)X +1962(is)X +2041(used)X +432 2966(to)N +517(decide)X +750(when)X +947(to)X +1031(split)X +1190(the)X +1310(directory.)X +1662(Each)X +1845(bucket)X +2081(has)X +432 3054(a)N +505(depth,)X +740(\()X +2 f +767(n)X +7 s +3070(b)Y +10 s +1 f +848 3054(\),)N +932(associated)X +1299(with)X +1478(it)X +1558(and)X +1710(appears)X +1992(in)X +2090(the)X +432 3142(directory)N +744(exactly)X +998(2)X +2 f +7 s +3106(n)Y +9 f +1075(-)X +2 f +1106(n)X +4 s +3110(b)Y +7 s +1 f +10 s +1181 3142(times.)N +1396(When)X +1610(a)X +1668(bucket)X +1904(splits,)X +2113(its)X +432 3230(depth)N +638(increases)X +961(by)X +1069(one.)X +1253(The)X +1406(directory)X +1724(must)X +1907(split)X +2072(any)X +432 3318(time)N +602(a)X +665(bucket's)X +964(depth)X +1169(exceeds)X +1451(the)X +1576(depth)X +1781(of)X +1875(the)X +2000(direc-)X +432 3406(tory.)N +630(The)X +784(following)X +1123(code)X +1303(fragment)X +1621(helps)X +1818(to)X +1908(illustrate)X +432 3494(the)N +554(extendible)X +912(hashing)X +1185(algorithm)X +1520([FAG79])X +1838(for)X +1955(access-)X +432 3582(ing)N +554(individual)X +898(buckets)X +1163(and)X +1299(maintaining)X +1701(the)X +1819(directory.)X +0 f +8 s +432 3881(hash)N +622(=)X +698 -0.4038(calchash\(key\);)AX +432 3969(mask)N +622(=)X +698 -0.4018(maskvec[depth];)AX +432 4145(bucket)N +698(=)X +774 -0.4038(directory[hash)AX +1344(&)X +1420(mask];)X +432 4321(/*)N +546(Key)X +698 -0.4219(Insertion)AX +1078(*/)X +432 4409(if)N +546 -0.4038(\(store\(bucket,)AX +1116(key,)X +1306(data\))X +1534(==)X +1648(FAIL\))X +1876({)X +720 4497(newbl)N +948(=)X +1024 -0.4167(getpage\(\);)AX +720 4585 -0.4000(bucket->depth++;)AN +720 4673 -0.4091(newbl->depth)AN +1214(=)X +1290 -0.4038(bucket->depth;)AX +720 4761(if)N +834 -0.4038(\(bucket->depth)AX +1404(>)X +1480(depth\))X +1746({)X +1008 4849(/*)N +1122(double)X +1388 -0.4219(directory)AX +1768(*/)X +1008 4937(depth++;)N +1 f +16 s +432 5033 MXY +864 0 Dl +2 f +8 s +472 5088(5)N +1 f +9 s +534 5113(This)N +692(decision)X +962(to)X +1048(split)X +1202(the)X +1319(directory)X +1608(is)X +1685(based)X +1878(on)X +1979(a)X +2040(com-)X +432 5193(parison)N +666(of)X +748(the)X +858(depth)X +1040(of)X +1121(the)X +1230(page)X +1387(being)X +1568(split)X +1713(and)X +1838(the)X +1947(depth)X +2128(of)X +432 5273(the)N +543(trie.)X +698(In)X +781(Figure)X +992(2,)X +1069(the)X +1180(depths)X +1390(of)X +1472(both)X +1622(L00)X +1760(and)X +1886(L01)X +2024(are)X +2134(2,)X +432 5353(whereas)N +689(the)X +798(depth)X +979(of)X +1060(L1)X +1161(is)X +1230(1.)X +1323(Therefore,)X +1646(if)X +1710(L1)X +1810(were)X +1970(to)X +2046(split,)X +432 5433(the)N +543(directory)X +826(would)X +1029(not)X +1144(need)X +1303(to)X +1382(split.)X +1565(In)X +1648(reality,)X +1872(a)X +1926(bucket)X +2140(is)X +432 5513(allocated)N +727(for)X +846(the)X +969(directory)X +1264(at)X +1351(the)X +1474(time)X +1637(of)X +1732(\256le)X +1858(creation)X +2124(so)X +432 5593(although)N +707(the)X +818(directory)X +1100(splits)X +1274(logically,)X +1566(physical)X +1828(splits)X +2002(do)X +2096(not)X +432 5673(occur)N +610(until)X +760(the)X +866(\256le)X +976(becomes)X +1246(quite)X +1408(large.)X +0 f +8 s +2994 538 -0.4219(directory)AN +3374(=)X +3450 -0.3971(double\(directory\);)AX +2706 626(})N +2706 714 -0.3958(splitbucket\(bucket,)AN +3466(newbl\))X +2706 802(...)N +2418 890(})N +2 f +10 s +3169 1255(hsearch)N +1 f +2590 1387(Since)N +2 f +2807(hsearch)X +1 f +3100(does)X +3286(not)X +3427(have)X +3617(to)X +3717(translate)X +4027(hash)X +2418 1475(values)N +2659(into)X +2819(disk)X +2988(addresses,)X +3352(it)X +3432(can)X +3579(use)X +3721(much)X +3934(simpler)X +2418 1563(algorithms)N +2808(than)X +2994(those)X +3211(de\256ned)X +3495(above.)X +3775(System)X +4058(V's)X +2 f +2418 1651(hsearch)N +1 f +2708(constructs)X +3069(a)X +3141(\256xed-size)X +3489(hash)X +3671(table)X +3862(\(speci\256ed)X +2418 1739(by)N +2519(the)X +2637(user)X +2791(at)X +2869(table)X +3045(creation\).)X +3391(By)X +3504(default,)X +3767(a)X +3823(multiplica-)X +2418 1827(tive)N +2570(hash)X +2748(function)X +3046(based)X +3260(on)X +3371(that)X +3522(described)X +3861(in)X +3954(Knuth,)X +2418 1915(Volume)N +2710(3,)X +2804(section)X +3065(6.4)X +3199([KNU68])X +3541(is)X +3628(used)X +3809(to)X +3905(obtain)X +4138(a)X +2418 2003(primary)N +2694(bucket)X +2930(address.)X +3233(If)X +3309(this)X +3446(bucket)X +3681(is)X +3755(full,)X +3907(a)X +3964(secon-)X +2418 2091(dary)N +2593(multiplicative)X +3069(hash)X +3248(value)X +3454(is)X +3538(computed)X +3885(to)X +3978(de\256ne)X +2418 2179(the)N +2542(probe)X +2751(interval.)X +3062(The)X +3213(probe)X +3422(interval)X +3693(is)X +3772(added)X +3989(to)X +4076(the)X +2418 2267(original)N +2712(bucket)X +2971(address)X +3257(\(modulo)X +3573(the)X +3716(table)X +3916(size\))X +4112(to)X +2418 2355(obtain)N +2658(a)X +2734(new)X +2908(bucket)X +3162(address.)X +3483(This)X +3665(process)X +3946(repeats)X +2418 2443(until)N +2588(an)X +2688(empty)X +2911(bucket)X +3148(is)X +3224(found.)X +3474(If)X +3551(no)X +3654(bucket)X +3891(is)X +3967(found,)X +2418 2531(an)N +2514(insertion)X +2814(fails)X +2972(with)X +3134(a)X +3190(``table)X +3420(full'')X +3605(condition.)X +2590 2645(The)N +2768(basic)X +2986(algorithm)X +3350(may)X +3541(be)X +3670(modi\256ed)X +4006(by)X +4138(a)X +2418 2733(number)N +2705(of)X +2813(compile)X +3112(time)X +3295(options)X +3571(available)X +3902(to)X +4005(those)X +2418 2821(users)N +2604(with)X +2767(AT&T)X +3006(source)X +3237(code.)X +3450(First,)X +3637(the)X +3756(package)X +4040(pro-)X +2418 2909(vides)N +2638(two)X +2809(options)X +3094(for)X +3238(hash)X +3435(functions.)X +3803(Users)X +4036(may)X +2418 2997(specify)N +2690(their)X +2877(own)X +3055(hash)X +3242(function)X +3549(by)X +3669(compiling)X +4032(with)X +2418 3085(``USCR'')N +2757(de\256ned)X +3016(and)X +3155(declaring)X +3477(and)X +3616(de\256ning)X +3901(the)X +4022(vari-)X +2418 3173(able)N +2 f +2578(hcompar)X +1 f +2863(,)X +2909(a)X +2971(function)X +3263(taking)X +3488(two)X +3633(string)X +3840(arguments)X +2418 3261(and)N +2560(returning)X +2880(an)X +2982(integer.)X +3271(Users)X +3480(may)X +3643(also)X +3797(request)X +4054(that)X +2418 3349(hash)N +2587(values)X +2814(be)X +2912(computed)X +3250(simply)X +3489(by)X +3590(taking)X +3811(the)X +3930(modulo)X +2418 3437(of)N +2521(key)X +2673(\(using)X +2909(division)X +3201(rather)X +3424(than)X +3597(multiplication)X +4080(for)X +2418 3525(hash)N +2589(value)X +2787(calculation\).)X +3230(If)X +3308(this)X +3447(technique)X +3783(is)X +3859(used,)X +4049(col-)X +2418 3613(lisions)N +2651(are)X +2775(resolved)X +3072(by)X +3176(scanning)X +3485(sequentially)X +3896(from)X +4076(the)X +2418 3701(selected)N +2702(bucket)X +2941(\(linear)X +3176(probing\).)X +3517(This)X +3684(option)X +3913(is)X +3991(avail-)X +2418 3789(able)N +2572(by)X +2672(de\256ning)X +2954(the)X +3072(variable)X +3351(``DIV'')X +3622(at)X +3700(compile)X +3978(time.)X +2590 3903(A)N +2720(second)X +3015(option,)X +3311(based)X +3565(on)X +3716(an)X +3863(algorithm)X +2418 3991(discovered)N +2787(by)X +2888(Richard)X +3163(P.)X +3248(Brent,)X +3466(rearranges)X +3822(the)X +3940(table)X +4116(at)X +2418 4079(the)N +2549(time)X +2724(of)X +2824(insertion)X +3137(in)X +3232(order)X +3434(to)X +3528(speed)X +3743(up)X +3855(retrievals.)X +2418 4167(The)N +2571(basic)X +2764(idea)X +2926(is)X +3007(to)X +3097(shorten)X +3361(long)X +3531(probe)X +3741(sequences)X +4094(by)X +2418 4255(lengthening)N +2833(short)X +3030(probe)X +3249(sequences.)X +3651(Once)X +3857(the)X +3991(probe)X +2418 4343(chain)N +2613(has)X +2741(exceeded)X +3062(some)X +3252(threshold)X +3571(\(Brent)X +3796(suggests)X +4087(2\),)X +2418 4431(we)N +2541(attempt)X +2809(to)X +2899(shuf\257e)X +3145(any)X +3289(colliding)X +3601(keys)X +3776(\(keys)X +3978(which)X +2418 4519(appeared)N +2734(in)X +2821(the)X +2944(probe)X +3152(sequence)X +3471(of)X +3562(the)X +3684(new)X +3842(key\).)X +4049(The)X +2418 4607(details)N +2652(of)X +2744(this)X +2884(key)X +3025(shuf\257ing)X +3333(can)X +3469(be)X +3569(found)X +3780(in)X +3866([KNU68])X +2418 4695(and)N +2576([BRE73].)X +2946(This)X +3129(algorithm)X +3481(may)X +3660(be)X +3777(obtained)X +4094(by)X +2418 4783(de\256ning)N +2700(the)X +2818(variable)X +3097(``BRENT'')X +3487(at)X +3565(compile)X +3843(time.)X +2590 4897(A)N +2698(third)X +2899(set)X +3038(of)X +3154(options,)X +3458(obtained)X +3783(by)X +3912(de\256ning)X +2418 4985(``CHAINED'',)N +2943(use)X +3086(linked)X +3321(lists)X +3484(to)X +3581(resolve)X +3848(collisions.)X +2418 5073(Either)N +2647(of)X +2747(the)X +2878(primary)X +3164(hash)X +3343(function)X +3642(described)X +3982(above)X +2418 5161(may)N +2584(be)X +2688(used,)X +2882(but)X +3011(all)X +3118(collisions)X +3451(are)X +3577(resolved)X +3876(by)X +3983(build-)X +2418 5249(ing)N +2554(a)X +2623(linked)X +2856(list)X +2986(of)X +3086(entries)X +3333(from)X +3522(the)X +3653(primary)X +3940(bucket.)X +2418 5337(By)N +2542(default,)X +2816(new)X +2981(entries)X +3226(will)X +3381(be)X +3488(added)X +3711(to)X +3804(a)X +3871(bucket)X +4116(at)X +2418 5425(the)N +2541(beginning)X +2886(of)X +2978(the)X +3101(bucket)X +3339(chain.)X +3577(However,)X +3916(compile)X +2418 5513(options)N +2706(``SORTUP'')X +3173(or)X +3293(``SORTDOWN'')X +3908(may)X +4098(be)X +2418 5601(speci\256ed)N +2723(to)X +2805(order)X +2995(the)X +3113(hash)X +3280(chains)X +3505(within)X +3729(each)X +3897(bucket.)X +3 f +432 5960(4)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +5 p +%%Page: 5 5 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +720 258(Seltzer)N +977(&)X +1064(Yigit)X +3278(A)X +3356(New)X +3528(Hashing)X +3831(Package)X +4136(for)X +4259(UNIX)X +2 f +1444 538(dynahash)N +1 f +892 670(The)N +2 f +1054(dynahash)X +1 f +1398(library,)X +1669(written)X +1932(by)X +2048(Esmond)X +2346(Pitt,)X +720 758(implements)N +1183(Larson's)X +1554(linear)X +1827(hashing)X +2165(algorithm)X +720 846([LAR88])N +1097(with)X +1302(an)X +2 f +1440(hsearch)X +1 f +1756(compatible)X +2174(interface.)X +720 934(Intuitively,)N +1099(a)X +1161(hash)X +1334(table)X +1516(begins)X +1751(as)X +1844(a)X +1905(single)X +2121(bucket)X +2360(and)X +720 1022(grows)N +941(in)X +1028(generations,)X +1443(where)X +1665(a)X +1725(generation)X +2088(corresponds)X +720 1110(to)N +815(a)X +884(doubling)X +1201(in)X +1296(the)X +1427(size)X +1585(of)X +1685(the)X +1815(hash)X +1994(table.)X +2222(The)X +2379(0)X +2 f +7 s +1078(th)Y +10 s +1 f +720 1198(generation)N +1085(occurs)X +1321(as)X +1414(the)X +1538(table)X +1719(grows)X +1940(from)X +2121(one)X +2262(bucket)X +720 1286(to)N +814(two.)X +1006(In)X +1105(the)X +1235(next)X +1405(generation)X +1776(the)X +1906(table)X +2093(grows)X +2320(from)X +720 1374(two)N +862(to)X +946(four.)X +1122(During)X +1371(each)X +1541(generation,)X +1921(every)X +2121(bucket)X +2356(that)X +720 1462(existed)N +967(at)X +1045(the)X +1163(beginning)X +1503(of)X +1590(the)X +1708(generation)X +2067(is)X +2140(split.)X +892 1576(The)N +1041(table)X +1221(starts)X +1414(as)X +1505(a)X +1565(single)X +1780(bucket)X +2018(\(numbered)X +2389(0\),)X +720 1664(the)N +839(current)X +1088(split)X +1245(bucket)X +1479(is)X +1552(set)X +1661(to)X +1743(bucket)X +1977(0,)X +2057(and)X +2193(the)X +2311(max-)X +720 1752(imum)N +933(split)X +1097(point)X +1288(is)X +1368(set)X +1483(to)X +1571(twice)X +1771(the)X +1895(current)X +2149(split)X +2312(point)X +720 1840(\(0\).)N +863(When)X +1084(it)X +1157(is)X +1239(time)X +1410(for)X +1532(a)X +1596(bucket)X +1838(to)X +1928(split,)X +2113(the)X +2239(keys)X +2414(in)X +720 1928(the)N +872(current)X +1154(split)X +1345(bucket)X +1612(are)X +1764(divided)X +2057(between)X +2378(the)X +720 2016(current)N +981(split)X +1151(bucket)X +1397(and)X +1545(a)X +1613(new)X +1779(bucket)X +2025(whose)X +2262(bucket)X +720 2104(number)N +1000(is)X +1088(equal)X +1297(to)X +1394(1)X +1469(+)X +1549(current)X +1812(split)X +1984(bucket)X +2232(+)X +2311(max-)X +720 2192(imum)N +927(split)X +1085(point.)X +1310(We)X +1442(can)X +1574(determine)X +1915(which)X +2131(keys)X +2298(move)X +720 2280(to)N +807(the)X +929(new)X +1087(bucket)X +1325(by)X +1429(examining)X +1791(the)X +2 f +1913(n)X +7 s +1962 2248(th)N +10 s +1 f +2043 2280(bit)N +2151(of)X +2242(a)X +2302(key's)X +720 2368(hash)N +899(value)X +1105(where)X +1334(n)X +1406(is)X +1491(the)X +1620(generation)X +1990(number.)X +2306(After)X +720 2456(the)N +846(bucket)X +1088(at)X +1174(the)X +1300(maximum)X +1651(split)X +1815(point)X +2006(has)X +2140(been)X +2319(split,)X +720 2544(the)N +839(generation)X +1198(number)X +1463(is)X +1536(incremented,)X +1973(the)X +2091(current)X +2339(split)X +720 2632(point)N +908(is)X +985(set)X +1098(back)X +1274(to)X +1360(zero,)X +1543(and)X +1683(the)X +1805(maximum)X +2152(split)X +2312(point)X +720 2720(is)N +815(set)X +946(to)X +1050(the)X +1190(number)X +1477(of)X +1586(the)X +1725(last)X +1877(bucket)X +2132(in)X +2235(the)X +2374(\256le)X +720 2808(\(which)N +971(is)X +1052(equal)X +1253(to)X +1342(twice)X +1543(the)X +1668(old)X +1797(maximum)X +2148(split)X +2312(point)X +720 2896(plus)N +873(1\).)X +892 3010(To)N +1031(facilitate)X +1361(locating)X +1668(keys,)X +1884(we)X +2027(maintain)X +2356(two)X +720 3098(masks.)N +989(The)X +1143(low)X +1291(mask)X +1488(is)X +1569(equal)X +1771(to)X +1861(the)X +1987(maximum)X +2339(split)X +720 3186(bucket)N +967(and)X +1116(the)X +1247(high)X +1422(mask)X +1624(is)X +1710(equal)X +1917(to)X +2011(the)X +2141(next)X +2311(max-)X +720 3274(imum)N +931(split)X +1093(bucket.)X +1372(To)X +1486(locate)X +1703(a)X +1764(speci\256c)X +2033(key,)X +2193(we)X +2311(com-)X +720 3362(pute)N +881(a)X +940(32-bit)X +1154(hash)X +1324(value)X +1520(using)X +1715(a)X +1773(bit-randomizing)X +2311(algo-)X +720 3450(rithm)N +932(such)X +1118(as)X +1224(the)X +1361(one)X +1516(described)X +1862(in)X +1962([LAR88].)X +2334(This)X +720 3538(hash)N +893(value)X +1093(is)X +1172(then)X +1336(masked)X +1607(with)X +1775(the)X +1898(high)X +2065(mask.)X +2299(If)X +2378(the)X +720 3626(resulting)N +1026(number)X +1297(is)X +1376(greater)X +1626(than)X +1790(the)X +1913(maximum)X +2262(bucket)X +720 3714(in)N +823(the)X +962(table)X +1159(\(current)X +1455(split)X +1633(bucket)X +1888(+)X +1974(maximum)X +2339(split)X +720 3802(point\),)N +962(the)X +1091(hash)X +1269(value)X +1474(is)X +1558(masked)X +1834(with)X +2007(the)X +2136(low)X +2287(mask.)X +720 3890(In)N +825(either)X +1046(case,)X +1242(the)X +1377(result)X +1592(of)X +1696(the)X +1831(mask)X +2037(is)X +2127(the)X +2262(bucket)X +720 3978(number)N +989(for)X +1107(the)X +1229(given)X +1431(key.)X +1611(The)X +1759(algorithm)X +2093(below)X +2312(illus-)X +720 4066(trates)N +914(this)X +1049(process.)X +0 f +8 s +720 4365(h)N +796(=)X +872 -0.4038(calchash\(key\);)AX +720 4453(bucket)N +986(=)X +1062(h)X +1138(&)X +1214 -0.4167(high_mask;)AX +720 4541(if)N +834(\()X +910(bucket)X +1176(>)X +1252 -0.4167(max_bucket)AX +1670(\))X +1008 4629(bucket)N +1274(=)X +1350(h)X +1426(&)X +1502 -0.4219(low_mask;)AX +720 4717 -0.4018(return\(bucket\);)AN +1 f +10 s +892 5042(In)N +1013(order)X +1237(to)X +1353(decide)X +1617(when)X +1845(to)X +1961(split)X +2152(a)X +2242(bucket,)X +2 f +720 5130(dynahash)N +1 f +1050(uses)X +2 f +1210(controlled)X +1561(splitting)X +1 f +1822(.)X +1884(A)X +1964(hash)X +2133(table)X +2311(has)X +2440(a)X +720 5218(\256ll)N +837(factor)X +1054(which)X +1279(is)X +1361(expressed)X +1707(in)X +1798(terms)X +2004(of)X +2099(the)X +2225(average)X +720 5306(number)N +990(of)X +1082(keys)X +1253(in)X +1339(each)X +1511(bucket.)X +1789(Each)X +1974(time)X +2140(the)X +2262(table's)X +720 5394(total)N +885(number)X +1153(of)X +1243(keys)X +1413(divided)X +1676(by)X +1778(its)X +1875(number)X +2142(of)X +2231(buckets)X +720 5482(exceeds)N +995(this)X +1130(\256ll)X +1238(factor,)X +1466(a)X +1522(bucket)X +1756(is)X +1829(split.)X +2878 538(Since)N +3079(the)X +2 f +3200(hsearch)X +1 f +3477(create)X +3693(interface)X +3998(\()X +2 f +4025(hcreate)X +1 f +4266(\))X +4315(calls)X +2706 626(for)N +2842(an)X +2960(estimate)X +3269(of)X +3378(the)X +3518(\256nal)X +3702(size)X +3869(of)X +3978(the)X +4118(hash)X +4306(table)X +2706 714(\()N +2 f +2733(nelem)X +1 f +2925(\),)X +2 f +3007(dynahash)X +1 f +3349(uses)X +3522(this)X +3672(information)X +4085(to)X +4182(initialize)X +2706 802(the)N +2848(table.)X +3088(The)X +3257(initial)X +3486(number)X +3774(of)X +3884(buckets)X +4172(is)X +4268(set)X +4400(to)X +2 f +2706 890(nelem)N +1 f +2926(rounded)X +3217(to)X +3306(the)X +3431(next)X +3596(higher)X +3828(power)X +4056(of)X +4150(two.)X +4337(The)X +2706 978(current)N +2958(split)X +3118(point)X +3305(is)X +3381(set)X +3493(to)X +3578(0)X +3641(and)X +3780(the)X +3901(maximum)X +4248(bucket)X +2706 1066(and)N +2842(maximum)X +3186(split)X +3343(point)X +3527(are)X +3646(set)X +3755(to)X +3837(this)X +3972(rounded)X +4255(value.)X +3 f +3148 1220(The)N +3301(New)X +3473(Implementation)X +1 f +2878 1352(Our)N +3042(implementation)X +3583(is)X +3675(also)X +3842(based)X +4063(on)X +4181(Larson's)X +2706 1440(linear)N +2939(hashing)X +3238([LAR88])X +3582(algorithm)X +3943(as)X +4060(well)X +4248(as)X +4364(the)X +2 f +2706 1528(dynahash)N +1 f +3047(implementation.)X +3623(The)X +2 f +3782(dbm)X +1 f +3954(family)X +4197(of)X +4297(algo-)X +2706 1616(rithms)N +2942(decide)X +3184(dynamically)X +3612(which)X +3840(bucket)X +4085(to)X +4178(split)X +4346(and)X +2706 1704(when)N +2914(to)X +3010(split)X +3180(it)X +3257(\(when)X +3491(it)X +3568(over\257ows\))X +3944(while)X +2 f +4155(dynahash)X +1 f +2706 1792(splits)N +2933(in)X +3054(a)X +3149(prede\256ned)X +3547(order)X +3776(\(linearly\))X +4134(and)X +4309(at)X +4426(a)X +2706 1880(prede\256ned)N +3116(time)X +3328(\(when)X +3599(the)X +3767(table)X +3993(\256ll)X +4151(factor)X +4409(is)X +2706 1968(exceeded\).)N +3121(We)X +3280(use)X +3434(a)X +3517(hybrid)X +3773(of)X +3887(these)X +4099(techniques.)X +2706 2056(Splits)N +2913(occur)X +3118(in)X +3206(the)X +3330(prede\256ned)X +3695(order)X +3891(of)X +3984(linear)X +4193(hashing,)X +2706 2144(but)N +2845(the)X +2980(time)X +3159(at)X +3253(which)X +3485(pages)X +3704(are)X +3839(split)X +4012(is)X +4101(determined)X +2706 2232(both)N +2869(by)X +2970(page)X +3143(over\257ows)X +3480(\()X +2 f +3507(uncontrolled)X +3937(splitting)X +1 f +4198(\))X +4246(and)X +4382(by)X +2706 2320(exceeding)N +3052(the)X +3170(\256ll)X +3278(factor)X +3486(\()X +2 f +3513(controlled)X +3862(splitting)X +1 f +4123(\))X +2878 2434(A)N +2962(hash)X +3135(table)X +3317(is)X +3395(parameterized)X +3876(by)X +3981(both)X +4148(its)X +4248(bucket)X +2706 2522(size)N +2904(\()X +2 f +2931(bsize)X +1 f +(\))S +3191(and)X +3380(\256ll)X +3541(factor)X +3801(\()X +2 f +3828(ffactor)X +1 f +4041(\).)X +4180(Whereas)X +2 f +2706 2610(dynahash's)N +1 f +3095(buckets)X +3364(can)X +3500(be)X +3599(represented)X +3993(as)X +4083(a)X +4142(linked)X +4365(list)X +2706 2698(of)N +2798(elements)X +3108(in)X +3195(memory,)X +3507(our)X +3639(package)X +3928(needs)X +4136(to)X +4222(support)X +2706 2786(disk)N +2874(access,)X +3135(and)X +3286(must)X +3476(represent)X +3806(buckets)X +4086(in)X +4183(terms)X +4395(of)X +2706 2874(pages.)N +2955(The)X +2 f +3106(bsize)X +1 f +3291(is)X +3369(the)X +3492(size)X +3642(\(in)X +3756(bytes\))X +3977(of)X +4069(these)X +4259(pages.)X +2706 2962(As)N +2833(in)X +2933(linear)X +3154(hashing,)X +3461(the)X +3597(number)X +3879(of)X +3983(buckets)X +4265(in)X +4364(the)X +2706 3050(table)N +2906(is)X +3003(equal)X +3221(to)X +3327(the)X +3469(number)X +3758(of)X +3869(keys)X +4060(in)X +4165(the)X +4306(table)X +2706 3138(divided)N +2988(by)X +2 f +3110(ffactor)X +1 f +3323(.)X +2 f +8 s +3113(6)Y +1 f +10 s +3417 3138(The)N +3584(controlled)X +3950(splitting)X +4252(occurs)X +2706 3226(each)N +2878(time)X +3044(the)X +3166(number)X +3435(of)X +3526(keys)X +3697(in)X +3783(the)X +3905(table)X +4085(exceeds)X +4364(the)X +2706 3314(\256ll)N +2814(factor)X +3022(multiplied)X +3370(by)X +3470(the)X +3588(number)X +3853(of)X +3940(buckets.)X +2878 3428(Inserting)N +3187(keys)X +3358(and)X +3498(splitting)X +3783(buckets)X +4051(is)X +4127(performed)X +2706 3516(precisely)N +3018(as)X +3107(described)X +3437(previously)X +3796(for)X +2 f +3911(dynahash)X +1 f +4218(.)X +4279(How-)X +2706 3604(ever,)N +2897(since)X +3094(buckets)X +3371(are)X +3502(now)X +3671(comprised)X +4036(of)X +4134(pages,)X +4368(we)X +2706 3692(must)N +2883(be)X +2981(prepared)X +3284(to)X +3367(handle)X +3602(cases)X +3793(where)X +4011(the)X +4130(size)X +4276(of)X +4364(the)X +2706 3780(keys)N +2873(and)X +3009(data)X +3163(in)X +3245(a)X +3301(bucket)X +3535(exceed)X +3779(the)X +3897(bucket)X +4131(size.)X +3 f +3318 3934(Over\257ow)N +3654(Pages)X +1 f +2878 4066(There)N +3095(are)X +3223(two)X +3372(cases)X +3571(where)X +3797(a)X +3862(key)X +4007(may)X +4174(not)X +4305(\256t)X +4400(in)X +2706 4154(its)N +2802(designated)X +3166(bucket.)X +3441(In)X +3529(the)X +3647(\256rst)X +3791(case,)X +3970(the)X +4088(total)X +4250(size)X +4395(of)X +2706 4242(the)N +2833(key)X +2978(and)X +3123(data)X +3286(may)X +3453(exceed)X +3706(the)X +3833(bucket)X +4076(size.)X +4269(In)X +4364(the)X +2706 4330(second,)N +3008(addition)X +3328(of)X +3453(a)X +3547(new)X +3739(key)X +3913(could)X +4149(cause)X +4386(an)X +2706 4418(over\257ow,)N +3068(but)X +3227(the)X +3382(bucket)X +3652(in)X +3770(question)X +4097(is)X +4206(not)X +4364(yet)X +2706 4506(scheduled)N +3049(to)X +3133(be)X +3230(split.)X +3428(In)X +3516(existing)X +3790(implementations,)X +4364(the)X +2706 4594(second)N +2953(case)X +3115(never)X +3317(arises)X +3523(\(since)X +3738(buckets)X +4006(are)X +4128(split)X +4288(when)X +2706 4682(they)N +2871(over\257ow\))X +3210(and)X +3352(the)X +3476(\256rst)X +3626(case)X +3791(is)X +3870(not)X +3998(handled)X +4278(at)X +4362(all.)X +2706 4770(Although)N +3036(large)X +3225(key/data)X +3525(pair)X +3678(handling)X +3986(is)X +4066(dif\256cult)X +4346(and)X +2706 4858(expensive,)N +3083(it)X +3163(is)X +3252(essential.)X +3604(In)X +3706(a)X +3777(linear)X +3995(hashed)X +4253(imple-)X +2706 4946(mentation,)N +3087(over\257ow)X +3413(pages)X +3636(are)X +3775(required)X +4083(for)X +4217(buckets)X +2706 5034(which)N +2935(over\257ow)X +3253(before)X +3492(they)X +3662(are)X +3793(split,)X +3982(so)X +4085(we)X +4211(can)X +4355(use)X +2706 5122(the)N +2833(same)X +3027(mechanism)X +3421(for)X +3544(large)X +3734(key/data)X +4035(pairs)X +4220(that)X +4368(we)X +2706 5210(use)N +2837(for)X +2955(over\257ow)X +3264(pages.)X +3511(Logically,)X +3862(we)X +3980(chain)X +4177(over\257ow)X +16 s +2706 5353 MXY +864 0 Dl +2 f +8 s +2746 5408(6)N +1 f +9 s +2801 5433(This)N +2952(is)X +3023(not)X +3138(strictly)X +3361(true.)X +3532(The)X +3667(\256le)X +3782(does)X +3937(not)X +4052(contract)X +4306(when)X +2706 5513(keys)N +2861(are)X +2972(deleted,)X +3221(so)X +3308(the)X +3419(number)X +3662(of)X +3744(buckets)X +3986(is)X +4056(actually)X +4306(equal)X +2706 5593(to)N +2782(the)X +2890(maximum)X +3202(number)X +3441(of)X +3520(keys)X +3671(ever)X +3814(present)X +4041(in)X +4116(the)X +4223(table)X +4382(di-)X +2706 5673(vided)N +2884(by)X +2974(the)X +3080(\256ll)X +3178(factor.)X +3 f +10 s +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +4424(5)X + +6 p +%%Page: 6 6 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +432 258(A)N +510(New)X +682(Hashing)X +985(Package)X +1290(for)X +1413(UNIX)X +3663(Seltzer)X +3920(&)X +4007(Yigit)X +1 f +432 538(pages)N +639(to)X +725(the)X +847(buckets)X +1116(\(also)X +1296(called)X +1512(primary)X +1789(pages\).)X +2062(In)X +2152(a)X +432 626(memory)N +730(based)X +943(representation,)X +1448(over\257ow)X +1763(pages)X +1976(do)X +2086(not)X +432 714(pose)N +628(any)X +792(special)X +1063(problems)X +1409(because)X +1712(we)X +1854(can)X +2014(chain)X +432 802(over\257ow)N +776(pages)X +1017(to)X +1137(primary)X +1449(pages)X +1690(using)X +1921(memory)X +432 890(pointers.)N +776(However,)X +1137(mapping)X +1463(these)X +1674(over\257ow)X +2005(pages)X +432 978(into)N +584(a)X +648(disk)X +809(\256le)X +939(is)X +1019(more)X +1211(of)X +1305(a)X +1368(challenge,)X +1723(since)X +1915(we)X +2036(need)X +432 1066(to)N +547(be)X +675(able)X +861(to)X +975(address)X +1268(both)X +1462(bucket)X +1728(pages,)X +1983(whose)X +432 1154(numbers)N +729(are)X +849(growing)X +1137(linearly,)X +1422(and)X +1558(some)X +1747(indeterminate)X +432 1242(number)N +715(of)X +820(over\257ow)X +1143(pages)X +1364(without)X +1646(reorganizing)X +2090(the)X +432 1330(\256le.)N +604 1444(One)N +789(simple)X +1053(solution)X +1361(would)X +1612(be)X +1739(to)X +1852(allocate)X +2152(a)X +432 1532(separate)N +737(\256le)X +880(for)X +1015(over\257ow)X +1341(pages.)X +1604(The)X +1769(disadvantage)X +432 1620(with)N +605(such)X +783(a)X +850(technique)X +1193(is)X +1276(that)X +1426(it)X +1500(requires)X +1789(an)X +1895(extra)X +2086(\256le)X +432 1708(descriptor,)N +794(an)X +891(extra)X +1073(system)X +1316(call)X +1453(on)X +1554(open)X +1731(and)X +1867(close,)X +2072(and)X +432 1796(logically)N +739(associating)X +1122(two)X +1269(independent)X +1687(\256les.)X +1886(For)X +2023(these)X +432 1884(reasons,)N +728(we)X +857(wanted)X +1123(to)X +1219(map)X +1391(both)X +1567(primary)X +1855(pages)X +2072(and)X +432 1972(over\257ow)N +737(pages)X +940(into)X +1084(the)X +1202(same)X +1387(\256le)X +1509(space.)X +604 2086(The)N +799(buddy-in-waiting)X +1425(algorithm)X +1806(provides)X +2152(a)X +432 2174(mechanism)N +851(to)X +966(support)X +1259(multiple)X +1578(pages)X +1814(per)X +1970(logical)X +432 2262(bucket)N +685(while)X +902(retaining)X +1226(the)X +1362(simple)X +1613(split)X +1788(sequence)X +2121(of)X +432 2350(linear)N +681(hashing.)X +1015(Over\257ow)X +1383(pages)X +1631(are)X +1795(preallocated)X +432 2438(between)N +781(generations)X +1232(of)X +1379(primary)X +1713(pages.)X +1996(These)X +432 2526(over\257ow)N +759(pages)X +984(are)X +1125(used)X +1314(by)X +1436(any)X +1594(bucket)X +1850(containing)X +432 2614(more)N +646(keys)X +842(than)X +1029(\256t)X +1144(on)X +1273(the)X +1420(primary)X +1723(page)X +1924(and)X +2089(are)X +432 2702(reclaimed,)N +808(if)X +896(possible,)X +1217(when)X +1430(the)X +1567(bucket)X +1819(later)X +2000(splits.)X +432 2790(Figure)N +687(3)X +773(depicts)X +1045(the)X +1188(layout)X +1433(of)X +1545(primary)X +1844(pages)X +2072(and)X +432 2878(over\257ow)N +752(pages)X +970(within)X +1209(the)X +1342(same)X +1542(\256le.)X +1699(Over\257ow)X +2036(page)X +432 2966(use)N +586(information)X +1011(is)X +1111(recorded)X +1440(in)X +1548(bitmaps)X +1847(which)X +2089(are)X +432 3054(themselves)N +819(stored)X +1046(on)X +1157(over\257ow)X +1472(pages.)X +1725(The)X +1880(addresses)X +432 3142(of)N +520(the)X +639(bitmap)X +882(pages)X +1086(and)X +1223(the)X +1342(number)X +1608(of)X +1695(pages)X +1898(allocated)X +432 3230(at)N +515(each)X +688(split)X +850(point)X +1039(are)X +1163(stored)X +1384(in)X +1470(the)X +1592(\256le)X +1718(header.)X +1997(Using)X +432 3318(this)N +577(information,)X +1005(both)X +1177(over\257ow)X +1492(addresses)X +1829(and)X +1974(bucket)X +432 3406(addresses)N +764(can)X +900(be)X +999(mapped)X +1276(to)X +1361(disk)X +1517(addresses)X +1848(by)X +1951(the)X +2072(fol-)X +432 3494(lowing)N +674(calculation:)X +0 f +8 s +432 3793(int)N +736(bucket;)X +1192(/*)X +1306(bucket)X +1572(address)X +1876(*/)X +432 3881(u_short)N +736(oaddr;)X +1192(/*)X +1306(OVERFLOW)X +1648(address)X +1952(*/)X +432 3969(int)N +736 -0.4125(nhdr_pages;)AX +1192(/*)X +1306(npages)X +1572(in)X +1686 -112.4062(\256le)AX +1838(header)X +2104(*/)X +432 4057(int)N +736 -0.4125(spares[32];)AX +1192(/*)X +1306(npages)X +1572(at)X +1686(each)X +1876(split)X +2104(*/)X +432 4145(int)N +736(log2\(\);)X +1198(/*)X +1312(ceil\(log)X +1654(base)X +1844(2\))X +1958(*/)X +432 4321(#DEFINE)N +736 -0.3929(BUCKET_TO_PAGE\(bucket\))AX +1610(\\)X +584 4409(bucket)N +850(+)X +926 -0.4167(nhdr_pages)AX +1344(+)X +1420(\\)X +584 4497 -0.3894(\(bucket?spares[logs2\(bucket)AN +1648(+)X +1724(1\)-1]:0\))X +432 4673(#DEFINE)N +736 -0.3947(OADDR_TO_PAGE\(oaddr\))AX +1534(\\)X +584 4761 -0.3984(BUCKET_TO_PAGE\(\(1)AN +1268(<<)X +1382 -0.4091(\(oaddr>>11\)\))AX +1876(-)X +1952(1\))X +2066(+)X +2142(\\)X +584 4849(oaddr)N +812(&)X +888(0x7ff;)X +1 f +10 s +604 5262(An)N +728(over\257ow)X +1039(page)X +1217(is)X +1295(addressed)X +1637(by)X +1742(its)X +1842(split)X +2004(point,)X +432 5350(identifying)N +858(the)X +1031(generations)X +1476(between)X +1819(which)X +2090(the)X +432 5438(over\257ow)N +740(page)X +915(is)X +991(allocated,)X +1324(and)X +1463(its)X +1561(page)X +1736(number,)X +2023(iden-)X +432 5526(tifying)N +665(the)X +783(particular)X +1111(page)X +1283(within)X +1507(the)X +1625(split)X +1782(point.)X +1986(In)X +2073(this)X +432 5614(implementation,)N +983(offsets)X +1225(within)X +1457(pages)X +1668(are)X +1795(16)X +1903(bits)X +2046(long)X +432 5702(\(limiting)N +732(the)X +851(maximum)X +1196(page)X +1368(size)X +1513(to)X +1595(32K\),)X +1800(so)X +1891(we)X +2005(select)X +2418 538(an)N +2535(over\257ow)X +2860(page)X +3052(addressing)X +3435(algorithm)X +3786(that)X +3946(can)X +4098(be)X +2418 626(expressed)N +2760(in)X +2847(16)X +2952(bits)X +3091(and)X +3231(which)X +3451(allows)X +3684(quick)X +3886(retrieval.)X +2418 714(The)N +2568(top)X +2695(\256ve)X +2840(bits)X +2980(indicate)X +3258(the)X +3380(split)X +3541(point)X +3729(and)X +3869(the)X +3991(lower)X +2418 802(eleven)N +2650(indicate)X +2926(the)X +3046(page)X +3220(number)X +3487(within)X +3713(the)X +3832(split)X +3990(point.)X +2418 890(Since)N +2633(\256ve)X +2789(bits)X +2940(are)X +3075(reserved)X +3384(for)X +3514(the)X +3648(split)X +3821(point,)X +4041(\256les)X +2418 978(may)N +2578(split)X +2737(32)X +2839(times)X +3034(yielding)X +3318(a)X +3376(maximum)X +3721(\256le)X +3844(size)X +3990(of)X +4078(2)X +7 s +946(32)Y +10 s +2418 1066(buckets)N +2698(and)X +2849(32)X +2 f +(*)S +1 f +2982(2)X +7 s +1034(11)Y +10 s +3113 1066(over\257ow)N +3433(pages.)X +3691(The)X +3850(maximum)X +2418 1154(page)N +2597(size)X +2749(is)X +2829(2)X +7 s +1122(15)Y +10 s +1154(,)Y +2971(yielding)X +3259(a)X +3321(maximum)X +3671(\256le)X +3799(size)X +3950(greater)X +2418 1242(than)N +2601(131,000)X +2906(GB)X +3061(\(on)X +3212(\256le)X +3358(systems)X +3655(supporting)X +4041(\256les)X +2418 1330(larger)N +2626(than)X +2784(4GB\).)X +10 f +2418 1418 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +1 Dt +4014 2275 MXY +0 133 Dl +3881 2275 MXY +0 133 Dl +3748 2275 MXY +0 133 Dl +3083 2275 MXY +0 133 Dl +5 s +1 f +3523 2475(2/3)N +3390(2/2)X +3257(2/1)X +2859(1/2)X +2726(1/1)X +5 Dt +3814 1743 MXY +0 133 Dl +3282 1743 MXY +0 133 Dl +3017 1743 MXY +0 133 Dl +2884 1743 MXY +0 133 Dl +1 Dt +3681 1743 MXY +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3548 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3415 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3282 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3150 MX +0 133 Dl +132 0 Dl +0 -133 Dl +-132 0 Dl +3017 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +2884 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3 f +8 s +3017 2601(Over\257ow)N +3285(Addresses)X +3515 2833(Over\257ow)N +3783(Pages)X +2850(Buckets)X +1 Di +3349 2740 MXY + 3349 2740 lineto + 3482 2740 lineto + 3482 2873 lineto + 3349 2873 lineto + 3349 2740 lineto +closepath 3 3349 2740 3482 2873 Dp +2684 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +5 Dt +4146 2275 MXY +0 133 Dl +3216 2275 MXY +0 133 Dl +2684 2275 MXY +0 133 Dl +2551 2275 MXY +0 133 Dl +1 f +3798 1963(3)N +3266 1980(2)N +3001(1)X +2868(0)X +1 Dt +2751 1743 MXY +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3548 2275 MXY +-15 -22 Dl +2 16 Dl +-13 11 Dl +26 -5 Dl +-282 -117 Dl +3432 2275 MXY +-10 -25 Dl +-2 16 Dl +-15 8 Dl +27 1 Dl +-166 -117 Dl +3282 2275 MXY +12 -25 Dl +-14 10 Dl +-15 -6 Dl +17 21 Dl +-16 -117 Dl +2884 2275 MXY +26 7 Dl +-12 -12 Dl +3 -16 Dl +-17 21 Dl +382 -117 Dl +2751 2275 MXY +25 9 Dl +-11 -12 Dl +5 -17 Dl +-19 20 Dl +515 -117 Dl +3 f +3070 2152(Over\257ow)N +3338(Pages)X +3482 2275 MXY + 3482 2275 lineto + 3615 2275 lineto + 3615 2408 lineto + 3482 2408 lineto + 3482 2275 lineto +closepath 3 3482 2275 3615 2408 Dp +3349 MX + 3349 2275 lineto + 3482 2275 lineto + 3482 2408 lineto + 3349 2408 lineto + 3349 2275 lineto +closepath 3 3349 2275 3482 2408 Dp +3216 MX + 3216 2275 lineto + 3349 2275 lineto + 3349 2408 lineto + 3216 2408 lineto + 3216 2275 lineto +closepath 3 3216 2275 3349 2408 Dp +2817 MX + 2817 2275 lineto + 2950 2275 lineto + 2950 2408 lineto + 2817 2408 lineto + 2817 2275 lineto +closepath 3 2817 2275 2950 2408 Dp +2684 MX + 2684 2275 lineto + 2817 2275 lineto + 2817 2408 lineto + 2684 2408 lineto + 2684 2275 lineto +closepath 3 2684 2275 2817 2408 Dp +3615 MX +0 133 Dl +531 0 Dl +0 -133 Dl +-531 0 Dl +2950 MX +0 133 Dl +266 0 Dl +0 -133 Dl +-266 0 Dl +2551 MX +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3798 1726 MXY +-21 -18 Dl +6 16 Dl +-10 13 Dl +25 -11 Dl +-599 -99 Dl +3266 1726 MXY +-1 -27 Dl +-7 15 Dl +-17 1 Dl +25 11 Dl +-67 -99 Dl +3033 1726 MXY +27 1 Dl +-14 -8 Dl +-1 -17 Dl +-12 24 Dl +166 -99 Dl +2900 1726 MXY +27 7 Dl +-13 -11 Dl +3 -17 Dl +-17 21 Dl +299 -99 Dl +3058 1621(Split)N +3203(Points)X +2418 2275 MXY +0 133 Dl +133 0 Dl +0 -133 Dl +-133 0 Dl +3 Dt +-1 Ds +3137(Figure)Y +2619(3:)X +1 f +2691(Split)X +2832(points)X +3008(occur)X +3168(between)X +3399(generations)X +3712(and)X +3823(are)X +3919(numbered)X +2418 3225(from)N +2560(0.)X +2642(In)X +2713(this)X +2824(\256gure)X +2991(there)X +3136(are)X +3231(two)X +3345(over\257ow)X +3590(pages)X +3753(allocated)X +4000(at)X +4063(split)X +2418 3313(point)N +2566(1)X +2614(and)X +2722(three)X +2865(allocated)X +3111(at)X +3173(split)X +3300(point)X +3448(2.)X +10 s +10 f +2418 3489 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +2949 3731(Buffer)N +3192(Management)X +1 f +2590 3863(The)N +2744(hash)X +2920(table)X +3105(is)X +3187(stored)X +3412(in)X +3502(memory)X +3797(as)X +3892(a)X +3956(logical)X +2418 3951(array)N +2633(of)X +2749(bucket)X +3012(pointers.)X +3359(Physically,)X +3761(the)X +3907(array)X +4121(is)X +2418 4039(arranged)N +2728(in)X +2818(segments)X +3144(of)X +3239(256)X +3387(pointers.)X +3713(Initially,)X +4013(there)X +2418 4127(is)N +2530(space)X +2767(to)X +2887(allocate)X +3195(256)X +3373(segments.)X +3769(Reallocation)X +2418 4215(occurs)N +2651(when)X +2847(the)X +2967(number)X +3234(of)X +3323(buckets)X +3590(exceeds)X +3867(32K)X +4027(\(256)X +2418 4303(*)N +2508(256\).)X +2745(Primary)X +3053(pages)X +3286(may)X +3473(be)X +3598(accessed)X +3929(directly)X +2418 4391(through)N +2711(the)X +2853(array)X +3062(by)X +3185(bucket)X +3442(number)X +3730(and)X +3889(over\257ow)X +2418 4479(pages)N +2628(are)X +2754 0.4028(referenced)AX +3122(logically)X +3429(by)X +3536(their)X +3710(over\257ow)X +4022(page)X +2418 4567(address.)N +2726(For)X +2864(small)X +3063(hash)X +3236(tables,)X +3469(it)X +3539(is)X +3618(desirable)X +3934(to)X +4022(keep)X +2418 4655(all)N +2525(pages)X +2735(in)X +2823(main)X +3009(memory)X +3302(while)X +3506(on)X +3612(larger)X +3826(tables,)X +4059(this)X +2418 4743(is)N +2523(probably)X +2860(impossible.)X +3298(To)X +3438(satisfy)X +3698(both)X +3891(of)X +4009(these)X +2418 4831(requirements,)N +2900(the)X +3041(package)X +3348(includes)X +3658(buffer)X +3897(manage-)X +2418 4919(ment)N +2598(with)X +2760(LRU)X +2940(\(least)X +3134(recently)X +3413(used\))X +3607(replacement.)X +2590 5033(By)N +2730(default,)X +3020(the)X +3165(package)X +3475(allocates)X +3802(up)X +3928(to)X +4036(64K)X +2418 5121(bytes)N +2616(of)X +2712(buffered)X +3014(pages.)X +3246(All)X +3377(pages)X +3589(in)X +3680(the)X +3807(buffer)X +4032(pool)X +2418 5209(are)N +2542(linked)X +2766(in)X +2852(LRU)X +3036(order)X +3230(to)X +3316(facilitate)X +3621(fast)X +3761(replacement.)X +2418 5297(Whereas)N +2724(ef\256cient)X +3011(access)X +3241(to)X +3327(primary)X +3605(pages)X +3812(is)X +3889(provided)X +2418 5385(by)N +2521(the)X +2642(bucket)X +2879(array,)X +3087(ef\256cient)X +3372(access)X +3600(to)X +3684(over\257ow)X +3991(pages)X +2418 5473(is)N +2501(provided)X +2816(by)X +2926(linking)X +3182(over\257ow)X +3497(page)X +3679(buffers)X +3936(to)X +4027(their)X +2418 5561(predecessor)N +2827(page)X +3008(\(either)X +3247(the)X +3374(primary)X +3657(page)X +3838(or)X +3933(another)X +2418 5649(over\257ow)N +2742(page\).)X +3000(This)X +3181(means)X +3425(that)X +3584(an)X +3699(over\257ow)X +4022(page)X +3 f +432 5960(6)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +7 p +%%Page: 7 7 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +720 258(Seltzer)N +977(&)X +1064(Yigit)X +3278(A)X +3356(New)X +3528(Hashing)X +3831(Package)X +4136(for)X +4259(UNIX)X +1 f +720 538(cannot)N +955(be)X +1052(present)X +1305(in)X +1388(the)X +1507(buffer)X +1724(pool)X +1886(if)X +1955(its)X +2050(primary)X +2324(page)X +720 626(is)N +804(not)X +937(present.)X +1240(This)X +1413(does)X +1591(not)X +1724(impact)X +1972(performance)X +2409(or)X +720 714(functionality,)N +1209(because)X +1524(an)X +1660(over\257ow)X +2005(page)X +2217(will)X +2400(be)X +720 802(accessed)N +1048(only)X +1236(after)X +1430(its)X +1550(predecessor)X +1975(page)X +2172(has)X +2324(been)X +720 890(accessed.)N +1068(Figure)X +1303(4)X +1369(depicts)X +1622(the)X +1746(data)X +1905(structures)X +2242(used)X +2414(to)X +720 978(manage)N +990(the)X +1108(buffer)X +1325(pool.)X +892 1092(The)N +1040(in-memory)X +1419(bucket)X +1656(array)X +1845(contains)X +2134(pointers)X +2414(to)X +720 1180(buffer)N +975(header)X +1248(structures)X +1617(which)X +1870(represent)X +2222(primary)X +720 1268(pages.)N +968(Buffer)X +1203(headers)X +1474(contain)X +1735(modi\256ed)X +2043(bits,)X +2202(the)X +2324(page)X +720 1356(address)N +995(of)X +1096(the)X +1228(buffer,)X +1479(a)X +1548(pointer)X +1808(to)X +1903(the)X +2034(actual)X +2259(buffer,)X +720 1444(and)N +875(a)X +950(pointer)X +1216(to)X +1317(the)X +1454(buffer)X +1690(header)X +1944(for)X +2077(an)X +2191(over\257ow)X +720 1532(page)N +901(if)X +979(it)X +1052(exists,)X +1283(in)X +1374(addition)X +1665(to)X +1756(the)X +1883(LRU)X +2072(links.)X +2296(If)X +2378(the)X +720 1620(buffer)N +950(corresponding)X +1442(to)X +1537(a)X +1606(particular)X +1947(bucket)X +2194(is)X +2280(not)X +2414(in)X +720 1708(memory,)N +1048(its)X +1164(pointer)X +1432(is)X +1526(NULL.)X +1801(In)X +1909(effect,)X +2154(pages)X +2377(are)X +720 1796(linked)N +950(in)X +1042(three)X +1233(ways.)X +1468(Using)X +1689(the)X +1817(buffer)X +2043(headers,)X +2338(they)X +720 1884(are)N +851(linked)X +1083(physically)X +1444(through)X +1725(the)X +1854(LRU)X +2045(links)X +2231(and)X +2378(the)X +720 1972(over\257ow)N +1036(links.)X +1241(Using)X +1462(the)X +1590(pages)X +1803(themselves,)X +2209(they)X +2377(are)X +720 2060(linked)N +943(logically)X +1246(through)X +1518(the)X +1639(over\257ow)X +1946(addresses)X +2276(on)X +2378(the)X +720 2148(page.)N +948(Since)X +1162(over\257ow)X +1482(pages)X +1700(are)X +1834(accessed)X +2151(only)X +2328(after)X +720 2236(their)N +904(predecessor)X +1321(pages,)X +1560(they)X +1734(are)X +1869(removed)X +2186(from)X +2378(the)X +720 2324(buffer)N +937(pool)X +1099(when)X +1293(their)X +1460(primary)X +1734(is)X +1807(removed.)X +10 f +720 2412 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +1 Dt +2309 3177 MXY +24 15 Dl +-8 -15 Dl +8 -15 Dl +-24 15 Dl +52 0 Dl +789 3160 MXY +-35 0 Dl +0 -156 Dl +1607 0 Dl +0 173 Dl +789 3091 MXY +-24 -15 Dl +9 15 Dl +-9 15 Dl +24 -15 Dl +-69 0 Dl +2309 3125 MXY +104 0 Dl +0 -155 Dl +-1693 0 Dl +0 121 Dl +927 3160 MXY +24 15 Dl +-9 -15 Dl +9 -15 Dl +-24 15 Dl +553 0 Dl +1618 3177 MXY +8 27 Dl +4 -17 Dl +16 -6 Dl +-28 -4 Dl +138 121 Dl +1895 3315 MXY +28 3 Dl +-15 -9 Dl +1 -18 Dl +-14 24 Dl +276 -138 Dl +3108 MY +-28 -3 Dl +15 10 Dl +-1 17 Dl +14 -24 Dl +-276 138 Dl +1756 3229 MXY +-8 -27 Dl +-3 17 Dl +-16 6 Dl +27 4 Dl +-138 -121 Dl +1480 MX +-24 -15 Dl +9 15 Dl +-9 15 Dl +24 -15 Dl +-553 0 Dl +3 f +5 s +1083 3073(LRU)N +1178(chain)X +4 Ds +1402 3851 MXY + 1402 3851 lineto + 1471 3851 lineto + 1471 3920 lineto + 1402 3920 lineto + 1402 3851 lineto +closepath 19 1402 3851 1471 3920 Dp +1445 3747(Over\257ow)N +1613(Address)X +1549 3609 MXY +0 69 Dl +1756 MX +-23 -15 Dl +8 15 Dl +-8 15 Dl +23 -15 Dl +-207 0 Dl +-1 Ds +3 Dt +1756 3419 MXY +-6 -28 Dl +-4 17 Dl +-17 5 Dl +27 6 Dl +-138 -138 Dl +2240 3471 MXY +15 -24 Dl +-15 9 Dl +-15 -9 Dl +15 24 Dl +0 -138 Dl +1826 3609 MXY +15 -24 Dl +-15 9 Dl +-16 -9 Dl +16 24 Dl +0 -138 Dl +1549 MX +15 -24 Dl +-15 9 Dl +-15 -9 Dl +15 24 Dl +0 -138 Dl +858 3471 MXY +15 -24 Dl +-15 9 Dl +-15 -9 Dl +15 24 Dl +0 -138 Dl +2240 3056 MXY +15 -24 Dl +-15 9 Dl +-15 -9 Dl +15 24 Dl +0 -138 Dl +1549 3056 MXY +15 -24 Dl +-15 9 Dl +-15 -9 Dl +15 24 Dl +0 -138 Dl +858 3056 MXY +15 -24 Dl +-15 9 Dl +-15 -9 Dl +15 24 Dl +0 -138 Dl +1 Dt +2171 3471 MXY + 2171 3471 lineto + 2448 3471 lineto + 2448 3609 lineto + 2171 3609 lineto + 2171 3471 lineto +closepath 19 2171 3471 2448 3609 Dp +1756 3609 MXY + 1756 3609 lineto + 2033 3609 lineto + 2033 3747 lineto + 1756 3747 lineto + 1756 3609 lineto +closepath 3 1756 3609 2033 3747 Dp +1480 3471 MXY + 1480 3471 lineto + 1756 3471 lineto + 1756 3609 lineto + 1480 3609 lineto + 1480 3471 lineto +closepath 19 1480 3471 1756 3609 Dp +789 MX + 789 3471 lineto + 1065 3471 lineto + 1065 3609 lineto + 789 3609 lineto + 789 3471 lineto +closepath 19 789 3471 1065 3609 Dp +962 3903(Buffer)N +1083(Header)X +849 3851 MXY + 849 3851 lineto + 918 3851 lineto + 918 3920 lineto + 849 3920 lineto + 849 3851 lineto +closepath 14 849 3851 918 3920 Dp +1756 3194 MXY + 1756 3194 lineto + 1895 3194 lineto + 1895 3471 lineto + 1756 3471 lineto + 1756 3194 lineto +closepath 14 1756 3194 1895 3471 Dp +2171 3056 MXY + 2171 3056 lineto + 2309 3056 lineto + 2309 3333 lineto + 2171 3333 lineto + 2171 3056 lineto +closepath 14 2171 3056 2309 3333 Dp +1480 MX + 1480 3056 lineto + 1618 3056 lineto + 1618 3333 lineto + 1480 3333 lineto + 1480 3056 lineto +closepath 14 1480 3056 1618 3333 Dp +789 MX + 789 3056 lineto + 927 3056 lineto + 927 3333 lineto + 789 3333 lineto + 789 3056 lineto +closepath 14 789 3056 927 3333 Dp +2780 MY +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +927 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +1065 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +1203 MX +0 138 Dl +139 0 Dl +0 -138 Dl +-139 0 Dl +1342 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +1480 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +1618 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +1756 MX +0 138 Dl +139 0 Dl +0 -138 Dl +-139 0 Dl +1895 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +2033 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +2171 MX +0 138 Dl +138 0 Dl +0 -138 Dl +-138 0 Dl +2309 MX +0 138 Dl +139 0 Dl +0 -138 Dl +-139 0 Dl +13 s +1048 2720(In)N +1173(Memory)X +1580(Bucket)X +1918(Array)X +867 3584(B0)N +1558(B5)X +2223(B10)X +1788 3722(O1/1)N +5 s +1515 3903(Primay)N +1651(Buffer)X +4 Ds +1990 3851 MXY + 1990 3851 lineto + 2059 3851 lineto + 2059 3920 lineto + 1990 3920 lineto + 1990 3851 lineto +closepath 3 1990 3851 2059 3920 Dp +2102 3903(Over\257ow)N +2270(Buffer)X +3 Dt +-1 Ds +8 s +720 4184(Figure)N +922(4:)X +1 f +996(Three)X +1164(primary)X +1386(pages)X +1551(\(B0,)X +1683(B5,)X +1794(B10\))X +1942(are)X +2039(accessed)X +2281(directly)X +720 4272(from)N +862(the)X +958(bucket)X +1146(array.)X +1326(The)X +1443(one)X +1553(over\257ow)X +1798(page)X +1935(\(O1/1\))X +2122(is)X +2182(linked)X +2359(phy-)X +720 4360(sically)N +915(from)X +1067(its)X +1155(primary)X +1384(page's)X +1577(buffer)X +1759(header)X +1955(as)X +2035(well)X +2172(as)X +2252(logically)X +720 4448(from)N +860(its)X +937(predecessor)X +1253(page)X +1389(buffer)X +1560(\(B5\).)X +10 s +10 f +720 4624 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +1191 4954(Table)N +1406(Parameterization)X +1 f +892 5086(When)N +1107(a)X +1166(hash)X +1336(table)X +1515(is)X +1590(created,)X +1865(the)X +1985(bucket)X +2221(size,)X +2388(\256ll)X +720 5174(factor,)N +953(initial)X +1164(number)X +1434(of)X +1526(elements,)X +1856(number)X +2125(of)X +2216(bytes)X +2409(of)X +720 5262(main)N +919(memory)X +1225(used)X +1411(for)X +1543(caching,)X +1851(and)X +2005(a)X +2079(user-de\256ned)X +720 5350(hash)N +892(function)X +1184(may)X +1347(be)X +1448(speci\256ed.)X +1797(The)X +1946(bucket)X +2184(size)X +2333(\(and)X +720 5438(page)N +906(size)X +1064(for)X +1191(over\257ow)X +1509(pages\))X +1752(defaults)X +2039(to)X +2134(256)X +2287(bytes.)X +720 5526(For)N +858(tables)X +1072(with)X +1241(large)X +1429(data)X +1590(items,)X +1810(it)X +1881(may)X +2046(be)X +2149(preferable)X +720 5614(to)N +803(increase)X +1088(the)X +1207(page)X +1380(size,)X +1545(and,)X +1701(conversely,)X +2089(applications)X +720 5702(storing)N +1002(small)X +1235(items)X +1467(exclusively)X +1891(in)X +2012(memory)X +2338(may)X +2706 538(bene\256t)N +2966(from)X +3164(a)X +3242(smaller)X +3520(bucket)X +3776(size.)X +3983(A)X +4082(bucket)X +4337(size)X +2706 626(smaller)N +2962(than)X +3120(64)X +3220(bytes)X +3409(is)X +3482(not)X +3604(recommended.)X +2878 740(The)N +3031(\256ll)X +3147(factor)X +3363(indicates)X +3676(a)X +3740(desired)X +4000(density)X +4258(within)X +2706 828(the)N +2833(hash)X +3009(table.)X +3234(It)X +3312(is)X +3394(an)X +3499(approximation)X +3995(of)X +4091(the)X +4217(number)X +2706 916(of)N +2815(keys)X +3004(allowed)X +3300(to)X +3404(accumulate)X +3811(in)X +3914(any)X +4071(one)X +4228(bucket,)X +2706 1004(determining)N +3119(when)X +3319(the)X +3442(hash)X +3614(table)X +3795(grows.)X +4056(Its)X +4161(default)X +4409(is)X +2706 1092(eight.)N +2953(If)X +3054(the)X +3199(user)X +3380(knows)X +3636(the)X +3781(average)X +4079(size)X +4251(of)X +4364(the)X +2706 1180(key/data)N +3008(pairs)X +3194(being)X +3402(stored)X +3627(in)X +3718(the)X +3845(table,)X +4050(near)X +4218(optimal)X +2706 1268(bucket)N +2943(sizes)X +3122(and)X +3261(\256ll)X +3372(factors)X +3614(may)X +3775(be)X +3874(selected)X +4155(by)X +4257(apply-)X +2706 1356(ing)N +2828(the)X +2946(equation:)X +0 f +8 s +2706 1655(\(1\))N +2994 -0.3938(\(\(average_pair_length)AX +3830(+)X +3906(4\))X +4020(*)X +3032 1743(ffactor\))N +3374(>=)X +3488(bsize)X +1 f +10 s +2706 2042(For)N +2859(highly)X +3104(time)X +3287(critical)X +3551(applications,)X +3999(experimenting)X +2706 2130(with)N +2919(different)X +3266(bucket)X +3550(sizes)X +3776(and)X +3962(\256ll)X +4120(factors)X +4409(is)X +2706 2218(encouraged.)N +2878 2332(Figures)N +3144(5a,b,)X +3326(and)X +3468(c)X +3530(illustrate)X +3836(the)X +3960(effects)X +4200(of)X +4292(vary-)X +2706 2420(ing)N +2841(page)X +3026(sizes)X +3215(and)X +3363(\256ll)X +3483(factors)X +3734(for)X +3860(the)X +3990(same)X +4187(data)X +4353(set.)X +2706 2508(The)N +2864(data)X +3031(set)X +3152(consisted)X +3482(of)X +3581(24474)X +3813(keys)X +3992(taken)X +4198(from)X +4386(an)X +2706 2596(online)N +2931(dictionary.)X +3301(The)X +3451(data)X +3609(value)X +3807(for)X +3925(each)X +4097(key)X +4237(was)X +4386(an)X +2706 2684(ASCII)N +2938(string)X +3143(for)X +3260(an)X +3359(integer)X +3605(from)X +3784(1)X +3847(to)X +3931(24474)X +4153(inclusive.)X +2706 2772(The)N +2867(test)X +3013(run)X +3155(consisted)X +3488(of)X +3590(creating)X +3884(a)X +3955(new)X +4124(hash)X +4306(table)X +2706 2860(\(where)N +2966(the)X +3100(ultimate)X +3398(size)X +3559(of)X +3662(the)X +3796(table)X +3987(was)X +4147(known)X +4400(in)X +2706 2948(advance\),)N +3054(entering)X +3354(each)X +3539(key/data)X +3848(pair)X +4010(into)X +4171(the)X +4306(table)X +2706 3036(and)N +2849(then)X +3014(retrieving)X +3353(each)X +3528(key/data)X +3827(pair)X +3979(from)X +4162(the)X +4286(table.)X +2706 3124(Each)N +2898(of)X +2996(the)X +3125(graphs)X +3369(shows)X +3599(the)X +3727(timings)X +3996(resulting)X +4306(from)X +2706 3212(varying)N +2973(the)X +3093(pagesize)X +3392(from)X +3570(128)X +3712(bytes)X +3903(to)X +3986(1M)X +4118(and)X +4255(the)X +4374(\256ll)X +2706 3300(factor)N +2929(from)X +3120(1)X +3195(to)X +3292(128.)X +3486(For)X +3631(each)X +3813(run,)X +3974(the)X +4106(buffer)X +4337(size)X +2706 3388(was)N +2874(set)X +3006(at)X +3106(1M.)X +3299(The)X +3466(tests)X +3650(were)X +3849(all)X +3971(run)X +4120(on)X +4242(an)X +4360(HP)X +2706 3476(9000/370)N +3077(\(33.3)X +3312(Mhz)X +3527(MC68030\),)X +3966(with)X +4176(16M)X +4395(of)X +2706 3564(memory,)N +3042(64K)X +3228(physically)X +3605(addressed)X +3970(cache,)X +4222(and)X +4386(an)X +2706 3652(HP7959S)N +3055(disk)X +3231(drive,)X +3459(running)X +3751(4.3BSD-Reno)X +4244(single-)X +2706 3740(user.)N +2878 3854(Both)N +3066(system)X +3321(time)X +3496(\(Figure)X +3764(5a\))X +3899(and)X +4047(elapsed)X +4320(time)X +2706 3942(\(Figure)N +2966(5b\))X +3097(show)X +3290(that)X +3434(for)X +3552(all)X +3655(bucket)X +3892(sizes,)X +4091(the)X +4212(greatest)X +2706 4030(performance)N +3137(gains)X +3329(are)X +3451(made)X +3648(by)X +3751(increasing)X +4104(the)X +4225(\256ll)X +4336(fac-)X +2706 4118(tor)N +2822(until)X +2995(equation)X +3298(1)X +3365(is)X +3445(satis\256ed.)X +3774(The)X +3925(user)X +4085(time)X +4253(shown)X +2706 4206(in)N +2791(Figure)X +3023(5c)X +3122(gives)X +3314(a)X +3373(more)X +3561(detailed)X +3838(picture)X +4083(of)X +4172(how)X +4332(per-)X +2706 4294(formance)N +3054(varies.)X +3330(The)X +3499(smaller)X +3778(bucket)X +4035(sizes)X +4234(require)X +2706 4382(fewer)N +2921(keys)X +3099(per)X +3233(page)X +3416(to)X +3509(satisfy)X +3749(equation)X +4056(1)X +4127(and)X +4274(there-)X +2706 4470(fore)N +2860(incur)X +3049(fewer)X +3257(collisions.)X +3607(However,)X +3946(when)X +4144(the)X +4265(buffer)X +2706 4558(pool)N +2884(size)X +3045(is)X +3134(\256xed,)X +3349(smaller)X +3620(pages)X +3838(imply)X +4059(more)X +4259(pages.)X +2706 4646(An)N +2830(increased)X +3160(number)X +3430(of)X +3522(pages)X +3730(means)X +3960(more)X +2 f +4150(malloc\(3\))X +1 f +2706 4734(calls)N +2879(and)X +3021(more)X +3212(overhead)X +3533(in)X +3621(the)X +3745(hash)X +3918(package's)X +4265(buffer)X +2706 4822(manager)N +3003(to)X +3085(manage)X +3355(the)X +3473(additional)X +3813(pages.)X +2878 4936(The)N +3028(tradeoff)X +3308(works)X +3529(out)X +3655(most)X +3834(favorably)X +4166(when)X +4364(the)X +2706 5024(page)N +2886(size)X +3039(is)X +3120(256)X +3268(and)X +3412(the)X +3538(\256ll)X +3654(factor)X +3870(is)X +3950(8.)X +4057(Similar)X +4319(con-)X +2706 5112(clusions)N +3009(were)X +3207(obtained)X +3524(if)X +3614(the)X +3753(test)X +3905(was)X +4071(run)X +4218(without)X +2706 5200(knowing)N +3007(the)X +3126(\256nal)X +3289(table)X +3466(size)X +3612(in)X +3695(advance.)X +4020(If)X +4095(the)X +4214(\256le)X +4337(was)X +2706 5288(closed)N +2942(and)X +3088(written)X +3345(to)X +3437(disk,)X +3620(the)X +3748(conclusions)X +4156(were)X +4343(still)X +2706 5376(the)N +2832(same.)X +3065(However,)X +3408(rereading)X +3740(the)X +3865(\256le)X +3994(from)X +4177(disk)X +4337(was)X +2706 5464(slightly)N +2983(faster)X +3199(if)X +3285(a)X +3358(larger)X +3583(bucket)X +3834(size)X +3996(and)X +4149(\256ll)X +4274(factor)X +2706 5552(were)N +2898(used)X +3079(\(1K)X +3238(bucket)X +3486(size)X +3645(and)X +3795(32)X +3909(\256ll)X +4031(factor\).)X +4320(This)X +2706 5640(follows)N +2987(intuitively)X +3356(from)X +3553(the)X +3691(improved)X +4038(ef\256ciency)X +4395(of)X +3 f +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +4424(7)X + +8 p +%%Page: 8 8 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +432 258(A)N +510(New)X +682(Hashing)X +985(Package)X +1290(for)X +1413(UNIX)X +3663(Seltzer)X +3920(&)X +4007(Yigit)X +1 f +432 538(performing)N +830(1K)X +965(reads)X +1172(from)X +1365(the)X +1500(disk)X +1670(rather)X +1894(than)X +2068(256)X +432 626(byte)N +609(reads.)X +857(In)X +962(general,)X +1257(performance)X +1702(for)X +1834(disk)X +2005(based)X +432 714(tables)N +639(is)X +712(best)X +861(when)X +1055(the)X +1173(page)X +1345(size)X +1490(is)X +1563(approximately)X +2046(1K.)X +10 f +432 802 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +619 2380 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +629 2437 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +648 2504 MXY +-12 25 Dl +24 0 Dl +-12 -25 Dl +686 2515 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +762 2516 MXY +-12 24 Dl +25 0 Dl +-13 -24 Dl +916 2515 MXY +-13 24 Dl +25 0 Dl +-12 -24 Dl +1222 2516 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +1834 2515 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +1 Dt +619 2392 MXY +10 57 Dl +19 67 Dl +38 11 Dl +76 1 Dl +154 -1 Dl +306 1 Dl +612 -1 Dl +8 s +1 f +1628 2522(128)N +3 Dt +607 2245 MXY +24 Dc +617 2375 MXY +23 Dc +635 2442 MXY +24 Dc +674 2525 MXY +23 Dc +750 2529 MXY +24 Dc +904 2527 MXY +23 Dc +1210 MX +23 Dc +1822 2528 MXY +23 Dc +20 Ds +1 Dt +619 2245 MXY +10 130 Dl +19 67 Dl +38 83 Dl +76 4 Dl +154 -2 Dl +306 0 Dl +612 1 Dl +678 2482(256)N +-1 Ds +3 Dt +619 2127 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +629 2191 MXY +0 25 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +648 2334 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +686 2409 MXY +0 25 Dl +0 -13 Dl +12 0 Dl +-24 0 Dl +762 2516 MXY +0 25 Dl +0 -12 Dl +13 0 Dl +-25 0 Dl +916 2516 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-25 0 Dl +1222 2515 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +1834 2515 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +5 Dt +619 2139 MXY +10 65 Dl +19 142 Dl +38 75 Dl +76 108 Dl +154 -1 Dl +306 -1 Dl +612 0 Dl +694 2401(512)N +3 Dt +631 2064 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +641 2077 MXY +-24 25 Dl +12 -12 Dl +-12 -13 Dl +24 25 Dl +660 2132 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +698 2292 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +775 2382 MXY +-25 24 Dl +12 -12 Dl +-12 -12 Dl +25 24 Dl +928 2516 MXY +-25 24 Dl +13 -12 Dl +-13 -12 Dl +25 24 Dl +1234 2516 MXY +-24 25 Dl +12 -12 Dl +-12 -13 Dl +24 25 Dl +1846 2516 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +16 Ds +1 Dt +619 2076 MXY +10 14 Dl +19 54 Dl +38 160 Dl +76 90 Dl +154 134 Dl +306 1 Dl +612 -1 Dl +694 2257(1024)N +-1 Ds +3 Dt +619 1877 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +629 1855 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +648 1838 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +686 1860 MXY +12 -25 Dl +-24 0 Dl +12 25 Dl +762 1923 MXY +13 -24 Dl +-25 0 Dl +12 24 Dl +916 2087 MXY +12 -24 Dl +-25 0 Dl +13 24 Dl +1222 2256 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +1834 2541 MXY +12 -25 Dl +-24 0 Dl +12 25 Dl +619 1865 MXY +10 -22 Dl +19 -17 Dl +38 21 Dl +76 64 Dl +154 164 Dl +306 169 Dl +612 285 Dl +1645 2427(4096)N +619 1243 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +629 1196 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +648 1146 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +686 1174 MXY +0 25 Dl +0 -13 Dl +12 0 Dl +-24 0 Dl +762 1249 MXY +0 24 Dl +0 -12 Dl +13 0 Dl +-25 0 Dl +916 1371 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-25 0 Dl +1222 1680 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +1834 1999 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +619 1255 MXY +10 -47 Dl +19 -50 Dl +38 28 Dl +76 75 Dl +154 122 Dl +306 309 Dl +612 319 Dl +1741 1934(8192)N +5 Dt +609 2531 MXY +1225 0 Dl +609 MX +0 -1553 Dl +2531 MY +0 16 Dl +4 Ds +1 Dt +2531 MY +0 -1553 Dl +593 2625(0)N +-1 Ds +5 Dt +916 2531 MXY +0 16 Dl +4 Ds +1 Dt +2531 MY +0 -1553 Dl +884 2625(32)N +-1 Ds +5 Dt +1222 2531 MXY +0 16 Dl +4 Ds +1 Dt +2531 MY +0 -1553 Dl +1190 2625(64)N +-1 Ds +5 Dt +1528 2531 MXY +0 16 Dl +4 Ds +1 Dt +2531 MY +0 -1553 Dl +1496 2625(96)N +-1 Ds +5 Dt +1834 2531 MXY +0 16 Dl +4 Ds +1 Dt +2531 MY +0 -1553 Dl +1786 2625(128)N +-1 Ds +5 Dt +609 2531 MXY +-16 0 Dl +4 Ds +1 Dt +609 MX +1225 0 Dl +545 2558(0)N +-1 Ds +5 Dt +609 2013 MXY +-16 0 Dl +4 Ds +1 Dt +609 MX +1225 0 Dl +481 2040(100)N +-1 Ds +5 Dt +609 1496 MXY +-16 0 Dl +4 Ds +1 Dt +609 MX +1225 0 Dl +481 1523(200)N +-1 Ds +5 Dt +609 978 MXY +-16 0 Dl +4 Ds +1 Dt +609 MX +1225 0 Dl +481 1005(300)N +1088 2724(Fill)N +1194(Factor)X +422 1611(S)N +426 1667(e)N +426 1724(c)N +424 1780(o)N +424 1837(n)N +424 1893(d)N +428 1949(s)N +3 Dt +-1 Ds +3 f +432 2882(Figure)N +636(5a:)X +1 f +744(System)X +956(Time)X +1113(for)X +1209(dictionary)X +1490(data)X +1618(set)X +1711(with)X +1847(1M)X +1958(of)X +2033(buffer)X +432 2970(space)N +594(and)X +707(varying)X +923(bucket)X +1114(sizes)X +1259(and)X +1372(\256ll)X +1465(factors.)X +1675(Each)X +1823(line)X +1940(is)X +2004(labeled)X +432 3058(with)N +562(its)X +639(bucket)X +825(size.)X +10 s +10 f +432 3234 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +8 s +1 f +428 4381(s)N +424 4325(d)N +424 4269(n)N +424 4212(o)N +426 4156(c)N +426 4099(e)N +422 4043(S)N +1116 5156(Fill)N +1222(Factor)X +506 3437(3200)N +4 Ds +1 Dt +666 3410 MXY +1168 0 Dl +-1 Ds +5 Dt +666 MX +-16 0 Dl +506 3825(2400)N +4 Ds +1 Dt +666 3799 MXY +1168 0 Dl +-1 Ds +5 Dt +666 MX +-16 0 Dl +506 4214(1600)N +4 Ds +1 Dt +666 4186 MXY +1168 0 Dl +-1 Ds +5 Dt +666 MX +-16 0 Dl +538 4602(800)N +4 Ds +1 Dt +666 4575 MXY +1168 0 Dl +-1 Ds +5 Dt +666 MX +-16 0 Dl +602 4990(0)N +4 Ds +1 Dt +666 4963 MXY +1168 0 Dl +-1 Ds +5 Dt +666 MX +-16 0 Dl +1786 5057(128)N +4 Ds +1 Dt +1834 4963 MXY +0 -1553 Dl +-1 Ds +5 Dt +4963 MY +0 16 Dl +1510 5057(96)N +4 Ds +1 Dt +1542 4963 MXY +0 -1553 Dl +-1 Ds +5 Dt +4963 MY +0 16 Dl +1218 5057(64)N +4 Ds +1 Dt +1250 4963 MXY +0 -1553 Dl +-1 Ds +5 Dt +4963 MY +0 16 Dl +926 5057(32)N +4 Ds +1 Dt +958 4963 MXY +0 -1553 Dl +-1 Ds +5 Dt +4963 MY +0 16 Dl +650 5057(0)N +4 Ds +1 Dt +666 4963 MXY +0 -1553 Dl +-1 Ds +5 Dt +4963 MY +0 16 Dl +4963 MY +0 -1553 Dl +4963 MY +1168 0 Dl +1741 4752(8192)N +3 Dt +675 3732 MXY +9 -172 Dl +18 -118 Dl +37 128 Dl +73 -121 Dl +146 623 Dl +292 497 Dl +584 245 Dl +4802 MY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +1250 4557 MXY +0 25 Dl +0 -13 Dl +12 0 Dl +-24 0 Dl +958 4060 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +812 3437 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +739 3558 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +702 3430 MXY +0 25 Dl +0 -13 Dl +13 0 Dl +-25 0 Dl +684 3548 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +675 3720 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +1637 4912(4096)N +675 4307 MXY +9 -58 Dl +18 30 Dl +37 89 Dl +73 144 Dl +146 235 Dl +292 122 Dl +584 89 Dl +4970 MY +12 -24 Dl +-24 0 Dl +12 24 Dl +1250 4881 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +958 4759 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +812 4524 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +739 4380 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +702 4291 MXY +13 -24 Dl +-25 0 Dl +12 24 Dl +684 4261 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +675 4319 MXY +12 -24 Dl +-24 0 Dl +12 24 Dl +734 4662(1024)N +16 Ds +1 Dt +675 4352 MXY +9 60 Dl +18 134 Dl +37 266 Dl +73 117 Dl +146 30 Dl +292 0 Dl +584 -1 Dl +-1 Ds +3 Dt +1846 4946 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +1262 4946 MXY +-24 25 Dl +12 -12 Dl +-12 -13 Dl +24 25 Dl +970 4947 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +824 4917 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +751 4800 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +715 4534 MXY +-25 25 Dl +12 -13 Dl +-12 -12 Dl +25 25 Dl +696 4400 MXY +-24 24 Dl +12 -12 Dl +-12 -12 Dl +24 24 Dl +687 4339 MXY +-24 25 Dl +12 -12 Dl +-12 -13 Dl +24 25 Dl +718 4792(512)N +5 Dt +675 4422 MXY +9 137 Dl +18 278 Dl +37 105 Dl +73 18 Dl +146 -1 Dl +292 0 Dl +584 -1 Dl +3 Dt +4946 MY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +1250 4946 MXY +0 25 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +958 4947 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +812 4948 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +739 4930 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +702 4824 MXY +0 25 Dl +0 -12 Dl +13 0 Dl +-25 0 Dl +684 4547 MXY +0 24 Dl +0 -12 Dl +12 0 Dl +-24 0 Dl +675 4410 MXY +0 25 Dl +0 -13 Dl +12 0 Dl +-24 0 Dl +750 4921(256)N +20 Ds +1 Dt +675 4597 MXY +9 246 Dl +18 106 Dl +37 10 Dl +73 0 Dl +146 0 Dl +292 0 Dl +584 -1 Dl +-1 Ds +3 Dt +1822 MX +23 Dc +1238 4959 MXY +23 Dc +946 MX +23 Dc +800 MX +23 Dc +727 MX +23 Dc +691 4949 MXY +23 Dc +672 4843 MXY +24 Dc +663 4597 MXY +24 Dc +1395 4961(128)N +1 Dt +675 4855 MXY +9 93 Dl +18 10 Dl +37 1 Dl +73 0 Dl +146 -1 Dl +292 0 Dl +584 0 Dl +3 Dt +4946 MY +-12 24 Dl +24 0 Dl +-12 -24 Dl +1250 MX +-12 24 Dl +24 0 Dl +-12 -24 Dl +958 MX +-12 24 Dl +24 0 Dl +-12 -24 Dl +812 MX +-12 25 Dl +24 0 Dl +-12 -25 Dl +739 4947 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +702 4946 MXY +-12 24 Dl +25 0 Dl +-13 -24 Dl +684 4936 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +675 4843 MXY +-12 24 Dl +24 0 Dl +-12 -24 Dl +3 Dt +-1 Ds +3 f +432 5314(Figure)N +634(5b:)X +1 f +744(Elapsed)X +967(Time)X +1123(for)X +1218(dictionary)X +1498(data)X +1625(set)X +1717(with)X +1851(1M)X +1960(of)X +2033(buffer)X +432 5402(space)N +593(and)X +705(varying)X +920(bucket)X +1110(sizes)X +1254(and)X +1366(\256ll)X +1457(factors.)X +1681(Each)X +1827(line)X +1942(is)X +2004(labeled)X +432 5490(with)N +562(its)X +639(bucket)X +825(size.)X +10 s +2590 538(If)N +2677(an)X +2785(approximation)X +3284(of)X +3383(the)X +3513(number)X +3790(of)X +3889(elements)X +2418 626(ultimately)N +2773(to)X +2866(be)X +2973(stored)X +3200(in)X +3293(the)X +3422(hash)X +3599(table)X +3785(is)X +3868(known)X +4116(at)X +2418 714(the)N +2564(time)X +2754(of)X +2869(creation,)X +3196(the)X +3342(hash)X +3536(package)X +3847(takes)X +4059(this)X +2418 802(number)N +2688(as)X +2779(a)X +2839(parameter)X +3185(and)X +3325(uses)X +3487(it)X +3555(to)X +3641(hash)X +3812(entries)X +4050(into)X +2418 890(the)N +2541(full)X +2677(sized)X +2867(table)X +3048(rather)X +3261(than)X +3424(growing)X +3716(the)X +3838(table)X +4018(from)X +2418 978(a)N +2477(single)X +2691(bucket.)X +2968(If)X +3044(this)X +3181(number)X +3448(is)X +3523(not)X +3647(known,)X +3907(the)X +4027(hash)X +2418 1066(table)N +2632(starts)X +2859(with)X +3059(a)X +3153(single)X +3402(bucket)X +3674(and)X +3848(gracefully)X +2418 1154(expands)N +2707(as)X +2800(elements)X +3111(are)X +3236(added,)X +3474(although)X +3780(a)X +3842(slight)X +4044(per-)X +2418 1242(formance)N +2747(degradation)X +3151(may)X +3313(be)X +3413(noticed.)X +3713(Figure)X +3946(6)X +4010(illus-)X +2418 1330(trates)N +2625(the)X +2756(difference)X +3116(in)X +3211(performance)X +3651(between)X +3952(storing)X +2418 1418(keys)N +2588(in)X +2673(a)X +2732(\256le)X +2857(when)X +3054(the)X +3174(ultimate)X +3458(size)X +3605(is)X +3680(known)X +3920(\(the)X +4067(left)X +2418 1506(bars)N +2581(in)X +2672(each)X +2849(set\),)X +3014(compared)X +3360(to)X +3450(building)X +3744(the)X +3870(\256le)X +4000(when)X +2418 1594(the)N +2550(ultimate)X +2846(size)X +3005(is)X +3091(unknown)X +3422(\(the)X +3580(right)X +3764(bars)X +3931(in)X +4026(each)X +2418 1682(set\).)N +2609(Once)X +2814(the)X +2947(\256ll)X +3069(factor)X +3291(is)X +3378(suf\256ciently)X +3772(high)X +3948(for)X +4076(the)X +2418 1770(page)N +2596(size)X +2747(\(8\),)X +2887(growing)X +3180(the)X +3304(table)X +3486(dynamically)X +3908(does)X +4081(lit-)X +2418 1858(tle)N +2518(to)X +2600(degrade)X +2875(performance.)X +10 f +2418 1946 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +9 s +1 f +2413 3238(s)N +2409 3173(d)N +2409 3108(n)N +2409 3043(o)N +2411 2979(c)N +2411 2914(e)N +2407 2849(S)N +3143 4129(Fill)N +3261(Factor)X +2448 2152(15)N +4 Ds +1 Dt +2557 2122 MXY +1473 0 Dl +-1 Ds +5 Dt +2557 MX +-19 0 Dl +2448 2747(10)N +4 Ds +1 Dt +2557 2717 MXY +1473 0 Dl +-1 Ds +5 Dt +2557 MX +-19 0 Dl +2484 3343(5)N +4 Ds +1 Dt +2557 3313 MXY +1473 0 Dl +-1 Ds +5 Dt +2557 MX +-19 0 Dl +2484 3938(0)N +4 Ds +1 Dt +2557 3908 MXY +1473 0 Dl +-1 Ds +5 Dt +2557 MX +-19 0 Dl +3976 4015(128)N +4 Ds +1 Dt +4030 3908 MXY +0 -1786 Dl +-1 Ds +5 Dt +3908 MY +0 19 Dl +3626 4015(96)N +4 Ds +1 Dt +3662 3908 MXY +0 -1786 Dl +-1 Ds +5 Dt +3908 MY +0 19 Dl +3258 4015(64)N +4 Ds +1 Dt +3294 3908 MXY +0 -1786 Dl +-1 Ds +5 Dt +3908 MY +0 19 Dl +2889 4015(32)N +4 Ds +1 Dt +2925 3908 MXY +0 -1786 Dl +-1 Ds +5 Dt +3908 MY +0 19 Dl +2539 4015(0)N +4 Ds +1 Dt +2557 3908 MXY +0 -1786 Dl +-1 Ds +5 Dt +3908 MY +0 19 Dl +3908 MY +0 -1786 Dl +3908 MY +1473 0 Dl +4053 2378(8192)N +3 Dt +2569 2277 MXY +11 0 Dl +23 48 Dl +46 -167 Dl +92 35 Dl +184 12 Dl +369 143 Dl +736 0 Dl +2334 MY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +3294 2334 MXY +0 28 Dl +0 -14 Dl +13 0 Dl +-27 0 Dl +2925 2192 MXY +0 27 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2741 2180 MXY +0 27 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2649 2144 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2603 2311 MXY +0 27 Dl +0 -13 Dl +14 0 Dl +-28 0 Dl +2580 2263 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2569 2263 MXY +0 28 Dl +0 -14 Dl +13 0 Dl +-27 0 Dl +4053 2591(4096)N +2569 2348 MXY +11 -11 Dl +23 -96 Dl +46 71 Dl +92 72 Dl +184 226 Dl +369 48 Dl +736 -60 Dl +2612 MY +14 -28 Dl +-28 0 Dl +14 28 Dl +3294 2672 MXY +13 -28 Dl +-27 0 Dl +14 28 Dl +2925 2624 MXY +14 -28 Dl +-28 0 Dl +14 28 Dl +2741 2398 MXY +14 -28 Dl +-28 0 Dl +14 28 Dl +2649 2326 MXY +14 -27 Dl +-28 0 Dl +14 27 Dl +2603 2255 MXY +14 -28 Dl +-28 0 Dl +14 28 Dl +2580 2350 MXY +14 -27 Dl +-28 0 Dl +14 27 Dl +2569 2362 MXY +13 -28 Dl +-27 0 Dl +14 28 Dl +4053 2681(1024)N +16 Ds +1 Dt +2569 2300 MXY +11 48 Dl +23 96 Dl +46 95 Dl +92 274 Dl +184 202 Dl +369 -155 Dl +736 -190 Dl +-1 Ds +3 Dt +4044 2656 MXY +-28 28 Dl +14 -14 Dl +-14 -14 Dl +28 28 Dl +3307 2846 MXY +-27 28 Dl +14 -14 Dl +-14 -14 Dl +27 28 Dl +2939 3001 MXY +-28 28 Dl +14 -14 Dl +-14 -14 Dl +28 28 Dl +2755 2799 MXY +-28 28 Dl +14 -14 Dl +-14 -14 Dl +28 28 Dl +2663 2525 MXY +-28 28 Dl +14 -14 Dl +-14 -14 Dl +28 28 Dl +2617 2430 MXY +-28 28 Dl +14 -14 Dl +-14 -14 Dl +28 28 Dl +2594 2334 MXY +-28 28 Dl +14 -14 Dl +-14 -14 Dl +28 28 Dl +2582 2287 MXY +-27 27 Dl +14 -14 Dl +-14 -13 Dl +27 27 Dl +4053 2851(512)N +5 Dt +2569 2372 MXY +11 -24 Dl +23 405 Dl +46 83 Dl +92 227 Dl +184 -72 Dl +369 -119 Dl +736 -107 Dl +3 Dt +2751 MY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +3294 2858 MXY +0 28 Dl +0 -14 Dl +13 0 Dl +-27 0 Dl +2925 2977 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2741 3049 MXY +0 27 Dl +0 -13 Dl +14 0 Dl +-28 0 Dl +2649 2823 MXY +0 27 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2603 2739 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2580 2334 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2569 2358 MXY +0 28 Dl +0 -14 Dl +13 0 Dl +-27 0 Dl +4053 2795(256)N +20 Ds +1 Dt +2569 2456 MXY +11 285 Dl +23 95 Dl +46 251 Dl +92 -60 Dl +184 -84 Dl +369 -107 Dl +736 -71 Dl +-1 Ds +3 Dt +4016 MX +27 Dc +3280 2836 MXY +27 Dc +2912 2943 MXY +27 Dc +2728 3027 MXY +27 Dc +2635 3087 MXY +28 Dc +2589 2836 MXY +28 Dc +2566 2741 MXY +27 Dc +2554 2456 MXY +28 Dc +4053 2741(128)N +1 Dt +2569 2729 MXY +11 203 Dl +23 131 Dl +46 -60 Dl +92 -119 Dl +184 -60 Dl +369 -83 Dl +736 -12 Dl +3 Dt +2716 MY +-14 27 Dl +28 0 Dl +-14 -27 Dl +3294 2727 MXY +-14 28 Dl +27 0 Dl +-13 -28 Dl +2925 2811 MXY +-14 27 Dl +28 0 Dl +-14 -27 Dl +2741 2870 MXY +-14 28 Dl +28 0 Dl +-14 -28 Dl +2649 2989 MXY +-14 28 Dl +28 0 Dl +-14 -28 Dl +2603 3049 MXY +-14 27 Dl +28 0 Dl +-14 -27 Dl +2580 2918 MXY +-14 28 Dl +28 0 Dl +-14 -28 Dl +2569 2716 MXY +-14 27 Dl +27 0 Dl +-13 -27 Dl +3 Dt +-1 Ds +3 f +8 s +2418 4286(Figure)N +2628(5c:)X +1 f +2738(User)X +2887(Time)X +3051(for)X +3154(dictionary)X +3442(data)X +3577(set)X +3677(with)X +3820(1M)X +3938(of)X +4019(buffer)X +2418 4374(space)N +2579(and)X +2691(varying)X +2906(bucket)X +3096(sizes)X +3240(and)X +3352(\256ll)X +3443(factors.)X +3667(Each)X +3813(line)X +3928(is)X +3990(labeled)X +2418 4462(with)N +2548(its)X +2625(bucket)X +2811(size.)X +10 s +10 f +2418 4638 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +1 f +2590 4840(Since)N +2796(no)X +2904(known)X +3150(hash)X +3325(function)X +3620(performs)X +3938(equally)X +2418 4928(well)N +2589(on)X +2702(all)X +2815(possible)X +3110(data,)X +3297(the)X +3428(user)X +3595(may)X +3766(\256nd)X +3923(that)X +4076(the)X +2418 5016(built-in)N +2678(hash)X +2849(function)X +3140(does)X +3311(poorly)X +3544(on)X +3648(a)X +3708(particular)X +4040(data)X +2418 5104(set.)N +2548(In)X +2636(this)X +2771(case,)X +2950(a)X +3006(hash)X +3173(function,)X +3480(taking)X +3700(two)X +3840(arguments)X +2418 5192(\(a)N +2507(pointer)X +2760(to)X +2848(a)X +2910(byte)X +3074(string)X +3282(and)X +3424(a)X +3486(length\))X +3739(and)X +3880(returning)X +2418 5280(an)N +2517(unsigned)X +2829(long)X +2993(to)X +3077(be)X +3175(used)X +3344(as)X +3433(the)X +3553(hash)X +3722(value,)X +3938(may)X +4098(be)X +2418 5368(speci\256ed)N +2731(at)X +2817(hash)X +2992(table)X +3176(creation)X +3463(time.)X +3673(When)X +3893(an)X +3996(exist-)X +2418 5456(ing)N +2570(hash)X +2767(table)X +2973(is)X +3076(opened)X +3358(and)X +3524(a)X +3609(hash)X +3805(function)X +4121(is)X +2418 5544(speci\256ed,)N +2752(the)X +2879(hash)X +3054(package)X +3346(will)X +3498(try)X +3615(to)X +3705(determine)X +4054(that)X +2418 5632(the)N +2546(hash)X +2723(function)X +3020(supplied)X +3321(is)X +3404(the)X +3532(one)X +3678(with)X +3850(which)X +4076(the)X +2418 5720(table)N +2630(was)X +2811(created.)X +3139(There)X +3382(are)X +3536(a)X +3627(variety)X +3905(of)X +4027(hash)X +3 f +432 5960(8)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +9 p +%%Page: 9 9 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +720 258(Seltzer)N +977(&)X +1064(Yigit)X +3278(A)X +3356(New)X +3528(Hashing)X +3831(Package)X +4136(for)X +4259(UNIX)X +1 f +720 538(functions)N +1065(provided)X +1397(with)X +1586(the)X +1731(package.)X +2082(The)X +2253(default)X +720 626(function)N +1014(for)X +1135(the)X +1260(package)X +1551(is)X +1631(the)X +1755(one)X +1897(which)X +2119(offered)X +2378(the)X +720 714(best)N +875(performance)X +1308(in)X +1396(terms)X +1600(of)X +1693(cycles)X +1920(executed)X +2232(per)X +2360(call)X +720 802(\(it)N +827(did)X +965(not)X +1103(produce)X +1398(the)X +1531(fewest)X +1776(collisions)X +2117(although)X +2432(it)X +720 890(was)N +866(within)X +1091(a)X +1148(small)X +1341(percentage)X +1710(of)X +1797(the)X +1915(function)X +2202(that)X +2342(pro-)X +720 978(duced)N +947(the)X +1080(fewest)X +1324(collisions\).)X +1731(Again,)X +1981(in)X +2077(time)X +2253(critical)X +720 1066(applications,)N +1152(users)X +1342(are)X +1466(encouraged)X +1862(to)X +1949(experiment)X +2334(with)X +720 1154(a)N +783(variety)X +1032(of)X +1125(hash)X +1298(functions)X +1622(to)X +1710(achieve)X +1982(optimal)X +2252(perfor-)X +720 1242(mance.)N +10 f +720 1330 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +7 s +1038 2925(Full)N +1149(size)X +1251(table)X +1384(\(left\))X +1547 2718(Fill)N +1643(Factor)X +2268 2662(64)N +1964(32)X +1674(16)X +1384(8)X +1093(4)X +4 Ds +1 Dt +900 2280 MXY +1548 0 Dl +900 1879 MXY +1548 0 Dl +900 1506 MXY +1548 0 Dl +1563 2902 MXY +111 0 Dl +-1 Ds +900 MX +110 0 Dl +1425 2828(System)N +983(User)X +1895 2778 MXY + 1895 2778 lineto + 1950 2778 lineto + 1950 2833 lineto + 1895 2833 lineto + 1895 2778 lineto +closepath 21 1895 2778 1950 2833 Dp +1342 MX + 1342 2778 lineto + 1397 2778 lineto + 1397 2833 lineto + 1342 2833 lineto + 1342 2778 lineto +closepath 14 1342 2778 1397 2833 Dp +900 MX + 900 2778 lineto + 955 2778 lineto + 955 2833 lineto + 900 2833 lineto + 900 2778 lineto +closepath 3 900 2778 955 2833 Dp +5 Dt +2283 2211 MXY +96 0 Dl +1992 MX +97 0 Dl +1702 MX +97 0 Dl +1411 2252 MXY +97 0 Dl +4 Ds +1 Dt +2283 2211 MXY + 2283 2211 lineto + 2379 2211 lineto + 2379 2252 lineto + 2283 2252 lineto + 2283 2211 lineto +closepath 14 2283 2211 2379 2252 Dp +1992 MX + 1992 2211 lineto + 2089 2211 lineto + 2089 2252 lineto + 1992 2252 lineto + 1992 2211 lineto +closepath 14 1992 2211 2089 2252 Dp +1702 MX + 1702 2211 lineto + 1799 2211 lineto + 1799 2252 lineto + 1702 2252 lineto + 1702 2211 lineto +closepath 14 1702 2211 1799 2252 Dp +1411 2252 MXY + 1411 2252 lineto + 1508 2252 lineto + 1508 2294 lineto + 1411 2294 lineto + 1411 2252 lineto +closepath 14 1411 2252 1508 2294 Dp +2283 MX + 2283 2252 lineto + 2379 2252 lineto + 2379 2612 lineto + 2283 2612 lineto + 2283 2252 lineto +closepath 3 2283 2252 2379 2612 Dp +1992 MX + 1992 2252 lineto + 2089 2252 lineto + 2089 2612 lineto + 1992 2612 lineto + 1992 2252 lineto +closepath 3 1992 2252 2089 2612 Dp +1702 MX + 1702 2252 lineto + 1799 2252 lineto + 1799 2612 lineto + 1702 2612 lineto + 1702 2252 lineto +closepath 3 1702 2252 1799 2612 Dp +1411 2294 MXY + 1411 2294 lineto + 1508 2294 lineto + 1508 2612 lineto + 1411 2612 lineto + 1411 2294 lineto +closepath 3 1411 2294 1508 2612 Dp +-1 Ds +2158 2238 MXY + 2158 2238 lineto + 2255 2238 lineto + 2255 2252 lineto + 2158 2252 lineto + 2158 2238 lineto +closepath 21 2158 2238 2255 2252 Dp +1868 MX + 1868 2238 lineto + 1965 2238 lineto + 1965 2280 lineto + 1868 2280 lineto + 1868 2238 lineto +closepath 21 1868 2238 1965 2280 Dp +1577 MX + 1577 2238 lineto + 1674 2238 lineto + 1674 2308 lineto + 1577 2308 lineto + 1577 2238 lineto +closepath 21 1577 2238 1674 2308 Dp +1287 2308 MXY + 1287 2308 lineto + 1287 2280 lineto + 1384 2280 lineto + 1384 2308 lineto + 1287 2308 lineto +closepath 21 1287 2280 1384 2308 Dp +2158 2280 MXY + 2158 2280 lineto + 2158 2252 lineto + 2255 2252 lineto + 2255 2280 lineto + 2158 2280 lineto +closepath 14 2158 2252 2255 2280 Dp +1868 2308 MXY + 1868 2308 lineto + 1868 2280 lineto + 1965 2280 lineto + 1965 2308 lineto + 1868 2308 lineto +closepath 14 1868 2280 1965 2308 Dp +1577 2335 MXY + 1577 2335 lineto + 1577 2308 lineto + 1674 2308 lineto + 1674 2335 lineto + 1577 2335 lineto +closepath 14 1577 2308 1674 2335 Dp +1287 2363 MXY + 1287 2363 lineto + 1287 2308 lineto + 1384 2308 lineto + 1384 2363 lineto + 1287 2363 lineto +closepath 14 1287 2308 1384 2363 Dp +2158 2280 MXY + 2158 2280 lineto + 2255 2280 lineto + 2255 2612 lineto + 2158 2612 lineto + 2158 2280 lineto +closepath 3 2158 2280 2255 2612 Dp +1868 2308 MXY + 1868 2308 lineto + 1965 2308 lineto + 1965 2612 lineto + 1868 2612 lineto + 1868 2308 lineto +closepath 3 1868 2308 1965 2612 Dp +1577 2335 MXY + 1577 2335 lineto + 1674 2335 lineto + 1674 2612 lineto + 1577 2612 lineto + 1577 2335 lineto +closepath 3 1577 2335 1674 2612 Dp +1287 2363 MXY + 1287 2363 lineto + 1384 2363 lineto + 1384 2612 lineto + 1287 2612 lineto + 1287 2363 lineto +closepath 3 1287 2363 1384 2612 Dp +4 Ds +1121 2066 MXY + 1121 2066 lineto + 1218 2066 lineto + 1224 2080 lineto + 1127 2080 lineto + 1121 2066 lineto +closepath 21 1121 2066 1224 2080 Dp +2080 MY + 1121 2080 lineto + 1218 2080 lineto + 1218 2273 lineto + 1121 2273 lineto + 1121 2080 lineto +closepath 14 1121 2080 1218 2273 Dp +2273 MY + 1121 2273 lineto + 1218 2273 lineto + 1218 2612 lineto + 1121 2612 lineto + 1121 2273 lineto +closepath 3 1121 2273 1218 2612 Dp +-1 Ds +997 1589 MXY + 997 1589 lineto + 1093 1589 lineto + 1093 1644 lineto + 997 1644 lineto + 997 1589 lineto +closepath 21 997 1589 1093 1644 Dp +1644 MY + 997 1644 lineto + 1093 1644 lineto + 1093 2280 lineto + 997 2280 lineto + 997 1644 lineto +closepath 14 997 1644 1093 2280 Dp +2280 MY + 997 2280 lineto + 1093 2280 lineto + 1093 2612 lineto + 997 2612 lineto + 997 2280 lineto +closepath 3 997 2280 1093 2612 Dp +10 s +719 2093(s)N +712 2037(d)N +712 1982(n)N +714 1927(o)N +716 1872(c)N +716 1816(e)N +712 1761(S)N +804 2286(10)N +804 1899(20)N +804 1540(30)N +3 Dt +900 1506 MXY +0 1106 Dl +1548 0 Dl +7 s +1978 2828(Elapsed)N +1701 2925(Dynamically)N +2018(grown)X +2184(table)X +2317(\(right\))X +3 Dt +-1 Ds +8 s +720 3180(Figure)N +934(6:)X +1 f +1020(The)X +1152(total)X +1299(regions)X +1520(indicate)X +1755(the)X +1865(difference)X +2154(between)X +2398(the)X +720 3268(elapsed)N +931(time)X +1065(and)X +1177(the)X +1275(sum)X +1402(of)X +1475(the)X +1573(system)X +1771(and)X +1883(user)X +2008(time.)X +2173(The)X +2291(left)X +2395(bar)X +720 3356(of)N +798(each)X +939(set)X +1035(depicts)X +1241(the)X +1344(timing)X +1537(of)X +1615(the)X +1718(test)X +1831(run)X +1940(when)X +2102(the)X +2204(number)X +2423(of)X +720 3444(entries)N +910(is)X +973(known)X +1167(in)X +1237(advance.)X +1496(The)X +1614(right)X +1754(bars)X +1879(depict)X +2054(the)X +2151(timing)X +2338(when)X +720 3532(the)N +814(\256le)X +912(is)X +971(grown)X +1150(from)X +1290(a)X +1334(single)X +1503(bucket.)X +10 s +10 f +720 3708 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +1 f +892 3910(Since)N +1131(this)X +1307(hashing)X +1617(package)X +1942(provides)X +2279(buffer)X +720 3998(management,)N +1188(the)X +1323(amount)X +1600(of)X +1704(space)X +1920(allocated)X +2247(for)X +2378(the)X +720 4086(buffer)N +948(pool)X +1121(may)X +1290(be)X +1397(speci\256ed)X +1713(by)X +1824(the)X +1953(user.)X +2157(Using)X +2378(the)X +720 4174(same)N +910(data)X +1069(set)X +1183(and)X +1324(test)X +1459(procedure)X +1805(as)X +1896(used)X +2067(to)X +2153(derive)X +2378(the)X +720 4262(graphs)N +962(in)X +1052(Figures)X +1320(5a-c,)X +1507(Figure)X +1744(7)X +1812(shows)X +2039(the)X +2164(impact)X +2409(of)X +720 4350(varying)N +997(the)X +1126(size)X +1282(of)X +1380(the)X +1509(buffer)X +1737(pool.)X +1950(The)X +2106(bucket)X +2351(size)X +720 4438(was)N +873(set)X +989(to)X +1078(256)X +1225(bytes)X +1421(and)X +1564(the)X +1689(\256ll)X +1804(factor)X +2019(was)X +2171(set)X +2287(to)X +2376(16.)X +720 4526(The)N +869(buffer)X +1090(pool)X +1256(size)X +1404(was)X +1552(varied)X +1776(from)X +1955(0)X +2018(\(the)X +2166(minimum)X +720 4614(number)N +986(of)X +1074(pages)X +1277(required)X +1565(to)X +1647(be)X +1743(buffered\))X +2063(to)X +2145(1M.)X +2316(With)X +720 4702(1M)N +854(of)X +944(buffer)X +1164(space,)X +1386(the)X +1507(package)X +1794(performed)X +2151(no)X +2253(I/O)X +2382(for)X +720 4790(this)N +871(data)X +1040(set.)X +1204(As)X +1328(Figure)X +1572(7)X +1647(illustrates,)X +2013(increasing)X +2378(the)X +720 4878(buffer)N +944(pool)X +1113(size)X +1265(can)X +1404(have)X +1583(a)X +1646(dramatic)X +1954(affect)X +2165(on)X +2271(result-)X +720 4966(ing)N +842(performance.)X +2 f +8 s +1269 4941(7)N +1 f +16 s +720 5353 MXY +864 0 Dl +2 f +8 s +760 5408(7)N +1 f +9 s +826 5433(Some)N +1024(allocators)X +1338(are)X +1460(extremely)X +1782(inef\256cient)X +2107(at)X +2192(allocating)X +720 5513(memory.)N +1029(If)X +1110(you)X +1251(\256nd)X +1396(that)X +1536(applications)X +1916(are)X +2036(running)X +2292(out)X +2416(of)X +720 5593(memory)N +1005(before)X +1234(you)X +1386(think)X +1578(they)X +1746(should,)X +2000(try)X +2124(varying)X +2388(the)X +720 5673(pagesize)N +986(to)X +1060(get)X +1166(better)X +1348(utilization)X +1658(from)X +1816(the)X +1922(memory)X +2180(allocator.)X +10 s +2830 1975 MXY +0 -28 Dl +28 0 Dl +0 28 Dl +-28 0 Dl +2853 2004 MXY +0 -27 Dl +28 0 Dl +0 27 Dl +-28 0 Dl +2876 2016 MXY +0 -27 Dl +27 0 Dl +0 27 Dl +-27 0 Dl +2922 1998 MXY +0 -27 Dl +27 0 Dl +0 27 Dl +-27 0 Dl +2967 2025 MXY +0 -28 Dl +28 0 Dl +0 28 Dl +-28 0 Dl +3013 2031 MXY +0 -28 Dl +28 0 Dl +0 28 Dl +-28 0 Dl +3059 MX +0 -28 Dl +27 0 Dl +0 28 Dl +-27 0 Dl +3196 2052 MXY +0 -28 Dl +27 0 Dl +0 28 Dl +-27 0 Dl +3561 2102 MXY +0 -28 Dl +28 0 Dl +0 28 Dl +-28 0 Dl +4292 2105 MXY +0 -28 Dl +27 0 Dl +0 28 Dl +-27 0 Dl +4 Ds +1 Dt +2844 1961 MXY +23 30 Dl +23 12 Dl +45 -18 Dl +46 26 Dl +46 6 Dl +45 0 Dl +137 21 Dl +366 50 Dl +730 3 Dl +9 s +4227 2158(User)N +-1 Ds +3 Dt +2830 1211 MXY +27 Dc +2853 1261 MXY +27 Dc +2876 1267 MXY +27 Dc +2921 1341 MXY +27 Dc +2967 1385 MXY +27 Dc +3013 1450 MXY +27 Dc +3059 1497 MXY +27 Dc +3196 1686 MXY +27 Dc +3561 2109 MXY +27 Dc +4292 2295 MXY +27 Dc +20 Ds +1 Dt +2844 1211 MXY +23 50 Dl +23 6 Dl +45 74 Dl +46 44 Dl +46 65 Dl +45 47 Dl +137 189 Dl +366 423 Dl +730 186 Dl +4181 2270(System)N +-1 Ds +3 Dt +2844 583 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2867 672 MXY +0 27 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +2890 701 MXY +0 28 Dl +0 -14 Dl +13 0 Dl +-27 0 Dl +2935 819 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-27 0 Dl +2981 849 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +3027 908 MXY +0 27 Dl +0 -13 Dl +14 0 Dl +-28 0 Dl +3072 1026 MXY +0 27 Dl +0 -13 Dl +14 0 Dl +-27 0 Dl +3209 1292 MXY +0 27 Dl +0 -14 Dl +14 0 Dl +-27 0 Dl +3575 1823 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-28 0 Dl +4305 2059 MXY +0 28 Dl +0 -14 Dl +14 0 Dl +-27 0 Dl +5 Dt +2844 597 MXY +23 88 Dl +23 30 Dl +45 118 Dl +46 30 Dl +46 59 Dl +45 118 Dl +137 265 Dl +366 532 Dl +730 236 Dl +4328 2103(Total)N +2844 2310 MXY +1461 0 Dl +2844 MX +0 -1772 Dl +2310 MY +0 18 Dl +4 Ds +1 Dt +2310 MY +0 -1772 Dl +2826 2416(0)N +-1 Ds +5 Dt +3209 2310 MXY +0 18 Dl +4 Ds +1 Dt +2310 MY +0 -1772 Dl +3155 2416(256)N +-1 Ds +5 Dt +3575 2310 MXY +0 18 Dl +4 Ds +1 Dt +2310 MY +0 -1772 Dl +3521 2416(512)N +-1 Ds +5 Dt +3940 2310 MXY +0 18 Dl +4 Ds +1 Dt +2310 MY +0 -1772 Dl +3886 2416(768)N +-1 Ds +5 Dt +4305 2310 MXY +0 18 Dl +4 Ds +1 Dt +2310 MY +0 -1772 Dl +4233 2416(1024)N +-1 Ds +5 Dt +2844 2310 MXY +-18 0 Dl +4 Ds +1 Dt +2844 MX +1461 0 Dl +2771 2340(0)N +-1 Ds +5 Dt +2844 2014 MXY +-18 0 Dl +2844 1719 MXY +-18 0 Dl +4 Ds +1 Dt +2844 MX +1461 0 Dl +2735 1749(20)N +-1 Ds +5 Dt +2844 1423 MXY +-18 0 Dl +2844 1128 MXY +-18 0 Dl +4 Ds +1 Dt +2844 MX +1461 0 Dl +2735 1158(40)N +-1 Ds +5 Dt +2844 833 MXY +-18 0 Dl +2844 538 MXY +-18 0 Dl +4 Ds +1 Dt +2844 MX +1461 0 Dl +2735 568(60)N +3239 2529(Buffer)N +3445(Pool)X +3595(Size)X +3737(\(in)X +3835(K\))X +2695 1259(S)N +2699 1324(e)N +2699 1388(c)N +2697 1452(o)N +2697 1517(n)N +2697 1581(d)N +2701 1645(s)N +3 Dt +-1 Ds +3 f +8 s +2706 2773(Figure)N +2908(7:)X +1 f +2982(User)X +3123(time)X +3258(is)X +3322(virtually)X +3560(insensitive)X +3854(to)X +3924(the)X +4022(amount)X +4234(of)X +4307(buffer)X +2706 2861(pool)N +2852(available,)X +3130(however,)X +3396(both)X +3541(system)X +3750(time)X +3895(and)X +4018(elapsed)X +4240(time)X +4385(are)X +2706 2949(inversely)N +2960(proportional)X +3296(to)X +3366(the)X +3464(size)X +3583(of)X +3656(the)X +3753(buffer)X +3927(pool.)X +4092(Even)X +4242(for)X +4335(large)X +2706 3037(data)N +2831(sets)X +2946(where)X +3120(one)X +3230(expects)X +3439(few)X +3552(collisions,)X +3832(specifying)X +4116(a)X +4162(large)X +4307(buffer)X +2706 3125(pool)N +2836(dramatically)X +3171(improves)X +3425(performance.)X +10 s +10 f +2706 3301 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +3175 3543(Enhanced)N +3536(Functionality)X +1 f +2878 3675(This)N +3046(hashing)X +3320(package)X +3609(provides)X +3910(a)X +3971(set)X +4085(of)X +4177(compati-)X +2706 3763(bility)N +2895(routines)X +3174(to)X +3257(implement)X +3620(the)X +2 f +3739(ndbm)X +1 f +3937(interface.)X +4279(How-)X +2706 3851(ever,)N +2893(when)X +3095(the)X +3220(native)X +3443(interface)X +3752(is)X +3832(used,)X +4026(the)X +4151(following)X +2706 3939(additional)N +3046(functionality)X +3475(is)X +3548(provided:)X +10 f +2798 4071(g)N +1 f +2946(Inserts)X +3197(never)X +3413(fail)X +3556(because)X +3847(too)X +3985(many)X +4199(keys)X +2946 4159(hash)N +3113(to)X +3195(the)X +3313(same)X +3498(value.)X +10 f +2798 4247(g)N +1 f +2946(Inserts)X +3187(never)X +3393(fail)X +3527(because)X +3808(key)X +3950(and/or)X +4181(asso-)X +2946 4335(ciated)N +3158(data)X +3312(is)X +3385(too)X +3507(large)X +10 f +2798 4423(g)N +1 f +2946(Hash)X +3131(functions)X +3449(may)X +3607(be)X +3703(user-speci\256ed.)X +10 f +2798 4511(g)N +1 f +2946(Multiple)X +3268(pages)X +3498(may)X +3683(be)X +3806(cached)X +4077(in)X +4186(main)X +2946 4599(memory.)N +2706 4731(It)N +2801(also)X +2976(provides)X +3298(a)X +3380(set)X +3514(of)X +3626(compatibility)X +4097(routines)X +4400(to)X +2706 4819(implement)N +3087(the)X +2 f +3224(hsearch)X +1 f +3516(interface.)X +3876(Again,)X +4130(the)X +4266(native)X +2706 4907(interface)N +3008(offers)X +3216(enhanced)X +3540(functionality:)X +10 f +2798 5039(g)N +1 f +2946(Files)X +3121(may)X +3279(grow)X +3464(beyond)X +2 f +3720(nelem)X +1 f +3932(elements.)X +10 f +2798 5127(g)N +1 f +2946(Multiple)X +3247(hash)X +3420(tables)X +3632(may)X +3795(be)X +3896(accessed)X +4203(con-)X +2946 5215(currently.)N +10 f +2798 5303(g)N +1 f +2946(Hash)X +3134(tables)X +3344(may)X +3505(be)X +3604(stored)X +3823(and)X +3962(accessed)X +4266(on)X +2946 5391(disk.)N +10 f +2798 5479(g)N +1 f +2946(Hash)X +3155(functions)X +3497(may)X +3679(be)X +3799(user-speci\256ed)X +4288(at)X +2946 5567(runtime.)N +3 f +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +4424(9)X + +10 p +%%Page: 10 10 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +432 258(A)N +510(New)X +682(Hashing)X +985(Package)X +1290(for)X +1413(UNIX)X +3663(Seltzer)X +3920(&)X +4007(Yigit)X +459 538(Relative)N +760(Performance)X +1227(of)X +1314(the)X +1441(New)X +1613(Implementation)X +1 f +604 670(The)N +761(performance)X +1200(testing)X +1445(of)X +1544(the)X +1674(new)X +1840(package)X +2135(is)X +432 758(divided)N +711(into)X +874(two)X +1033(test)X +1183(suites.)X +1424(The)X +1588(\256rst)X +1751(suite)X +1941(of)X +2046(tests)X +432 846(requires)N +727(that)X +882(the)X +1015(tables)X +1237(be)X +1348(read)X +1522(from)X +1713(and)X +1864(written)X +2126(to)X +432 934(disk.)N +640(In)X +742(these)X +942(tests,)X +1139(the)X +1272(basis)X +1467(for)X +1595(comparison)X +2003(is)X +2090(the)X +432 1022(4.3BSD-Reno)N +908(version)X +1169(of)X +2 f +1260(ndbm)X +1 f +1438(.)X +1502(Based)X +1722(on)X +1826(the)X +1948(designs)X +432 1110(of)N +2 f +521(sdbm)X +1 f +712(and)X +2 f +850(gdbm)X +1 f +1028(,)X +1070(they)X +1230(are)X +1351(expected)X +1659(to)X +1743(perform)X +2024(simi-)X +432 1198(larly)N +605(to)X +2 f +693(ndbm)X +1 f +871(,)X +917(and)X +1059(we)X +1179(do)X +1285(not)X +1413(show)X +1608(their)X +1781(performance)X +432 1286(numbers.)N +800(The)X +977(second)X +1252(suite)X +1454(contains)X +1772(the)X +1921(memory)X +432 1374(resident)N +712(test)X +849(which)X +1071(does)X +1243(not)X +1370(require)X +1623(that)X +1768(the)X +1891(\256les)X +2049(ever)X +432 1462(be)N +533(written)X +784(to)X +870(disk,)X +1047(only)X +1213(that)X +1357(hash)X +1528(tables)X +1739(may)X +1901(be)X +2001(mani-)X +432 1550(pulated)N +692(in)X +778(main)X +961(memory.)X +1291(In)X +1381(this)X +1519(test,)X +1673(we)X +1790(compare)X +2090(the)X +432 1638(performance)N +859(to)X +941(that)X +1081(of)X +1168(the)X +2 f +1286(hsearch)X +1 f +1560(routines.)X +604 1752(For)N +760(both)X +947(suites,)X +1194(two)X +1358(different)X +1679(databases)X +2031(were)X +432 1840(used.)N +656(The)X +818(\256rst)X +979(is)X +1069(the)X +1204(dictionary)X +1566(database)X +1880(described)X +432 1928(previously.)N +836(The)X +987(second)X +1236(was)X +1386(constructed)X +1781(from)X +1962(a)X +2023(pass-)X +432 2016(word)N +647(\256le)X +799(with)X +990(approximately)X +1502(300)X +1671(accounts.)X +2041(Two)X +432 2104(records)N +700(were)X +887(constructed)X +1287(for)X +1411(each)X +1589(account.)X +1909(The)X +2064(\256rst)X +432 2192(used)N +604(the)X +727(logname)X +1028(as)X +1120(the)X +1243(key)X +1384(and)X +1525(the)X +1648(remainder)X +1999(of)X +2090(the)X +432 2280(password)N +768(entry)X +965(for)X +1091(the)X +1221(data.)X +1427(The)X +1584(second)X +1839(was)X +1996(keyed)X +432 2368(by)N +541(uid)X +672(and)X +817(contained)X +1157(the)X +1283(entire)X +1494(password)X +1825(entry)X +2018(as)X +2113(its)X +432 2456(data)N +589(\256eld.)X +794(The)X +942(tests)X +1107(were)X +1287(all)X +1389(run)X +1518(on)X +1620(the)X +1740(HP)X +1864(9000)X +2046(with)X +432 2544(the)N +574(same)X +783(con\256guration)X +1254(previously)X +1636(described.)X +2027(Each)X +432 2632(test)N +576(was)X +734(run)X +874(\256ve)X +1027(times)X +1232(and)X +1380(the)X +1510(timing)X +1750(results)X +1991(of)X +2090(the)X +432 2720(runs)N +602(were)X +791(averaged.)X +1154(The)X +1311(variance)X +1616(across)X +1849(the)X +1979(5)X +2050(runs)X +432 2808(was)N +591(approximately)X +1088(1%)X +1229(of)X +1330(the)X +1462(average)X +1746(yielding)X +2041(95%)X +432 2896(con\256dence)N +800(intervals)X +1096(of)X +1183(approximately)X +1666(2%.)X +3 f +1021 3050(Disk)N +1196(Based)X +1420(Tests)X +1 f +604 3182(In)N +693(these)X +880(tests,)X +1064(we)X +1180(use)X +1308(a)X +1365(bucket)X +1600(size)X +1746(of)X +1834(1024)X +2015(and)X +2152(a)X +432 3270(\256ll)N +540(factor)X +748(of)X +835(32.)X +3 f +432 3384(create)N +663(test)X +1 f +547 3498(The)N +703(keys)X +881(are)X +1011(entered)X +1279(into)X +1433(the)X +1561(hash)X +1738(table,)X +1944(and)X +2090(the)X +547 3586(\256le)N +669(is)X +742(\257ushed)X +993(to)X +1075(disk.)X +3 f +432 3700(read)N +608(test)X +1 f +547 3814(A)N +640(lookup)X +897(is)X +984(performed)X +1353(for)X +1481(each)X +1663(key)X +1813(in)X +1909(the)X +2041(hash)X +547 3902(table.)N +3 f +432 4016(verify)N +653(test)X +1 f +547 4130(A)N +640(lookup)X +897(is)X +984(performed)X +1353(for)X +1481(each)X +1663(key)X +1813(in)X +1909(the)X +2041(hash)X +547 4218(table,)N +759(and)X +911(the)X +1045(data)X +1215(returned)X +1519(is)X +1608(compared)X +1961(against)X +547 4306(that)N +687(originally)X +1018(stored)X +1234(in)X +1316(the)X +1434(hash)X +1601(table.)X +3 f +432 4420(sequential)N +798(retrieve)X +1 f +547 4534(All)N +674(keys)X +846(are)X +970(retrieved)X +1281(in)X +1367(sequential)X +1716(order)X +1910(from)X +2090(the)X +547 4622(hash)N +724(table.)X +950(The)X +2 f +1105(ndbm)X +1 f +1313(interface)X +1625(allows)X +1863(sequential)X +547 4710(retrieval)N +848(of)X +948(the)X +1079(keys)X +1259(from)X +1448(the)X +1578(database,)X +1907(but)X +2041(does)X +547 4798(not)N +701(return)X +945(the)X +1094(data)X +1279(associated)X +1660(with)X +1853(each)X +2052(key.)X +547 4886(Therefore,)N +929(we)X +1067(compare)X +1388(the)X +1530(performance)X +1980(of)X +2090(the)X +547 4974(new)N +703(package)X +989(to)X +1073(two)X +1215(different)X +1514(runs)X +1674(of)X +2 f +1763(ndbm)X +1 f +1941(.)X +2002(In)X +2090(the)X +547 5062(\256rst)N +697(case,)X +2 f +882(ndbm)X +1 f +1086(returns)X +1335(only)X +1503(the)X +1627(keys)X +1800(while)X +2003(in)X +2090(the)X +547 5150(second,)N +2 f +823(ndbm)X +1 f +1034(returns)X +1290(both)X +1465(the)X +1596(keys)X +1776(and)X +1924(the)X +2054(data)X +547 5238(\(requiring)N +894(a)X +956(second)X +1204(call)X +1345(to)X +1432(the)X +1555(library\).)X +1861(There)X +2074(is)X +2152(a)X +547 5326(single)N +764(run)X +897(for)X +1017(the)X +1141(new)X +1300(library)X +1539(since)X +1729(it)X +1798(returns)X +2046(both)X +547 5414(the)N +665(key)X +801(and)X +937(the)X +1055(data.)X +3 f +3014 538(In-Memory)N +3431(Test)X +1 f +2590 670(This)N +2757(test)X +2892(uses)X +3054(a)X +3114(bucket)X +3352(size)X +3501(of)X +3592(256)X +3736(and)X +3876(a)X +3936(\256ll)X +4048(fac-)X +2418 758(tor)N +2527(of)X +2614(8.)X +3 f +2418 872(create/read)N +2827(test)X +1 f +2533 986(In)N +2627(this)X +2769(test,)X +2927(a)X +2989(hash)X +3162(table)X +3344(is)X +3423(created)X +3682(by)X +3788(inserting)X +4094(all)X +2533 1074(the)N +2660(key/data)X +2961(pairs.)X +3186(Then)X +3380(a)X +3445(keyed)X +3666(retrieval)X +3963(is)X +4044(per-)X +2533 1162(formed)N +2801(for)X +2931(each)X +3115(pair,)X +3295(and)X +3446(the)X +3579(hash)X +3761(table)X +3952(is)X +4040(des-)X +2533 1250(troyed.)N +3 f +2938 1404(Performance)N +3405(Results)X +1 f +2590 1536(Figures)N +2866(8a)X +2978(and)X +3130(8b)X +3246(show)X +3451(the)X +3585(user)X +3755(time,)X +3952(system)X +2418 1624(time,)N +2608(and)X +2752(elapsed)X +3021(time)X +3191(for)X +3312(each)X +3487(test)X +3625(for)X +3746(both)X +3915(the)X +4040(new)X +2418 1712(implementation)N +2951(and)X +3098(the)X +3227(old)X +3360(implementation)X +3893(\()X +2 f +3920(hsearch)X +1 f +2418 1800(or)N +2 f +2528(ndbm)X +1 f +2706(,)X +2769(whichever)X +3147(is)X +3243(appropriate\))X +3678(as)X +3787(well)X +3967(as)X +4076(the)X +2418 1888(improvement.)N +2929(The)X +3098(improvement)X +3569(is)X +3666(expressed)X +4027(as)X +4138(a)X +2418 1976(percentage)N +2787(of)X +2874(the)X +2992(old)X +3114(running)X +3383(time:)X +0 f +8 s +2418 2275(%)N +2494(=)X +2570(100)X +2722(*)X +2798 -0.4219(\(old_time)AX +3178(-)X +3254 -0.4219(new_time\))AX +3634(/)X +3710(old_time)X +1 f +10 s +2590 2600(In)N +2700(nearly)X +2944(all)X +3067(cases,)X +3299(the)X +3439(new)X +3615(routines)X +3915(perform)X +2418 2688(better)N +2628(than)X +2793(the)X +2918(old)X +3047(routines)X +3332(\(both)X +2 f +3527(hsearch)X +1 f +3807(and)X +2 f +3949(ndbm)X +1 f +4127(\).)X +2418 2776(Although)N +2755(the)X +3 f +2888(create)X +1 f +3134(tests)X +3311(exhibit)X +3567(superior)X +3864(user)X +4032(time)X +2418 2864(performance,)N +2869(the)X +2991(test)X +3126(time)X +3292(is)X +3369(dominated)X +3731(by)X +3834(the)X +3955(cost)X +4107(of)X +2418 2952(writing)N +2677(the)X +2803(actual)X +3023(\256le)X +3153(to)X +3243(disk.)X +3444(For)X +3583(the)X +3709(large)X +3897(database)X +2418 3040(\(the)N +2564(dictionary\),)X +2957(this)X +3093(completely)X +3470(overwhelmed)X +3927(the)X +4045(sys-)X +2418 3128(tem)N +2570(time.)X +2783(However,)X +3129(for)X +3254(the)X +3383(small)X +3587(data)X +3752(base,)X +3946(we)X +4071(see)X +2418 3216(that)N +2569(differences)X +2958(in)X +3051(both)X +3224(user)X +3389(and)X +3536(system)X +3788(time)X +3960(contri-)X +2418 3304(bute)N +2576(to)X +2658(the)X +2776(superior)X +3059(performance)X +3486(of)X +3573(the)X +3691(new)X +3845(package.)X +2590 3418(The)N +3 f +2764(read)X +1 f +2920(,)X +3 f +2989(verify)X +1 f +3190(,)X +3259(and)X +3 f +3424(sequential)X +1 f +3818(results)X +4075(are)X +2418 3506(deceptive)N +2758(for)X +2883(the)X +3012(small)X +3216(database)X +3524(since)X +3720(the)X +3849(entire)X +4063(test)X +2418 3594(ran)N +2551(in)X +2643(under)X +2856(a)X +2922(second.)X +3215(However,)X +3560(on)X +3669(the)X +3796(larger)X +4013(data-)X +2418 3682(base)N +2590(the)X +3 f +2716(read)X +1 f +2900(and)X +3 f +3044(verify)X +1 f +3273(tests)X +3443(bene\256t)X +3689(from)X +3873(the)X +3999(cach-)X +2418 3770(ing)N +2546(of)X +2639(buckets)X +2910(in)X +2998(the)X +3122(new)X +3282(package)X +3571(to)X +3658(improve)X +3950(perfor-)X +2418 3858(mance)N +2666(by)X +2784(over)X +2965(80%.)X +3169(Since)X +3384(the)X +3519(\256rst)X +3 f +3680(sequential)X +1 f +4063(test)X +2418 3946(does)N +2598(not)X +2733(require)X +2 f +2994(ndbm)X +1 f +3205(to)X +3299(return)X +3523(the)X +3653(data)X +3819(values,)X +4076(the)X +2418 4034(user)N +2573(time)X +2735(is)X +2808(lower)X +3011(than)X +3169(for)X +3283(the)X +3401(new)X +3555(package.)X +3879(However)X +2418 4122(when)N +2613(we)X +2728(require)X +2977(both)X +3139(packages)X +3454(to)X +3536(return)X +3748(data,)X +3922(the)X +4040(new)X +2418 4210(package)N +2702(excels)X +2923(in)X +3005(all)X +3105(three)X +3286(timings.)X +2590 4324(The)N +2773(small)X +3003(database)X +3337(runs)X +3532(so)X +3660(quickly)X +3957(in)X +4076(the)X +2418 4412(memory-resident)N +3000(case)X +3173(that)X +3326(the)X +3457(results)X +3699(are)X +3831(uninterest-)X +2418 4500(ing.)N +2589(However,)X +2933(for)X +3056(the)X +3183(larger)X +3400(database)X +3706(the)X +3833(new)X +3995(pack-)X +2418 4588(age)N +2567(pays)X +2751(a)X +2824(small)X +3033(penalty)X +3305(in)X +3403(system)X +3661(time)X +3839(because)X +4130(it)X +2418 4676(limits)N +2636(its)X +2748(main)X +2944(memory)X +3247(utilization)X +3607(and)X +3759(swaps)X +3991(pages)X +2418 4764(out)N +2550(to)X +2642(temporary)X +3002(storage)X +3264(in)X +3356(the)X +3484(\256le)X +3616(system)X +3868(while)X +4076(the)X +2 f +2418 4852(hsearch)N +1 f +2698(package)X +2988(requires)X +3273(that)X +3419(the)X +3543(application)X +3924(allocate)X +2418 4940(enough)N +2692(space)X +2909(for)X +3041(all)X +3159(key/data)X +3468(pair.)X +3670(However,)X +4022(even)X +2418 5028(with)N +2600(the)X +2738(system)X +3000(time)X +3182(penalty,)X +3477(the)X +3614(resulting)X +3933(elapsed)X +2418 5116(time)N +2580(improves)X +2898(by)X +2998(over)X +3161(50%.)X +3 f +432 5960(10)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +11 p +%%Page: 11 11 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +720 258(Seltzer)N +977(&)X +1064(Yigit)X +3278(A)X +3356(New)X +3528(Hashing)X +3831(Package)X +4136(for)X +4259(UNIX)X +1 f +10 f +908 454(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2 f +1379 546(hash)N +1652(ndbm)X +1950(%change)X +1 f +10 f +908 550(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +948 642(CREATE)N +10 f +908 646(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +1125 738(user)N +1424(6.4)X +1671(12.2)X +2073(48)X +1157 826(sys)N +1384(32.5)X +1671(34.7)X +2113(6)X +3 f +1006 914(elapsed)N +10 f +1310 922(c)N +890(c)Y +810(c)Y +730(c)Y +3 f +1384 914(90.4)N +10 f +1581 922(c)N +890(c)Y +810(c)Y +730(c)Y +3 f +1671 914(99.6)N +10 f +1883 922(c)N +890(c)Y +810(c)Y +730(c)Y +3 f +2113 914(9)N +1 f +10 f +908 910(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +908 926(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +948 1010(READ)N +10 f +908 1014(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +1125 1106(user)N +1424(3.4)X +1711(6.1)X +2073(44)X +1157 1194(sys)N +1424(1.2)X +1671(15.3)X +2073(92)X +3 f +1006 1282(elapsed)N +10 f +1310 1290(c)N +1258(c)Y +1178(c)Y +1098(c)Y +3 f +1424 1282(4.0)N +10 f +1581 1290(c)N +1258(c)Y +1178(c)Y +1098(c)Y +3 f +1671 1282(21.2)N +10 f +1883 1290(c)N +1258(c)Y +1178(c)Y +1098(c)Y +3 f +2073 1282(81)N +1 f +10 f +908 1278(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +908 1294(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +948 1378(VERIFY)N +10 f +908 1382(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +1125 1474(user)N +1424(3.5)X +1711(6.3)X +2073(44)X +1157 1562(sys)N +1424(1.2)X +1671(15.3)X +2073(92)X +3 f +1006 1650(elapsed)N +10 f +1310 1658(c)N +1626(c)Y +1546(c)Y +1466(c)Y +3 f +1424 1650(4.0)N +10 f +1581 1658(c)N +1626(c)Y +1546(c)Y +1466(c)Y +3 f +1671 1650(21.2)N +10 f +1883 1658(c)N +1626(c)Y +1546(c)Y +1466(c)Y +3 f +2073 1650(81)N +1 f +10 f +908 1646(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +908 1662(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +948 1746(SEQUENTIAL)N +10 f +908 1750(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +1125 1842(user)N +1424(2.7)X +1711(1.9)X +2046(-42)X +1157 1930(sys)N +1424(0.7)X +1711(3.9)X +2073(82)X +3 f +1006 2018(elapsed)N +10 f +1310 2026(c)N +1994(c)Y +1914(c)Y +1834(c)Y +3 f +1424 2018(3.0)N +10 f +1581 2026(c)N +1994(c)Y +1914(c)Y +1834(c)Y +3 f +1711 2018(5.0)N +10 f +1883 2026(c)N +1994(c)Y +1914(c)Y +1834(c)Y +3 f +2073 2018(40)N +1 f +10 f +908 2014(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +908 2030(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +948 2114(SEQUENTIAL)N +1467(\(with)X +1656(data)X +1810(retrieval\))X +10 f +908 2118(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +1125 2210(user)N +1424(2.7)X +1711(8.2)X +2073(67)X +1157 2298(sys)N +1424(0.7)X +1711(4.3)X +2073(84)X +3 f +1006 2386(elapsed)N +1424(3.0)X +1671(12.0)X +2073(75)X +1 f +10 f +908 2390(i)N +927(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +899 2394(c)N +2378(c)Y +2298(c)Y +2218(c)Y +2138(c)Y +2058(c)Y +1978(c)Y +1898(c)Y +1818(c)Y +1738(c)Y +1658(c)Y +1578(c)Y +1498(c)Y +1418(c)Y +1338(c)Y +1258(c)Y +1178(c)Y +1098(c)Y +1018(c)Y +938(c)Y +858(c)Y +778(c)Y +698(c)Y +618(c)Y +538(c)Y +1310 2394(c)N +2362(c)Y +2282(c)Y +2202(c)Y +1581 2394(c)N +2362(c)Y +2282(c)Y +2202(c)Y +1883 2394(c)N +2362(c)Y +2282(c)Y +2202(c)Y +2278 2394(c)N +2378(c)Y +2298(c)Y +2218(c)Y +2138(c)Y +2058(c)Y +1978(c)Y +1898(c)Y +1818(c)Y +1738(c)Y +1658(c)Y +1578(c)Y +1498(c)Y +1418(c)Y +1338(c)Y +1258(c)Y +1178(c)Y +1098(c)Y +1018(c)Y +938(c)Y +858(c)Y +778(c)Y +698(c)Y +618(c)Y +538(c)Y +905 2574(i)N +930(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2 f +1318 2666(hash)N +1585(hsearch)X +1953(%change)X +1 f +10 f +905 2670(i)N +930(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +945 2762(CREATE/READ)N +10 f +905 2766(i)N +930(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +1064 2858(user)N +1343(6.6)X +1642(17.2)X +2096(62)X +1096 2946(sys)N +1343(1.1)X +1682(0.3)X +2029(-266)X +3 f +945 3034(elapsed)N +1343(7.8)X +1642(17.0)X +2096(54)X +1 f +10 f +905 3038(i)N +930(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +896 3050(c)N +2978(c)Y +2898(c)Y +2818(c)Y +2738(c)Y +2658(c)Y +1249 3034(c)N +3010(c)Y +2930(c)Y +2850(c)Y +1520 3034(c)N +3010(c)Y +2930(c)Y +2850(c)Y +1886 3034(c)N +3010(c)Y +2930(c)Y +2850(c)Y +2281 3050(c)N +2978(c)Y +2898(c)Y +2818(c)Y +2738(c)Y +2658(c)Y +3 f +720 3174(Figure)N +967(8a:)X +1 f +1094(Timing)X +1349(results)X +1578(for)X +1692(the)X +1810(dictionary)X +2155(database.)X +10 f +720 3262 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +1407 3504(Conclusion)N +1 f +892 3636(This)N +1063(paper)X +1271(has)X +1407(presented)X +1744(the)X +1871(design,)X +2129(implemen-)X +720 3724(tation)N +928(and)X +1070(performance)X +1503(of)X +1596(a)X +1658(new)X +1818(hashing)X +2093(package)X +2382(for)X +720 3812(UNIX.)N +993(The)X +1150(new)X +1316(package)X +1612(provides)X +1919(a)X +1986(superset)X +2280(of)X +2378(the)X +720 3900(functionality)N +1159(of)X +1255(existing)X +1537(hashing)X +1815(packages)X +2139(and)X +2284(incor-)X +720 3988(porates)N +975(additional)X +1318(features)X +1596(such)X +1766(as)X +1855(large)X +2038(key)X +2176(handling,)X +720 4076(user)N +876(de\256ned)X +1134(hash)X +1302(functions,)X +1641(multiple)X +1928(hash)X +2096(tables,)X +2324(vari-)X +720 4164(able)N +894(sized)X +1099(pages,)X +1342(and)X +1498(linear)X +1721(hashing.)X +2050(In)X +2156(nearly)X +2396(all)X +720 4252(cases,)N +954(the)X +1096(new)X +1274(package)X +1582(provides)X +1902(improved)X +2252(perfor-)X +720 4340(mance)N +974(on)X +1098(the)X +1240(order)X +1454(of)X +1565(50-80%)X +1863(for)X +2001(the)X +2142(workloads)X +720 4428(shown.)N +990(Applications)X +1420(such)X +1588(as)X +1676(the)X +1794(loader,)X +2035(compiler,)X +2360(and)X +720 4516(mail,)N +921(which)X +1156(currently)X +1485(implement)X +1866(their)X +2051(own)X +2227(hashing)X +720 4604(routines,)N +1032(should)X +1279(be)X +1389(modi\256ed)X +1706(to)X +1801(use)X +1941(the)X +2072(generic)X +2342(rou-)X +720 4692(tines.)N +892 4806(This)N +1087(hashing)X +1389(package)X +1705(is)X +1810(one)X +1978(access)X +2236(method)X +720 4894(which)N +953(is)X +1043(part)X +1205(of)X +1309(a)X +1382(generic)X +1656(database)X +1970(access)X +2212(package)X +720 4982(being)N +955(developed)X +1342(at)X +1457(the)X +1612(University)X +2007(of)X +2131(California,)X +720 5070(Berkeley.)N +1089(It)X +1177(will)X +1340(include)X +1614(a)X +1688(btree)X +1887(access)X +2131(method)X +2409(as)X +720 5158(well)N +916(as)X +1041(\256xed)X +1259(and)X +1433(variable)X +1750(length)X +2007(record)X +2270(access)X +720 5246(methods)N +1024(in)X +1119(addition)X +1414(to)X +1509(the)X +1640(hashed)X +1896(support)X +2168(presented)X +720 5334(here.)N +948(All)X +1099(of)X +1215(the)X +1361(access)X +1615(methods)X +1934(are)X +2081(based)X +2312(on)X +2440(a)X +720 5422(key/data)N +1037(pair)X +1207(interface)X +1533(and)X +1693(appear)X +1952(identical)X +2272(to)X +2378(the)X +720 5510(application)N +1121(layer,)X +1347(allowing)X +1671(application)X +2071(implementa-)X +720 5598(tions)N +906(to)X +999(be)X +1106(largely)X +1360(independent)X +1783(of)X +1881(the)X +2010(database)X +2318(type.)X +720 5686(The)N +873(package)X +1165(is)X +1246(expected)X +1560(to)X +1650(be)X +1754(an)X +1858(integral)X +2131(part)X +2284(of)X +2378(the)X +2706 538(4.4BSD)N +3006(system,)X +3293(with)X +3479(various)X +3759(standard)X +4075(applications)X +2706 626(such)N +2879(as)X +2972(more\(1\),)X +3277(sort\(1\))X +3517(and)X +3659(vi\(1\))X +3841(based)X +4050(on)X +4156(it.)X +4266(While)X +2706 714(the)N +2833(current)X +3089(design)X +3326(does)X +3501(not)X +3631(support)X +3899(multi-user)X +4256(access)X +2706 802(or)N +2804(transactions,)X +3238(they)X +3407(could)X +3616(be)X +3723(incorporated)X +4159(relatively)X +2706 890(easily.)N +10 f +2894 938(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2 f +3365 1030(hash)N +3638(ndbm)X +3936(%change)X +1 f +10 f +2894 1034(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2934 1126(CREATE)N +10 f +2894 1130(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +3111 1222(user)N +3390(0.2)X +3677(0.4)X +4079(50)X +3143 1310(sys)N +3390(0.1)X +3677(1.0)X +4079(90)X +3 f +2992 1398(elapsed)N +10 f +3296 1406(c)N +1374(c)Y +1294(c)Y +1214(c)Y +3 f +3390 1398(0)N +10 f +3567 1406(c)N +1374(c)Y +1294(c)Y +1214(c)Y +3 f +3677 1398(3.2)N +10 f +3869 1406(c)N +1374(c)Y +1294(c)Y +1214(c)Y +3 f +4039 1398(100)N +1 f +10 f +2894 1394(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2894 1410(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2934 1494(READ)N +10 f +2894 1498(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +3111 1590(user)N +3390(0.1)X +3677(0.1)X +4119(0)X +3143 1678(sys)N +3390(0.1)X +3677(0.4)X +4079(75)X +3 f +2992 1766(elapsed)N +10 f +3296 1774(c)N +1742(c)Y +1662(c)Y +1582(c)Y +3 f +3390 1766(0.0)N +10 f +3567 1774(c)N +1742(c)Y +1662(c)Y +1582(c)Y +3 f +3677 1766(0.0)N +10 f +3869 1774(c)N +1742(c)Y +1662(c)Y +1582(c)Y +3 f +4119 1766(0)N +1 f +10 f +2894 1762(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2894 1778(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2934 1862(VERIFY)N +10 f +2894 1866(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +3111 1958(user)N +3390(0.1)X +3677(0.2)X +4079(50)X +3143 2046(sys)N +3390(0.1)X +3677(0.3)X +4079(67)X +3 f +2992 2134(elapsed)N +10 f +3296 2142(c)N +2110(c)Y +2030(c)Y +1950(c)Y +3 f +3390 2134(0.0)N +10 f +3567 2142(c)N +2110(c)Y +2030(c)Y +1950(c)Y +3 f +3677 2134(0.0)N +10 f +3869 2142(c)N +2110(c)Y +2030(c)Y +1950(c)Y +3 f +4119 2134(0)N +1 f +10 f +2894 2130(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2894 2146(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2934 2230(SEQUENTIAL)N +10 f +2894 2234(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +3111 2326(user)N +3390(0.1)X +3677(0.0)X +4012(-100)X +3143 2414(sys)N +3390(0.1)X +3677(0.1)X +4119(0)X +3 f +2992 2502(elapsed)N +10 f +3296 2510(c)N +2478(c)Y +2398(c)Y +2318(c)Y +3 f +3390 2502(0.0)N +10 f +3567 2510(c)N +2478(c)Y +2398(c)Y +2318(c)Y +3 f +3677 2502(0.0)N +10 f +3869 2510(c)N +2478(c)Y +2398(c)Y +2318(c)Y +3 f +4119 2502(0)N +1 f +10 f +2894 2498(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2894 2514(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2934 2598(SEQUENTIAL)N +3453(\(with)X +3642(data)X +3796(retrieval\))X +10 f +2894 2602(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +3111 2694(user)N +3390(0.1)X +3677(0.1)X +4119(0)X +3143 2782(sys)N +3390(0.1)X +3677(0.1)X +4119(0)X +3 f +2992 2870(elapsed)N +3390(0.0)X +3677(0.0)X +4119(0)X +1 f +10 f +2894 2874(i)N +2913(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2885 2878(c)N +2862(c)Y +2782(c)Y +2702(c)Y +2622(c)Y +2542(c)Y +2462(c)Y +2382(c)Y +2302(c)Y +2222(c)Y +2142(c)Y +2062(c)Y +1982(c)Y +1902(c)Y +1822(c)Y +1742(c)Y +1662(c)Y +1582(c)Y +1502(c)Y +1422(c)Y +1342(c)Y +1262(c)Y +1182(c)Y +1102(c)Y +1022(c)Y +3296 2878(c)N +2846(c)Y +2766(c)Y +2686(c)Y +3567 2878(c)N +2846(c)Y +2766(c)Y +2686(c)Y +3869 2878(c)N +2846(c)Y +2766(c)Y +2686(c)Y +4264 2878(c)N +2862(c)Y +2782(c)Y +2702(c)Y +2622(c)Y +2542(c)Y +2462(c)Y +2382(c)Y +2302(c)Y +2222(c)Y +2142(c)Y +2062(c)Y +1982(c)Y +1902(c)Y +1822(c)Y +1742(c)Y +1662(c)Y +1582(c)Y +1502(c)Y +1422(c)Y +1342(c)Y +1262(c)Y +1182(c)Y +1102(c)Y +1022(c)Y +2891 3058(i)N +2916(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2 f +3304 3150(hash)N +3571(hsearch)X +3939(%change)X +1 f +10 f +2891 3154(i)N +2916(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2931 3246(CREATE/READ)N +10 f +2891 3250(i)N +2916(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +3050 3342(user)N +3329(0.3)X +3648(0.4)X +4048(25)X +3082 3430(sys)N +3329(0.0)X +3648(0.0)X +4088(0)X +3 f +2931 3518(elapsed)N +3329(0.0)X +3648(0.0)X +4088(0)X +1 f +10 f +2891 3522(i)N +2916(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2882 3534(c)N +3462(c)Y +3382(c)Y +3302(c)Y +3222(c)Y +3142(c)Y +3235 3518(c)N +3494(c)Y +3414(c)Y +3334(c)Y +3506 3518(c)N +3494(c)Y +3414(c)Y +3334(c)Y +3872 3518(c)N +3494(c)Y +3414(c)Y +3334(c)Y +4267 3534(c)N +3462(c)Y +3382(c)Y +3302(c)Y +3222(c)Y +3142(c)Y +3 f +2706 3658(Figure)N +2953(8b:)X +1 f +3084(Timing)X +3339(results)X +3568(for)X +3682(the)X +3800(password)X +4123(database.)X +10 f +2706 3746 -0.0930(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)AN +3 f +3396 3988(References)N +1 f +2706 4120([ATT79])N +3058(AT&T,)X +3358(DBM\(3X\),)X +2 f +3773(Unix)X +3990(Programmer's)X +2878 4208(Manual,)N +3194(Seventh)X +3491(Edition,)X +3793(Volume)X +4085(1)X +1 f +(,)S +4192(January,)X +2878 4296(1979.)N +2706 4472([ATT85])N +3027(AT&T,)X +3296(HSEARCH\(BA_LIB\),)X +2 f +4053(Unix)X +4239(System)X +2878 4560(User's)N +3112(Manual,)X +3401(System)X +3644(V.3)X +1 f +3753(,)X +3793(pp.)X +3913(506-508,)X +4220(1985.)X +2706 4736([BRE73])N +3025(Brent,)X +3253(Richard)X +3537(P.,)X +3651(``Reducing)X +4041(the)X +4168(Retrieval)X +2878 4824(Time)N +3071(of)X +3162(Scatter)X +3409(Storage)X +3678(Techniques'',)X +2 f +4146(Commun-)X +2878 4912(ications)N +3175(of)X +3281(the)X +3422(ACM)X +1 f +3591(,)X +3654(Volume)X +3955(16,)X +4098(No.)X +4259(2,)X +4362(pp.)X +2878 5000(105-109,)N +3185(February,)X +3515(1973.)X +2706 5176([BSD86])N +3055(NDBM\(3\),)X +2 f +3469(4.3BSD)X +3775(Unix)X +3990(Programmer's)X +2878 5264(Manual)N +3155(Reference)X +3505(Guide)X +1 f +3701(,)X +3749(University)X +4114(of)X +4208(Califor-)X +2878 5352(nia,)N +3016(Berkeley,)X +3346(1986.)X +2706 5528([ENB88])N +3025(Enbody,)X +3319(R.)X +3417(J.,)X +3533(Du,)X +3676(H.)X +3779(C.,)X +3897(``Dynamic)X +4270(Hash-)X +2878 5616(ing)N +3034(Schemes'',)X +2 f +3427(ACM)X +3630(Computing)X +4019(Surveys)X +1 f +4269(,)X +4322(Vol.)X +2878 5704(20,)N +2998(No.)X +3136(2,)X +3216(pp.)X +3336(85-113,)X +3603(June)X +3770(1988.)X +3 f +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +4384(11)X + +12 p +%%Page: 12 12 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +432 258(A)N +510(New)X +682(Hashing)X +985(Package)X +1290(for)X +1413(UNIX)X +3663(Seltzer)X +3920(&)X +4007(Yigit)X +1 f +432 538([FAG79])N +776(Ronald)X +1057(Fagin,)X +1308(Jurg)X +1495(Nievergelt,)X +1903(Nicholas)X +604 626(Pippenger,)N +1003(H.)X +1135(Raymond)X +1500(Strong,)X +1787(``Extendible)X +604 714(Hashing)N +901(--)X +985(A)X +1073(Fast)X +1236(Access)X +1493(Method)X +1771(for)X +1894(Dynamic)X +604 802(Files'',)N +2 f +855(ACM)X +1046(Transactions)X +1485(on)X +1586(Database)X +1914(Systems)X +1 f +2168(,)X +604 890(Volume)N +882(4,)X +962(No.)X +1100(3.,)X +1200(September)X +1563(1979,)X +1763(pp)X +1863(315-34)X +432 1066([KNU68],)N +802(Knuth,)X +1064(D.E.,)X +2 f +1273(The)X +1434(Art)X +1577(of)X +1680(Computer)X +2041(Pro-)X +604 1154(gramming)N +971(Vol.)X +1140(3:)X +1245(Sorting)X +1518(and)X +1676(Searching)X +1 f +2001(,)X +2058(sec-)X +604 1242(tions)N +779(6.3-6.4,)X +1046(pp)X +1146(481-550.)X +432 1418([LAR78])N +747(Larson,)X +1011(Per-Ake,)X +1319(``Dynamic)X +1687(Hashing'',)X +2 f +2048(BIT)X +1 f +(,)S +604 1506(Vol.)N +764(18,)X +884(1978,)X +1084(pp.)X +1204(184-201.)X +432 1682([LAR88])N +752(Larson,)X +1021(Per-Ake,)X +1335(``Dynamic)X +1709(Hash)X +1900(Tables'',)X +2 f +604 1770(Communications)N +1183(of)X +1281(the)X +1415(ACM)X +1 f +1584(,)X +1640(Volume)X +1934(31,)X +2070(No.)X +604 1858(4.,)N +704(April)X +893(1988,)X +1093(pp)X +1193(446-457.)X +432 2034([LIT80])N +731(Witold,)X +1013(Litwin,)X +1286(``Linear)X +1590(Hashing:)X +1939(A)X +2036(New)X +604 2122(Tool)N +786(for)X +911(File)X +1065(and)X +1211(Table)X +1424(Addressing'',)X +2 f +1893(Proceed-)X +604 2210(ings)N +761(of)X +847(the)X +969(6th)X +1095(International)X +1540(Conference)X +1933(on)X +2036(Very)X +604 2298(Large)N +815(Databases)X +1 f +1153(,)X +1193(1980.)X +432 2474([NEL90])N +743(Nelson,)X +1011(Philip)X +1222(A.,)X +2 f +1341(Gdbm)X +1558(1.4)X +1679(source)X +1913(distribu-)X +604 2562(tion)N +748(and)X +888(README)X +1 f +1209(,)X +1249(August)X +1500(1990.)X +432 2738([THOM90])N +840(Ken)X +1011(Thompson,)X +1410(private)X +1670(communication,)X +604 2826(Nov.)N +782(1990.)X +432 3002([TOR87])N +790(Torek,)X +1066(C.,)X +1222(``Re:)X +1470(dbm.a)X +1751(and)X +1950(ndbm.a)X +604 3090(archives'',)N +2 f +966(USENET)X +1279(newsgroup)X +1650(comp.unix)X +1 f +2002(1987.)X +432 3266([TOR88])N +760(Torek,)X +1006(C.,)X +1133(``Re:)X +1351(questions)X +1686(regarding)X +2027(data-)X +604 3354(bases)N +826(created)X +1106(with)X +1295(dbm)X +1484(and)X +1647(ndbm)X +1876(routines'')X +2 f +604 3442(USENET)N +937(newsgroup)X +1328(comp.unix.questions)X +1 f +1982(,)X +2041(June)X +604 3530(1988.)N +432 3706([WAL84])N +773(Wales,)X +1018(R.,)X +1135(``Discussion)X +1564(of)X +1655("dbm")X +1887(data)X +2045(base)X +604 3794(system'',)N +2 f +973(USENET)X +1339(newsgroup)X +1762(unix.wizards)X +1 f +2168(,)X +604 3882(January,)N +894(1984.)X +432 4058([YIG89])N +751(Ozan)X +963(S.)X +1069(Yigit,)X +1294(``How)X +1545(to)X +1648(Roll)X +1826(Your)X +2032(Own)X +604 4146(Dbm/Ndbm'',)N +2 f +1087(unpublished)X +1504(manuscript)X +1 f +(,)S +1910(Toronto,)X +604 4234(July,)N +777(1989)X +3 f +432 5960(12)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +13 p +%%Page: 13 13 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +720 258(Seltzer)N +977(&)X +1064(Yigit)X +3278(A)X +3356(New)X +3528(Hashing)X +3831(Package)X +4136(for)X +4259(UNIX)X +1 f +720 538(Margo)N +960(I.)X +1033(Seltzer)X +1282(is)X +1361(a)X +1423(Ph.D.)X +1631(student)X +1887(in)X +1974(the)X +2097(Department)X +720 626(of)N +823(Electrical)X +1167(Engineering)X +1595(and)X +1747(Computer)X +2102(Sciences)X +2418(at)X +720 714(the)N +850(University)X +1220(of)X +1318(California,)X +1694(Berkeley.)X +2055(Her)X +2207(research)X +720 802(interests)N +1017(include)X +1283(\256le)X +1415(systems,)X +1718(databases,)X +2076(and)X +2221(transac-)X +720 890(tion)N +896(processing)X +1291(systems.)X +1636(She)X +1807(spent)X +2027(several)X +2306(years)X +720 978(working)N +1026(at)X +1123(startup)X +1380(companies)X +1762(designing)X +2112(and)X +2267(imple-)X +720 1066(menting)N +1048(\256le)X +1216(systems)X +1535(and)X +1716(transaction)X +2133(processing)X +720 1154(software)N +1026(and)X +1170(designing)X +1509(microprocessors.)X +2103(Ms.)X +2253(Seltzer)X +720 1242(received)N +1057(her)X +1223(AB)X +1397(in)X +1522(Applied)X +1843(Mathematics)X +2320(from)X +720 1330 0.1953(Harvard/Radcliffe)AN +1325(College)X +1594(in)X +1676(1983.)X +720 1444(In)N +810(her)X +936(spare)X +1129(time,)X +1313(Margo)X +1549(can)X +1683(usually)X +1936(be)X +2034(found)X +2243(prepar-)X +720 1532(ing)N +868(massive)X +1171(quantities)X +1527(of)X +1639(food)X +1831(for)X +1970(hungry)X +2242(hoards,)X +720 1620(studying)N +1022(Japanese,)X +1355(or)X +1449(playing)X +1716(soccer)X +1948(with)X +2116(an)X +2218(exciting)X +720 1708(Bay)N +912(Area)X +1132(Women's)X +1507(Soccer)X +1788(team,)X +2026(the)X +2186(Berkeley)X +720 1796(Bruisers.)N +720 1910(Ozan)N +915(\()X +3 f +942(Oz)X +1 f +1040(\))X +1092(Yigit)X +1281(is)X +1358(currently)X +1672(a)X +1732(software)X +2033(engineer)X +2334(with)X +720 1998(the)N +886(Communications)X +1499(Research)X +1861(and)X +2044(Development)X +720 2086(group,)N +948(Computing)X +1328(Services,)X +1641(York)X +1826(University.)X +2224(His)X +2355(for-)X +720 2174(mative)N +967(years)X +1166(were)X +1352(also)X +1510(spent)X +1708(at)X +1795(York,)X +2009(where)X +2234(he)X +2338(held)X +720 2262(system)N +985(programmer)X +1425(and)X +1583(administrator)X +2052(positions)X +2382(for)X +720 2350(various)N +995(mixtures)X +1314(of)X +1420(of)X +1526(UNIX)X +1765(systems)X +2056(starting)X +2334(with)X +720 2438(Berkeley)N +1031(4.1)X +1151(in)X +1233(1982,)X +1433(while)X +1631(at)X +1709(the)X +1827(same)X +2012(time)X +2174(obtaining)X +720 2526(a)N +776(degree)X +1011(in)X +1093(Computer)X +1433(Science.)X +720 2640(In)N +813(his)X +931(copious)X +1205(free)X +1356(time,)X +1543(Oz)X +1662(enjoys)X +1896(working)X +2188(on)X +2293(what-)X +720 2728(ever)N +890(software)X +1197(looks)X +1400(interesting,)X +1788(which)X +2014(often)X +2209(includes)X +720 2816(language)N +1044(interpreters,)X +1464(preprocessors,)X +1960(and)X +2110(lately,)X +2342(pro-)X +720 2904(gram)N +905(generators)X +1260(and)X +1396(expert)X +1617(systems.)X +720 3018(Oz)N +836(has)X +964(authored)X +1266(several)X +1515(public-domain)X +2003(software)X +2301(tools,)X +720 3106(including)N +1069(an)X +1191(nroff-like)X +1545(text)X +1711(formatter)X +2 f +2056(proff)X +1 f +2257(that)X +2423(is)X +720 3194(apparently)N +1083(still)X +1226(used)X +1397(in)X +1483(some)X +1676(basement)X +2002(PCs.)X +2173(His)X +2307(latest)X +720 3282(obsessions)N +1143(include)X +1460(the)X +1639(incredible)X +2040(programming)X +720 3370(language)N +1030(Scheme,)X +1324(and)X +1460(Chinese)X +1738(Brush)X +1949(painting.)X +3 f +720 5960(USENIX)N +9 f +1042(-)X +3 f +1106(Winter)X +1371('91)X +9 f +1498(-)X +3 f +1562(Dallas,)X +1815(TX)X +4384(13)X + +14 p +%%Page: 14 14 +0(Courier)xf 0 f +10 s 10 xH 0 xS 0 f +3 f +432 5960(14)N +2970(USENIX)X +9 f +3292(-)X +3 f +3356(Winter)X +3621('91)X +9 f +3748(-)X +3 f +3812(Dallas,)X +4065(TX)X + +14 p +%%Trailer +xt + +xs diff --git a/lib/libc/db/doc/libtp.usenix.ps b/lib/libc/db/doc/libtp.usenix.ps new file mode 100644 index 0000000..ea821a9 --- /dev/null +++ b/lib/libc/db/doc/libtp.usenix.ps @@ -0,0 +1,12340 @@ +%!PS-Adobe-1.0 +%%Creator: utopia:margo (& Seltzer,608-13E,8072,) +%%Title: stdin (ditroff) +%%CreationDate: Thu Dec 12 15:32:11 1991 +%%EndComments +% @(#)psdit.pro 1.3 4/15/88 +% lib/psdit.pro -- prolog for psdit (ditroff) files +% Copyright (c) 1984, 1985 Adobe Systems Incorporated. All Rights Reserved. +% last edit: shore Sat Nov 23 20:28:03 1985 +% RCSID: $Header: psdit.pro,v 2.1 85/11/24 12:19:43 shore Rel $ + +% Changed by Edward Wang (edward@ucbarpa.berkeley.edu) to handle graphics, +% 17 Feb, 87. + +/$DITroff 140 dict def $DITroff begin +/fontnum 1 def /fontsize 10 def /fontheight 10 def /fontslant 0 def +/xi{0 72 11 mul translate 72 resolution div dup neg scale 0 0 moveto + /fontnum 1 def /fontsize 10 def /fontheight 10 def /fontslant 0 def F + /pagesave save def}def +/PB{save /psv exch def currentpoint translate + resolution 72 div dup neg scale 0 0 moveto}def +/PE{psv restore}def +/arctoobig 90 def /arctoosmall .05 def +/m1 matrix def /m2 matrix def /m3 matrix def /oldmat matrix def +/tan{dup sin exch cos div}def +/point{resolution 72 div mul}def +/dround {transform round exch round exch itransform}def +/xT{/devname exch def}def +/xr{/mh exch def /my exch def /resolution exch def}def +/xp{}def +/xs{docsave restore end}def +/xt{}def +/xf{/fontname exch def /slotno exch def fontnames slotno get fontname eq not + {fonts slotno fontname findfont put fontnames slotno fontname put}if}def +/xH{/fontheight exch def F}def +/xS{/fontslant exch def F}def +/s{/fontsize exch def /fontheight fontsize def F}def +/f{/fontnum exch def F}def +/F{fontheight 0 le{/fontheight fontsize def}if + fonts fontnum get fontsize point 0 0 fontheight point neg 0 0 m1 astore + fontslant 0 ne{1 0 fontslant tan 1 0 0 m2 astore m3 concatmatrix}if + makefont setfont .04 fontsize point mul 0 dround pop setlinewidth}def +/X{exch currentpoint exch pop moveto show}def +/N{3 1 roll moveto show}def +/Y{exch currentpoint pop exch moveto show}def +/S{show}def +/ditpush{}def/ditpop{}def +/AX{3 -1 roll currentpoint exch pop moveto 0 exch ashow}def +/AN{4 2 roll moveto 0 exch ashow}def +/AY{3 -1 roll currentpoint pop exch moveto 0 exch ashow}def +/AS{0 exch ashow}def +/MX{currentpoint exch pop moveto}def +/MY{currentpoint pop exch moveto}def +/MXY{moveto}def +/cb{pop}def % action on unknown char -- nothing for now +/n{}def/w{}def +/p{pop showpage pagesave restore /pagesave save def}def +/Dt{/Dlinewidth exch def}def 1 Dt +/Ds{/Ddash exch def}def -1 Ds +/Di{/Dstipple exch def}def 1 Di +/Dsetlinewidth{2 Dlinewidth mul setlinewidth}def +/Dsetdash{Ddash 4 eq{[8 12]}{Ddash 16 eq{[32 36]} + {Ddash 20 eq{[32 12 8 12]}{[]}ifelse}ifelse}ifelse 0 setdash}def +/Dstroke{gsave Dsetlinewidth Dsetdash 1 setlinecap stroke grestore + currentpoint newpath moveto}def +/Dl{rlineto Dstroke}def +/arcellipse{/diamv exch def /diamh exch def oldmat currentmatrix pop + currentpoint translate 1 diamv diamh div scale /rad diamh 2 div def + currentpoint exch rad add exch rad -180 180 arc oldmat setmatrix}def +/Dc{dup arcellipse Dstroke}def +/De{arcellipse Dstroke}def +/Da{/endv exch def /endh exch def /centerv exch def /centerh exch def + /cradius centerv centerv mul centerh centerh mul add sqrt def + /eradius endv endv mul endh endh mul add sqrt def + /endang endv endh atan def + /startang centerv neg centerh neg atan def + /sweep startang endang sub dup 0 lt{360 add}if def + sweep arctoobig gt + {/midang startang sweep 2 div sub def /midrad cradius eradius add 2 div def + /midh midang cos midrad mul def /midv midang sin midrad mul def + midh neg midv neg endh endv centerh centerv midh midv Da + Da} + {sweep arctoosmall ge + {/controldelt 1 sweep 2 div cos sub 3 sweep 2 div sin mul div 4 mul def + centerv neg controldelt mul centerh controldelt mul + endv neg controldelt mul centerh add endh add + endh controldelt mul centerv add endv add + centerh endh add centerv endv add rcurveto Dstroke} + {centerh endh add centerv endv add rlineto Dstroke} + ifelse} + ifelse}def +/Dpatterns[ +[%cf[widthbits] +[8<0000000000000010>] +[8<0411040040114000>] +[8<0204081020408001>] +[8<0000103810000000>] +[8<6699996666999966>] +[8<0000800100001008>] +[8<81c36666c3810000>] +[8<0f0e0c0800000000>] +[8<0000000000000010>] +[8<0411040040114000>] +[8<0204081020408001>] +[8<0000001038100000>] +[8<6699996666999966>] +[8<0000800100001008>] +[8<81c36666c3810000>] +[8<0f0e0c0800000000>] +[8<0042660000246600>] +[8<0000990000990000>] +[8<0804020180402010>] +[8<2418814242811824>] +[8<6699996666999966>] +[8<8000000008000000>] +[8<00001c3e363e1c00>] +[8<0000000000000000>] +[32<00000040000000c00000004000000040000000e0000000000000000000000000>] +[32<00000000000060000000900000002000000040000000f0000000000000000000>] +[32<000000000000000000e0000000100000006000000010000000e0000000000000>] +[32<00000000000000002000000060000000a0000000f00000002000000000000000>] +[32<0000000e0000000000000000000000000000000f000000080000000e00000001>] +[32<0000090000000600000000000000000000000000000007000000080000000e00>] +[32<00010000000200000004000000040000000000000000000000000000000f0000>] +[32<0900000006000000090000000600000000000000000000000000000006000000>]] +[%ug +[8<0000020000000000>] +[8<0000020000002000>] +[8<0004020000002000>] +[8<0004020000402000>] +[8<0004060000402000>] +[8<0004060000406000>] +[8<0006060000406000>] +[8<0006060000606000>] +[8<00060e0000606000>] +[8<00060e000060e000>] +[8<00070e000060e000>] +[8<00070e000070e000>] +[8<00070e020070e000>] +[8<00070e020070e020>] +[8<04070e020070e020>] +[8<04070e024070e020>] +[8<04070e064070e020>] +[8<04070e064070e060>] +[8<06070e064070e060>] +[8<06070e066070e060>] +[8<06070f066070e060>] +[8<06070f066070f060>] +[8<060f0f066070f060>] +[8<060f0f0660f0f060>] +[8<060f0f0760f0f060>] +[8<060f0f0760f0f070>] +[8<0e0f0f0760f0f070>] +[8<0e0f0f07e0f0f070>] +[8<0e0f0f0fe0f0f070>] +[8<0e0f0f0fe0f0f0f0>] +[8<0f0f0f0fe0f0f0f0>] +[8<0f0f0f0ff0f0f0f0>] +[8<1f0f0f0ff0f0f0f0>] +[8<1f0f0f0ff1f0f0f0>] +[8<1f0f0f8ff1f0f0f0>] +[8<1f0f0f8ff1f0f0f8>] +[8<9f0f0f8ff1f0f0f8>] +[8<9f0f0f8ff9f0f0f8>] +[8<9f0f0f9ff9f0f0f8>] +[8<9f0f0f9ff9f0f0f9>] +[8<9f8f0f9ff9f0f0f9>] +[8<9f8f0f9ff9f8f0f9>] +[8<9f8f1f9ff9f8f0f9>] +[8<9f8f1f9ff9f8f1f9>] +[8<bf8f1f9ff9f8f1f9>] +[8<bf8f1f9ffbf8f1f9>] +[8<bf8f1fdffbf8f1f9>] +[8<bf8f1fdffbf8f1fd>] +[8<ff8f1fdffbf8f1fd>] +[8<ff8f1fdffff8f1fd>] +[8<ff8f1ffffff8f1fd>] +[8<ff8f1ffffff8f1ff>] +[8<ff9f1ffffff8f1ff>] +[8<ff9f1ffffff9f1ff>] +[8<ff9f9ffffff9f1ff>] +[8<ff9f9ffffff9f9ff>] +[8<ffbf9ffffff9f9ff>] +[8<ffbf9ffffffbf9ff>] +[8<ffbfdffffffbf9ff>] +[8<ffbfdffffffbfdff>] +[8<ffffdffffffbfdff>] +[8<ffffdffffffffdff>] +[8<fffffffffffffdff>] +[8<ffffffffffffffff>]] +[%mg +[8<8000000000000000>] +[8<0822080080228000>] +[8<0204081020408001>] +[8<40e0400000000000>] +[8<66999966>] +[8<8001000010080000>] +[8<81c36666c3810000>] +[8<f0e0c08000000000>] +[16<07c00f801f003e007c00f800f001e003c007800f001f003e007c00f801f003e0>] +[16<1f000f8007c003e001f000f8007c003e001f800fc007e003f001f8007c003e00>] +[8<c3c300000000c3c3>] +[16<0040008001000200040008001000200040008000000100020004000800100020>] +[16<0040002000100008000400020001800040002000100008000400020001000080>] +[16<1fc03fe07df0f8f8f07de03fc01f800fc01fe03ff07df8f87df03fe01fc00f80>] +[8<80>] +[8<8040201000000000>] +[8<84cc000048cc0000>] +[8<9900009900000000>] +[8<08040201804020100800020180002010>] +[8<2418814242811824>] +[8<66999966>] +[8<8000000008000000>] +[8<70f8d8f870000000>] +[8<0814224180402010>] +[8<aa00440a11a04400>] +[8<018245aa45820100>] +[8<221c224180808041>] +[8<88000000>] +[8<0855800080550800>] +[8<2844004482440044>] +[8<0810204080412214>] +[8<00>]]]def +/Dfill{ + transform /maxy exch def /maxx exch def + transform /miny exch def /minx exch def + minx maxx gt{/minx maxx /maxx minx def def}if + miny maxy gt{/miny maxy /maxy miny def def}if + Dpatterns Dstipple 1 sub get exch 1 sub get + aload pop /stip exch def /stipw exch def /stiph 128 def + /imatrix[stipw 0 0 stiph 0 0]def + /tmatrix[stipw 0 0 stiph 0 0]def + /minx minx cvi stiph idiv stiph mul def + /miny miny cvi stipw idiv stipw mul def + gsave eoclip 0 setgray + miny stiph maxy{ + tmatrix exch 5 exch put + minx stipw maxx{ + tmatrix exch 4 exch put tmatrix setmatrix + stipw stiph true imatrix {stip} imagemask + }for + }for + grestore +}def +/Dp{Dfill Dstroke}def +/DP{Dfill currentpoint newpath moveto}def +end + +/ditstart{$DITroff begin + /nfonts 60 def % NFONTS makedev/ditroff dependent! + /fonts[nfonts{0}repeat]def + /fontnames[nfonts{()}repeat]def +/docsave save def +}def + +% character outcalls +/oc{ + /pswid exch def /cc exch def /name exch def + /ditwid pswid fontsize mul resolution mul 72000 div def + /ditsiz fontsize resolution mul 72 div def + ocprocs name known{ocprocs name get exec}{name cb}ifelse +}def +/fractm [.65 0 0 .6 0 0] def +/fraction{ + /fden exch def /fnum exch def gsave /cf currentfont def + cf fractm makefont setfont 0 .3 dm 2 copy neg rmoveto + fnum show rmoveto currentfont cf setfont(\244)show setfont fden show + grestore ditwid 0 rmoveto +}def +/oce{grestore ditwid 0 rmoveto}def +/dm{ditsiz mul}def +/ocprocs 50 dict def ocprocs begin +(14){(1)(4)fraction}def +(12){(1)(2)fraction}def +(34){(3)(4)fraction}def +(13){(1)(3)fraction}def +(23){(2)(3)fraction}def +(18){(1)(8)fraction}def +(38){(3)(8)fraction}def +(58){(5)(8)fraction}def +(78){(7)(8)fraction}def +(sr){gsave 0 .06 dm rmoveto(\326)show oce}def +(is){gsave 0 .15 dm rmoveto(\362)show oce}def +(->){gsave 0 .02 dm rmoveto(\256)show oce}def +(<-){gsave 0 .02 dm rmoveto(\254)show oce}def +(==){gsave 0 .05 dm rmoveto(\272)show oce}def +(uc){gsave currentpoint 400 .009 dm mul add translate + 8 -8 scale ucseal oce}def +end + +% an attempt at a PostScript FONT to implement ditroff special chars +% this will enable us to +% cache the little buggers +% generate faster, more compact PS out of psdit +% confuse everyone (including myself)! +50 dict dup begin +/FontType 3 def +/FontName /DIThacks def +/FontMatrix [.001 0 0 .001 0 0] def +/FontBBox [-260 -260 900 900] def% a lie but ... +/Encoding 256 array def +0 1 255{Encoding exch /.notdef put}for +Encoding + dup 8#040/space put %space + dup 8#110/rc put %right ceil + dup 8#111/lt put %left top curl + dup 8#112/bv put %bold vert + dup 8#113/lk put %left mid curl + dup 8#114/lb put %left bot curl + dup 8#115/rt put %right top curl + dup 8#116/rk put %right mid curl + dup 8#117/rb put %right bot curl + dup 8#120/rf put %right floor + dup 8#121/lf put %left floor + dup 8#122/lc put %left ceil + dup 8#140/sq put %square + dup 8#141/bx put %box + dup 8#142/ci put %circle + dup 8#143/br put %box rule + dup 8#144/rn put %root extender + dup 8#145/vr put %vertical rule + dup 8#146/ob put %outline bullet + dup 8#147/bu put %bullet + dup 8#150/ru put %rule + dup 8#151/ul put %underline + pop +/DITfd 100 dict def +/BuildChar{0 begin + /cc exch def /fd exch def + /charname fd /Encoding get cc get def + /charwid fd /Metrics get charname get def + /charproc fd /CharProcs get charname get def + charwid 0 fd /FontBBox get aload pop setcachedevice + 2 setlinejoin 40 setlinewidth + newpath 0 0 moveto gsave charproc grestore + end}def +/BuildChar load 0 DITfd put +/CharProcs 50 dict def +CharProcs begin +/space{}def +/.notdef{}def +/ru{500 0 rls}def +/rn{0 840 moveto 500 0 rls}def +/vr{0 800 moveto 0 -770 rls}def +/bv{0 800 moveto 0 -1000 rls}def +/br{0 840 moveto 0 -1000 rls}def +/ul{0 -140 moveto 500 0 rls}def +/ob{200 250 rmoveto currentpoint newpath 200 0 360 arc closepath stroke}def +/bu{200 250 rmoveto currentpoint newpath 200 0 360 arc closepath fill}def +/sq{80 0 rmoveto currentpoint dround newpath moveto + 640 0 rlineto 0 640 rlineto -640 0 rlineto closepath stroke}def +/bx{80 0 rmoveto currentpoint dround newpath moveto + 640 0 rlineto 0 640 rlineto -640 0 rlineto closepath fill}def +/ci{500 360 rmoveto currentpoint newpath 333 0 360 arc + 50 setlinewidth stroke}def + +/lt{0 -200 moveto 0 550 rlineto currx 800 2cx s4 add exch s4 a4p stroke}def +/lb{0 800 moveto 0 -550 rlineto currx -200 2cx s4 add exch s4 a4p stroke}def +/rt{0 -200 moveto 0 550 rlineto currx 800 2cx s4 sub exch s4 a4p stroke}def +/rb{0 800 moveto 0 -500 rlineto currx -200 2cx s4 sub exch s4 a4p stroke}def +/lk{0 800 moveto 0 300 -300 300 s4 arcto pop pop 1000 sub + 0 300 4 2 roll s4 a4p 0 -200 lineto stroke}def +/rk{0 800 moveto 0 300 s2 300 s4 arcto pop pop 1000 sub + 0 300 4 2 roll s4 a4p 0 -200 lineto stroke}def +/lf{0 800 moveto 0 -1000 rlineto s4 0 rls}def +/rf{0 800 moveto 0 -1000 rlineto s4 neg 0 rls}def +/lc{0 -200 moveto 0 1000 rlineto s4 0 rls}def +/rc{0 -200 moveto 0 1000 rlineto s4 neg 0 rls}def +end + +/Metrics 50 dict def Metrics begin +/.notdef 0 def +/space 500 def +/ru 500 def +/br 0 def +/lt 416 def +/lb 416 def +/rt 416 def +/rb 416 def +/lk 416 def +/rk 416 def +/rc 416 def +/lc 416 def +/rf 416 def +/lf 416 def +/bv 416 def +/ob 350 def +/bu 350 def +/ci 750 def +/bx 750 def +/sq 750 def +/rn 500 def +/ul 500 def +/vr 0 def +end + +DITfd begin +/s2 500 def /s4 250 def /s3 333 def +/a4p{arcto pop pop pop pop}def +/2cx{2 copy exch}def +/rls{rlineto stroke}def +/currx{currentpoint pop}def +/dround{transform round exch round exch itransform} def +end +end +/DIThacks exch definefont pop +ditstart +(psc)xT +576 1 1 xr +1(Times-Roman)xf 1 f +2(Times-Italic)xf 2 f +3(Times-Bold)xf 3 f +4(Times-BoldItalic)xf 4 f +5(Helvetica)xf 5 f +6(Helvetica-Bold)xf 6 f +7(Courier)xf 7 f +8(Courier-Bold)xf 8 f +9(Symbol)xf 9 f +10(DIThacks)xf 10 f +10 s +1 f +xi +%%EndProlog + +%%Page: 1 1 +10 s 10 xH 0 xS 1 f +3 f +14 s +1205 1206(LIBTP:)N +1633(Portable,)X +2100(M)X +2206(odular)X +2551(Transactions)X +3202(for)X +3374(UNIX)X +1 f +11 s +3661 1162(1)N +2 f +12 s +2182 1398(Margo)N +2467(Seltzer)X +2171 1494(Michael)N +2511(Olson)X +1800 1590(University)N +2225(of)X +2324(California,)X +2773(Berkeley)X +3 f +2277 1878(Abstract)N +1 f +10 s +755 2001(Transactions)N +1198(provide)X +1475(a)X +1543(useful)X +1771(programming)X +2239(paradigm)X +2574(for)X +2700(maintaining)X +3114(logical)X +3364(consistency,)X +3790(arbitrating)X +4156(con-)X +555 2091(current)N +808(access,)X +1059(and)X +1200(managing)X +1540(recovery.)X +1886(In)X +1977(traditional)X +2330(UNIX)X +2555(systems,)X +2852(the)X +2974(only)X +3140(easy)X +3307(way)X +3465(of)X +3556(using)X +3753(transactions)X +4160(is)X +4237(to)X +555 2181(purchase)N +876(a)X +947(database)X +1258(system.)X +1554(Such)X +1748(systems)X +2035(are)X +2168(often)X +2367(slow,)X +2572(costly,)X +2817(and)X +2967(may)X +3139(not)X +3275(provide)X +3554(the)X +3686(exact)X +3890(functionality)X +555 2271(desired.)N +848(This)X +1011(paper)X +1210(presents)X +1493(the)X +1611(design,)X +1860(implementation,)X +2402(and)X +2538(performance)X +2965(of)X +3052(LIBTP,)X +3314(a)X +3370(simple,)X +3623(non-proprietary)X +4147(tran-)X +555 2361(saction)N +809(library)X +1050(using)X +1249(the)X +1373(4.4BSD)X +1654(database)X +1957(access)X +2189(routines)X +2473(\()X +3 f +2500(db)X +1 f +2588(\(3\)\).)X +2775(On)X +2899(a)X +2961(conventional)X +3401(transaction)X +3779(processing)X +4148(style)X +555 2451(benchmark,)N +959(its)X +1061(performance)X +1495(is)X +1575(approximately)X +2065(85%)X +2239(that)X +2386(of)X +2480(the)X +2604(database)X +2907(access)X +3139(routines)X +3423(without)X +3693(transaction)X +4071(protec-)X +555 2541(tion,)N +725(200%)X +938(that)X +1084(of)X +1177(using)X +3 f +1376(fsync)X +1 f +1554(\(2\))X +1674(to)X +1761(commit)X +2030(modi\256cations)X +2490(to)X +2577(disk,)X +2755(and)X +2896(125%)X +3108(that)X +3253(of)X +3345(a)X +3406(commercial)X +3810(relational)X +4138(data-)X +555 2631(base)N +718(system.)X +3 f +555 2817(1.)N +655(Introduction)X +1 f +755 2940(Transactions)N +1186(are)X +1306(used)X +1474(in)X +1557(database)X +1855(systems)X +2129(to)X +2212(enable)X +2443(concurrent)X +2807(users)X +2992(to)X +3074(apply)X +3272(multi-operation)X +3790(updates)X +4055(without)X +555 3030(violating)N +863(the)X +985(integrity)X +1280(of)X +1371(the)X +1493(database.)X +1814(They)X +2003(provide)X +2271(the)X +2392(properties)X +2736(of)X +2826(atomicity,)X +3171(consistency,)X +3588(isolation,)X +3906(and)X +4045(durabil-)X +555 3120(ity.)N +701(By)X +816(atomicity,)X +1160(we)X +1276(mean)X +1472(that)X +1614(the)X +1734(set)X +1845(of)X +1934(updates)X +2200(comprising)X +2581(a)X +2638(transaction)X +3011(must)X +3187(be)X +3284(applied)X +3541(as)X +3629(a)X +3686(single)X +3898(unit;)X +4085(that)X +4226(is,)X +555 3210(they)N +714(must)X +890(either)X +1094(all)X +1195(be)X +1292(applied)X +1549(to)X +1632(the)X +1751(database)X +2049(or)X +2137(all)X +2238(be)X +2335(absent.)X +2601(Consistency)X +3013(requires)X +3293(that)X +3434(a)X +3491(transaction)X +3864(take)X +4019(the)X +4138(data-)X +555 3300(base)N +725(from)X +908(one)X +1051(logically)X +1358(consistent)X +1704(state)X +1877(to)X +1965(another.)X +2272(The)X +2423(property)X +2721(of)X +2814(isolation)X +3115(requires)X +3400(that)X +3546(concurrent)X +3916(transactions)X +555 3390(yield)N +750(results)X +994(which)X +1225(are)X +1358(indistinguishable)X +1938(from)X +2128(the)X +2260(results)X +2503(which)X +2733(would)X +2967(be)X +3077(obtained)X +3387(by)X +3501(running)X +3784(the)X +3916(transactions)X +555 3480(sequentially.)N +1002(Finally,)X +1268(durability)X +1599(requires)X +1878(that)X +2018(once)X +2190(transactions)X +2593(have)X +2765(been)X +2937(committed,)X +3319(their)X +3486(results)X +3715(must)X +3890(be)X +3986(preserved)X +555 3570(across)N +776(system)X +1018(failures)X +1279([TPCB90].)X +755 3693(Although)N +1080(these)X +1268(properties)X +1612(are)X +1734(most)X +1912(frequently)X +2265(discussed)X +2595(in)X +2680(the)X +2801(context)X +3060(of)X +3150(databases,)X +3501(they)X +3661(are)X +3782(useful)X +4000(program-)X +555 3783(ming)N +750(paradigms)X +1114(for)X +1238(more)X +1433(general)X +1700(purpose)X +1984(applications.)X +2441(There)X +2659(are)X +2788(several)X +3046(different)X +3353(situations)X +3689(where)X +3916(transactions)X +555 3873(can)N +687(be)X +783(used)X +950(to)X +1032(replace)X +1285(current)X +1533(ad-hoc)X +1772(mechanisms.)X +755 3996(One)N +910(situation)X +1206(is)X +1280(when)X +1475(multiple)X +1762(\256les)X +1916(or)X +2004(parts)X +2181(of)X +2269(\256les)X +2422(need)X +2594(to)X +2676(be)X +2772(updated)X +3046(in)X +3128(an)X +3224(atomic)X +3462(fashion.)X +3758(For)X +3889(example,)X +4201(the)X +555 4086(traditional)N +907(UNIX)X +1131(\256le)X +1256(system)X +1501(uses)X +1661(ordering)X +1955(constraints)X +2324(to)X +2408(achieve)X +2676(recoverability)X +3144(in)X +3228(the)X +3348(face)X +3505(of)X +3594(crashes.)X +3893(When)X +4107(a)X +4165(new)X +555 4176(\256le)N +678(is)X +752(created,)X +1026(its)X +1122(inode)X +1321(is)X +1395(written)X +1642(to)X +1724(disk)X +1877(before)X +2103(the)X +2221(new)X +2375(\256le)X +2497(is)X +2570(added)X +2782(to)X +2864(the)X +2982(directory)X +3292(structure.)X +3633(This)X +3795(guarantees)X +4159(that,)X +555 4266(if)N +627(the)X +748(system)X +993(crashes)X +1253(between)X +1544(the)X +1665(two)X +1808(I/O's,)X +2016(the)X +2137(directory)X +2450(does)X +2620(not)X +2744(contain)X +3002(a)X +3060 0.4531(reference)AX +3383(to)X +3467(an)X +3565(invalid)X +3809(inode.)X +4049(In)X +4138(actu-)X +555 4356(ality,)N +741(the)X +863(desired)X +1119(effect)X +1326(is)X +1402(that)X +1545(these)X +1733(two)X +1876(updates)X +2144(have)X +2319(the)X +2440(transactional)X +2873(property)X +3168(of)X +3258(atomicity)X +3583(\(either)X +3816(both)X +3981(writes)X +4200(are)X +555 4446(visible)N +790(or)X +879(neither)X +1124(is\).)X +1266(Rather)X +1501(than)X +1660(building)X +1947(special)X +2191(purpose)X +2466(recovery)X +2769(mechanisms)X +3186(into)X +3331(the)X +3450(\256le)X +3573(system)X +3816(or)X +3904(related)X +4144(tools)X +555 4536(\()N +2 f +582(e.g.)X +3 f +726(fsck)X +1 f +864(\(8\)\),)X +1033(one)X +1177(could)X +1383(use)X +1518(general)X +1783(purpose)X +2064(transaction)X +2443(recovery)X +2752(protocols)X +3077(after)X +3252(system)X +3501(failure.)X +3778(Any)X +3943(application)X +555 4626(that)N +705(needs)X +918(to)X +1010(keep)X +1192(multiple,)X +1508(related)X +1757(\256les)X +1920(\(or)X +2044(directories\))X +2440(consistent)X +2790(should)X +3032(do)X +3141(so)X +3241(using)X +3443(transactions.)X +3895(Source)X +4147(code)X +555 4716(control)N +805(systems,)X +1101(such)X +1271(as)X +1361(RCS)X +1534(and)X +1673(SCCS,)X +1910(should)X +2146(use)X +2276(transaction)X +2651(semantics)X +2990(to)X +3075(allow)X +3276(the)X +3397(``checking)X +3764(in'')X +3903(of)X +3992(groups)X +4232(of)X +555 4806(related)N +801(\256les.)X +1001(In)X +1095(this)X +1237(way,)X +1418(if)X +1493(the)X +1617 0.2841(``check-in'')AX +2028(fails,)X +2212(the)X +2336(transaction)X +2714(may)X +2878(be)X +2980(aborted,)X +3267(backing)X +3547(out)X +3675(the)X +3799(partial)X +4030(``check-)X +555 4896(in'')N +691(leaving)X +947(the)X +1065(source)X +1295(repository)X +1640(in)X +1722(a)X +1778(consistent)X +2118(state.)X +755 5019(A)N +842(second)X +1094(situation)X +1398(where)X +1624(transactions)X +2036(can)X +2177(be)X +2282(used)X +2458(to)X +2549(replace)X +2811(current)X +3068(ad-hoc)X +3316(mechanisms)X +3741(is)X +3822(in)X +3912(applications)X +555 5109(where)N +776(concurrent)X +1144(updates)X +1413(to)X +1499(a)X +1559(shared)X +1793(\256le)X +1919(are)X +2042(desired,)X +2318(but)X +2444(there)X +2629(is)X +2706(logical)X +2948(consistency)X +3345(of)X +3435(the)X +3556(data)X +3713(which)X +3932(needs)X +4138(to)X +4223(be)X +555 5199(preserved.)N +928(For)X +1059(example,)X +1371(when)X +1565(the)X +1683(password)X +2006(\256le)X +2128(is)X +2201(updated,)X +2495(\256le)X +2617(locking)X +2877(is)X +2950(used)X +3117(to)X +3199(disallow)X +3490(concurrent)X +3854(access.)X +4120(Tran-)X +555 5289(saction)N +804(semantics)X +1142(on)X +1244(the)X +1364(password)X +1689(\256les)X +1844(would)X +2066(allow)X +2266(concurrent)X +2632(updates,)X +2919(while)X +3119(preserving)X +3479(the)X +3598(logical)X +3837(consistency)X +4232(of)X +555 5379(the)N +681(password)X +1012(database.)X +1357(Similarly,)X +1702(UNIX)X +1930(utilities)X +2196(which)X +2419(rewrite)X +2674(\256les)X +2834(face)X +2996(a)X +3059(potential)X +3366(race)X +3528(condition)X +3857(between)X +4152(their)X +555 5469(rewriting)N +871(a)X +929(\256le)X +1053(and)X +1191(another)X +1453(process)X +1715(reading)X +1977(the)X +2096(\256le.)X +2259(For)X +2391(example,)X +2704(the)X +2823(compiler)X +3129(\(more)X +3342(precisely,)X +3673(the)X +3792(assembler\))X +4161(may)X +8 s +10 f +555 5541(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)N +5 s +1 f +727 5619(1)N +8 s +763 5644(To)N +850(appear)X +1035(in)X +1101(the)X +2 f +1195(Proceedings)X +1530(of)X +1596(the)X +1690(1992)X +1834(Winter)X +2024(Usenix)X +1 f +2201(,)X +2233(San)X +2345(Francisco,)X +2625(CA,)X +2746(January)X +2960(1992.)X + +2 p +%%Page: 2 2 +8 s 8 xH 0 xS 1 f +10 s +3 f +1 f +555 630(have)N +737(to)X +829(rewrite)X +1087(a)X +1152(\256le)X +1283(to)X +1374(which)X +1599(it)X +1672(has)X +1808(write)X +2002(permission)X +2382(in)X +2473(a)X +2538(directory)X +2857(to)X +2948(which)X +3173(it)X +3246(does)X +3422(not)X +3553(have)X +3734(write)X +3928(permission.)X +555 720(While)N +779(the)X +904(``.o'')X +1099(\256le)X +1228(is)X +1308(being)X +1513(written,)X +1787(another)X +2055(utility)X +2272(such)X +2446(as)X +3 f +2540(nm)X +1 f +2651(\(1\))X +2772(or)X +3 f +2866(ar)X +1 f +2942(\(1\))X +3063(may)X +3228(read)X +3394(the)X +3519(\256le)X +3648(and)X +3791(produce)X +4077(invalid)X +555 810(results)N +790(since)X +981(the)X +1105(\256le)X +1233(has)X +1366(not)X +1494(been)X +1672(completely)X +2054(written.)X +2347(Currently,)X +2700(some)X +2895(utilities)X +3160(use)X +3293(special)X +3542(purpose)X +3821(code)X +3998(to)X +4085(handle)X +555 900(such)N +722(cases)X +912(while)X +1110(others)X +1326(ignore)X +1551(the)X +1669(problem)X +1956(and)X +2092(force)X +2278(users)X +2463(to)X +2545(live)X +2685(with)X +2847(the)X +2965(consequences.)X +755 1023(In)N +845(this)X +983(paper,)X +1205(we)X +1322(present)X +1577(a)X +1635(simple)X +1870(library)X +2106(which)X +2324(provides)X +2622(transaction)X +2996(semantics)X +3334(\(atomicity,)X +3705(consistency,)X +4121(isola-)X +555 1113(tion,)N +720(and)X +857(durability\).)X +1236(The)X +1382(4.4BSD)X +1658(database)X +1956(access)X +2182(methods)X +2473(have)X +2645(been)X +2817(modi\256ed)X +3121(to)X +3203(use)X +3330(this)X +3465(library,)X +3719(optionally)X +4063(provid-)X +555 1203(ing)N +682(shared)X +917(buffer)X +1139(management)X +1574(between)X +1867(applications,)X +2298(locking,)X +2582(and)X +2722(transaction)X +3098(semantics.)X +3478(Any)X +3640(UNIX)X +3865(program)X +4161(may)X +555 1293(transaction)N +930(protect)X +1176(its)X +1274(data)X +1430(by)X +1532(requesting)X +1888(transaction)X +2262(protection)X +2609(with)X +2773(the)X +3 f +2893(db)X +1 f +2981(\(3\))X +3097(library)X +3333(or)X +3422(by)X +3524(adding)X +3764(appropriate)X +4152(calls)X +555 1383(to)N +646(the)X +773(transaction)X +1154(manager,)X +1480(buffer)X +1706(manager,)X +2032(lock)X +2199(manager,)X +2525(and)X +2670(log)X +2801(manager.)X +3147(The)X +3301(library)X +3543(routines)X +3829(may)X +3995(be)X +4099(linked)X +555 1473(into)N +708(the)X +834(host)X +995(application)X +1379(and)X +1523(called)X +1743(by)X +1851(subroutine)X +2217(interface,)X +2547(or)X +2642(they)X +2808(may)X +2974(reside)X +3194(in)X +3284(a)X +3348(separate)X +3640(server)X +3865(process.)X +4174(The)X +555 1563(server)N +772(architecture)X +1172(provides)X +1468(for)X +1582(network)X +1865(access)X +2091(and)X +2227(better)X +2430(protection)X +2775(mechanisms.)X +3 f +555 1749(2.)N +655(Related)X +938(Work)X +1 f +755 1872(There)N +1000(has)X +1164(been)X +1373(much)X +1608(discussion)X +1998(in)X +2117(recent)X +2371(years)X +2597(about)X +2831(new)X +3021(transaction)X +3429(models)X +3716(and)X +3888(architectures)X +555 1962 0.1172([SPEC88][NODI90][CHEN91][MOHA91].)AN +2009(Much)X +2220(of)X +2310(this)X +2448(work)X +2636(focuses)X +2900(on)X +3003(new)X +3160(ways)X +3348(to)X +3433(model)X +3656(transactions)X +4062(and)X +4201(the)X +555 2052(interactions)N +953(between)X +1245(them,)X +1449(while)X +1651(the)X +1772(work)X +1960(presented)X +2291(here)X +2453(focuses)X +2717(on)X +2820(the)X +2941(implementation)X +3466(and)X +3605(performance)X +4035(of)X +4125(tradi-)X +555 2142(tional)N +757(transaction)X +1129(techniques)X +1492(\(write-ahead)X +1919(logging)X +2183(and)X +2319(two-phase)X +2669(locking\))X +2956(on)X +3056(a)X +3112(standard)X +3404(operating)X +3727(system)X +3969(\(UNIX\).)X +755 2265(Such)N +947(traditional)X +1308(operating)X +1643(systems)X +1928(are)X +2059(often)X +2256(criticized)X +2587(for)X +2713(their)X +2892(inability)X +3190(to)X +3283(perform)X +3573(transaction)X +3956(processing)X +555 2355(adequately.)N +971([STON81])X +1342(cites)X +1517(three)X +1706(main)X +1894(areas)X +2088(of)X +2183(inadequate)X +2559(support:)X +2849(buffer)X +3074(management,)X +3532(the)X +3658(\256le)X +3788(system,)X +4058(and)X +4201(the)X +555 2445(process)N +823(structure.)X +1191(These)X +1410(arguments)X +1771(are)X +1897(summarized)X +2316(in)X +2405(table)X +2587(one.)X +2769(Fortunately,)X +3184(much)X +3388(has)X +3521(changed)X +3815(since)X +4006(1981.)X +4232(In)X +555 2535(the)N +683(area)X +848(of)X +945(buffer)X +1172(management,)X +1632(most)X +1817(UNIX)X +2048(systems)X +2331(provide)X +2606(the)X +2734(ability)X +2968(to)X +3060(memory)X +3357(map)X +3525(\256les,)X +3708(thus)X +3870(obviating)X +4201(the)X +555 2625(need)N +734(for)X +855(a)X +918(copy)X +1101(between)X +1396(kernel)X +1624(and)X +1766(user)X +1926(space.)X +2171(If)X +2251(a)X +2313(database)X +2616(system)X +2864(is)X +2943(going)X +3151(to)X +3239(use)X +3372(the)X +3496(\256le)X +3624(system)X +3872(buffer)X +4095(cache,)X +555 2715(then)N +719(a)X +781(system)X +1029(call)X +1171(is)X +1250(required.)X +1584(However,)X +1924(if)X +1998(buffering)X +2322(is)X +2400(provided)X +2710(at)X +2793(user)X +2952(level)X +3133(using)X +3331(shared)X +3566(memory,)X +3878(as)X +3970(in)X +4057(LIBTP,)X +555 2805(buffer)N +776(management)X +1210(is)X +1287(only)X +1452(as)X +1542(slow)X +1716(as)X +1806(access)X +2035(to)X +2120(shared)X +2353(memory)X +2643(and)X +2782(any)X +2921(replacement)X +3337(algorithm)X +3671(may)X +3832(be)X +3931(used.)X +4121(Since)X +555 2895(multiple)N +849(processes)X +1185(can)X +1325(access)X +1559(the)X +1685(shared)X +1923(data,)X +2105(prefetching)X +2499(may)X +2665(be)X +2769(accomplished)X +3238(by)X +3346(separate)X +3638(processes)X +3973(or)X +4067(threads)X +555 2985(whose)N +782(sole)X +932(purpose)X +1207(is)X +1281(to)X +1364(prefetch)X +1649(pages)X +1853(and)X +1990(wait)X +2149(on)X +2250(them.)X +2471(There)X +2680(is)X +2754(still)X +2894(no)X +2995(way)X +3150(to)X +3233(enforce)X +3496(write)X +3682(ordering)X +3975(other)X +4161(than)X +555 3075(keeping)N +829(pages)X +1032(in)X +1114(user)X +1268(memory)X +1555(and)X +1691(using)X +1884(the)X +3 f +2002(fsync)X +1 f +2180(\(3\))X +2294(system)X +2536(call)X +2672(to)X +2754(perform)X +3033(synchronous)X +3458(writes.)X +755 3198(In)N +845(the)X +966(area)X +1124(of)X +1214(\256le)X +1339(systems,)X +1635(the)X +1756(fast)X +1895(\256le)X +2020(system)X +2265(\(FFS\))X +2474([MCKU84])X +2871(allows)X +3103(allocation)X +3442(in)X +3527(units)X +3704(up)X +3806(to)X +3890(64KBytes)X +4232(as)X +555 3288(opposed)N +846(to)X +932(the)X +1054(4KByte)X +1327(and)X +1466(8KByte)X +1738(\256gures)X +1979(quoted)X +2220(in)X +2305([STON81].)X +2711(The)X +2859(measurements)X +3341(in)X +3426(this)X +3564(paper)X +3766(were)X +3946(taken)X +4143(from)X +555 3378(an)N +655(8KByte)X +928(FFS,)X +1104(but)X +1230(as)X +1320(LIBTP)X +1565(runs)X +1726(exclusively)X +2114(in)X +2199(user)X +2356(space,)X +2578(there)X +2762(is)X +2838(nothing)X +3105(to)X +3190(prevent)X +3454(it)X +3521(from)X +3700(being)X +3901(run)X +4031(on)X +4134(other)X +555 3468(UNIX)N +776(compatible)X +1152(\256le)X +1274(systems)X +1547(\(e.g.)X +1710(log-structured)X +2180([ROSE91],)X +2558(extent-based,)X +3004(or)X +3091(multi-block)X +3484([SELT91]\).)X +755 3591(Finally,)N +1029(with)X +1199(regard)X +1433(to)X +1523(the)X +1648(process)X +1916(structure,)X +2244(neither)X +2494(context)X +2757(switch)X +2993(time)X +3162(nor)X +3296(scheduling)X +3670(around)X +3920(semaphores)X +555 3681(seems)N +785(to)X +881(affect)X +1099(the)X +1231(system)X +1487(performance.)X +1968(However,)X +2317(the)X +2449(implementation)X +2984(of)X +3084(semaphores)X +3496(can)X +3641(impact)X +3892(performance)X +555 3771(tremendously.)N +1051(This)X +1213(is)X +1286(discussed)X +1613(in)X +1695(more)X +1880(detail)X +2078(in)X +2160(section)X +2407(4.3.)X +755 3894(The)N +908(Tuxedo)X +1181(system)X +1431(from)X +1615(AT&T)X +1861(is)X +1941(a)X +2004(transaction)X +2383(manager)X +2687(which)X +2910(coordinates)X +3307(distributed)X +3676(transaction)X +4055(commit)X +555 3984(from)N +738(a)X +801(variety)X +1051(of)X +1145(different)X +1449(local)X +1632(transaction)X +2011(managers.)X +2386(At)X +2493(this)X +2634(time,)X +2822(LIBTP)X +3070(does)X +3243(not)X +3371(have)X +3549(its)X +3650(own)X +3814(mechanism)X +4205(for)X +555 4074(distributed)N +942(commit)X +1231(processing,)X +1639(but)X +1786(could)X +2009(be)X +2130(used)X +2322(as)X +2434(a)X +2515(local)X +2716(transaction)X +3113(agent)X +3331(by)X +3455(systems)X +3752(such)X +3943(as)X +4054(Tuxedo)X +555 4164([ANDR89].)N +10 f +863 4393(i)N +870(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +903 4483(Buffer)N +1133(Management)X +10 f +1672(g)X +1 f +1720(Data)X +1892(must)X +2067(be)X +2163(copied)X +2397(between)X +2685(kernel)X +2906(space)X +3105(and)X +3241(user)X +3395(space.)X +10 f +1672 4573(g)N +1 f +1720(Buffer)X +1950(pool)X +2112(access)X +2338(is)X +2411(too)X +2533(slow.)X +10 f +1672 4663(g)N +1 f +1720(There)X +1928(is)X +2001(no)X +2101(way)X +2255(to)X +2337(request)X +2589(prefetch.)X +10 f +1672 4753(g)N +1 f +1720(Replacement)X +2159(is)X +2232(usually)X +2483(LRU)X +2663(which)X +2879(may)X +3037(be)X +3133(suboptimal)X +3508(for)X +3622(databases.)X +10 f +1672 4843(g)N +1 f +1720(There)X +1928(is)X +2001(no)X +2101(way)X +2255(to)X +2337(guarantee)X +2670(write)X +2855(ordering.)X +10 f +863 4853(i)N +870(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +903 4943(File)N +1047(System)X +10 f +1672(g)X +1 f +1720(Allocation)X +2078(is)X +2151(done)X +2327(in)X +2409(small)X +2602(blocks)X +2831(\(usually)X +3109(4K)X +3227(or)X +3314(8K\).)X +10 f +1672 5033(g)N +1 f +1720(Logical)X +1985(organization)X +2406(of)X +2493(\256les)X +2646(is)X +2719(redundantly)X +3122(expressed.)X +10 f +863 5043(i)N +870(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +903 5133(Process)N +1168(Structure)X +10 f +1672(g)X +1 f +1720(Context)X +1993(switching)X +2324(and)X +2460(message)X +2752(passing)X +3012(are)X +3131(too)X +3253(slow.)X +10 f +1672 5223(g)N +1 f +1720(A)X +1798(process)X +2059(may)X +2217(be)X +2313(descheduled)X +2730(while)X +2928(holding)X +3192(a)X +3248(semaphore.)X +10 f +863 5233(i)N +870(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +863(c)X +5193(c)Y +5113(c)Y +5033(c)Y +4953(c)Y +4873(c)Y +4793(c)Y +4713(c)Y +4633(c)Y +4553(c)Y +4473(c)Y +3990 5233(c)N +5193(c)Y +5113(c)Y +5033(c)Y +4953(c)Y +4873(c)Y +4793(c)Y +4713(c)Y +4633(c)Y +4553(c)Y +4473(c)Y +3 f +1156 5446(Table)N +1371(One:)X +1560(Shortcomings)X +2051(of)X +2138(UNIX)X +2363(transaction)X +2770(support)X +3056(cited)X +3241(in)X +3327([STON81].)X + +3 p +%%Page: 3 3 +10 s 10 xH 0 xS 3 f +1 f +755 630(The)N +901(transaction)X +1274(architecture)X +1675(presented)X +2004(in)X +2087([YOUN91])X +2474(is)X +2548(very)X +2712(similar)X +2955(to)X +3038(that)X +3179(implemented)X +3618(in)X +3701(the)X +3820(LIBTP.)X +4103(While)X +555 720([YOUN91])N +947(presents)X +1236(a)X +1298(model)X +1524(for)X +1644(providing)X +1981(transaction)X +2359(services,)X +2663(this)X +2803(paper)X +3007(focuses)X +3273(on)X +3378(the)X +3501(implementation)X +4028(and)X +4169(per-)X +555 810(formance)N +881(of)X +970(a)X +1028(particular)X +1358(system.)X +1642(In)X +1731(addition,)X +2034(we)X +2149(provide)X +2415(detailed)X +2690(comparisons)X +3116(with)X +3279(alternative)X +3639(solutions:)X +3970(traditional)X +555 900(UNIX)N +776(services)X +1055(and)X +1191(commercial)X +1590(database)X +1887(management)X +2317(systems.)X +3 f +555 1086(3.)N +655(Architecture)X +1 f +755 1209(The)N +906(library)X +1146(is)X +1224(designed)X +1534(to)X +1621(provide)X +1891(well)X +2054(de\256ned)X +2315(interfaces)X +2653(to)X +2740(the)X +2863(services)X +3147(required)X +3440(for)X +3559(transaction)X +3936(processing.)X +555 1299(These)N +777(services)X +1066(are)X +1195(recovery,)X +1527(concurrency)X +1955(control,)X +2232(and)X +2378(the)X +2506(management)X +2946(of)X +3043(shared)X +3283(data.)X +3487(First)X +3663(we)X +3787(will)X +3941(discuss)X +4201(the)X +555 1389(design)N +795(tradeoffs)X +1112(in)X +1205(the)X +1334(selection)X +1650(of)X +1748(recovery,)X +2081(concurrency)X +2510(control,)X +2787(and)X +2933(buffer)X +3160(management)X +3600(implementations,)X +4183(and)X +555 1479(then)N +713(we)X +827(will)X +971(present)X +1223(the)X +1341(overall)X +1584(library)X +1818(architecture)X +2218(and)X +2354(module)X +2614(descriptions.)X +3 f +555 1665(3.1.)N +715(Design)X +966(Tradeoffs)X +1 f +3 f +555 1851(3.1.1.)N +775(Crash)X +1004(Recovery)X +1 f +755 1974(The)N +909(recovery)X +1220(protocol)X +1516(is)X +1598(responsible)X +1992(for)X +2115(providing)X +2455(the)X +2582(transaction)X +2963(semantics)X +3308(discussed)X +3644(earlier.)X +3919(There)X +4136(are)X +4263(a)X +555 2064(wide)N +739(range)X +946(of)X +1041(recovery)X +1351(protocols)X +1677(available)X +1995([HAER83],)X +2395(but)X +2525(we)X +2647(can)X +2786(crudely)X +3054(divide)X +3281(them)X +3468(into)X +3619(two)X +3766(main)X +3953(categories.)X +555 2154(The)N +706(\256rst)X +856(category)X +1159(records)X +1422(all)X +1528(modi\256cations)X +1989(to)X +2077(the)X +2201(database)X +2504(in)X +2592(a)X +2653(separate)X +2942(\256le,)X +3089(and)X +3230(uses)X +3393(this)X +3533(\256le)X +3660(\(log\))X +3841(to)X +3928(back)X +4105(out)X +4232(or)X +555 2244(reapply)N +825(these)X +1019(modi\256cations)X +1483(if)X +1561(a)X +1626(transaction)X +2007(aborts)X +2232(or)X +2328(the)X +2455(system)X +2706(crashes.)X +3012(We)X +3153(call)X +3298(this)X +3442(set)X +3560(the)X +3 f +3687(logging)X +3963(protocols)X +1 f +4279(.)X +555 2334(The)N +703(second)X +949(category)X +1249(avoids)X +1481(the)X +1602(use)X +1732(of)X +1822(a)X +1881(log)X +2006(by)X +2109(carefully)X +2418(controlling)X +2792(when)X +2989(data)X +3146(are)X +3268(written)X +3518(to)X +3603(disk.)X +3799(We)X +3934(call)X +4073(this)X +4210(set)X +555 2424(the)N +3 f +673(non-logging)X +1096(protocols)X +1 f +1412(.)X +755 2547(Non-logging)N +1185(protocols)X +1504(hold)X +1666(dirty)X +1837(buffers)X +2085(in)X +2167(main)X +2347(memory)X +2634(or)X +2721(temporary)X +3071(\256les)X +3224(until)X +3390(commit)X +3654(and)X +3790(then)X +3948(force)X +4134(these)X +555 2637(pages)N +769(to)X +862(disk)X +1026(at)X +1115(transaction)X +1498(commit.)X +1813(While)X +2040(we)X +2165(can)X +2308(use)X +2446(temporary)X +2807(\256les)X +2971(to)X +3064(hold)X +3237(dirty)X +3418(pages)X +3631(that)X +3781(may)X +3949(need)X +4131(to)X +4223(be)X +555 2727(evicted)N +810(from)X +988(memory)X +1277(during)X +1508(a)X +1566(long-running)X +2006(transaction,)X +2400(the)X +2520(only)X +2684(user-level)X +3023(mechanism)X +3410(to)X +3494(force)X +3682(pages)X +3887(to)X +3971(disk)X +4126(is)X +4201(the)X +3 f +555 2817(fsync)N +1 f +733(\(2\))X +850(system)X +1095(call.)X +1274(Unfortunately,)X +3 f +1767(fsync)X +1 f +1945(\(2\))X +2062(is)X +2138(an)X +2237(expensive)X +2581(system)X +2826(call)X +2965(in)X +3050(that)X +3193(it)X +3260(forces)X +3480(all)X +3583(pages)X +3789(of)X +3879(a)X +3938(\256le)X +4062(to)X +4146(disk,)X +555 2907(and)N +691(transactions)X +1094(that)X +1234(manage)X +1504(more)X +1689(than)X +1847(one)X +1983(\256le)X +2105(must)X +2280(issue)X +2460(one)X +2596(call)X +2732(per)X +2855(\256le.)X +755 3030(In)N +853(addition,)X +3 f +1166(fsync)X +1 f +1344(\(2\))X +1469(provides)X +1776(no)X +1887(way)X +2051(to)X +2143(control)X +2400(the)X +2528(order)X +2728(in)X +2820(which)X +3046(dirty)X +3227(pages)X +3440(are)X +3569(written)X +3826(to)X +3918(disk.)X +4121(Since)X +555 3120(non-logging)N +976(protocols)X +1304(must)X +1489(sometimes)X +1861(order)X +2061(writes)X +2287(carefully)X +2603([SULL92],)X +2987(they)X +3155(are)X +3284(dif\256cult)X +3567(to)X +3659(implement)X +4030(on)X +4139(Unix)X +555 3210(systems.)N +868(As)X +977(a)X +1033(result,)X +1251(we)X +1365(have)X +1537(chosen)X +1780(to)X +1862(implement)X +2224(a)X +2280(logging)X +2544(protocol.)X +755 3333(Logging)N +1050(protocols)X +1372(may)X +1534(be)X +1634(categorized)X +2029(based)X +2236(on)X +2340(how)X +2502(information)X +2904(is)X +2981(logged)X +3223(\(physically)X +3602(or)X +3692(logically\))X +4022(and)X +4161(how)X +555 3423(much)N +767(is)X +854(logged)X +1106(\(before)X +1373(images,)X +1654(after)X +1836(images)X +2097(or)X +2198(both\).)X +2441(In)X +3 f +2542(physical)X +2855(logging)X +1 f +3103(,)X +3157(images)X +3417(of)X +3517(complete)X +3844(physical)X +4144(units)X +555 3513(\(pages)N +786(or)X +874(buffers\))X +1150(are)X +1270(recorded,)X +1593(while)X +1792(in)X +3 f +1875(logical)X +2118(logging)X +1 f +2387(a)X +2444(description)X +2820(of)X +2907(the)X +3025(operation)X +3348(is)X +3421(recorded.)X +3763(Therefore,)X +4121(while)X +555 3603(we)N +675(may)X +839(record)X +1071(entire)X +1280(pages)X +1489(in)X +1577(a)X +1639(physical)X +1932(log,)X +2080(we)X +2200(need)X +2378(only)X +2546(record)X +2777(the)X +2900(records)X +3162(being)X +3365(modi\256ed)X +3674(in)X +3761(a)X +3822(logical)X +4065(log.)X +4232(In)X +555 3693(fact,)N +718(physical)X +1006(logging)X +1271(can)X +1404(be)X +1501(thought)X +1766(of)X +1854(as)X +1942(a)X +1999(special)X +2243(case)X +2403(of)X +2491(logical)X +2730(logging,)X +3015(since)X +3201(the)X +3320 0.3125(``records'')AX +3686(that)X +3827(we)X +3942(log)X +4065(in)X +4148(logi-)X +555 3783(cal)N +673(logging)X +941(might)X +1151(be)X +1251(physical)X +1542(pages.)X +1789(Since)X +1991(logical)X +2233(logging)X +2501(is)X +2578(both)X +2743(more)X +2931(space-ef\256cient)X +3423(and)X +3562(more)X +3750(general,)X +4030(we)X +4147(have)X +555 3873(chosen)N +798(it)X +862(for)X +976(our)X +1103(logging)X +1367(protocol.)X +755 3996(In)N +3 f +843(before-image)X +1315(logging)X +1 f +1563(,)X +1604(we)X +1719(log)X +1842(a)X +1899(copy)X +2076(of)X +2164(the)X +2283(data)X +2438(before)X +2665(the)X +2784(update,)X +3039(while)X +3238(in)X +3 f +3321(after-image)X +3739(logging)X +1 f +3987(,)X +4027(we)X +4141(log)X +4263(a)X +555 4086(copy)N +740(of)X +836(the)X +963(data)X +1126(after)X +1303(the)X +1429(update.)X +1711(If)X +1793(we)X +1915(log)X +2045(only)X +2215(before-images,)X +2723(then)X +2889(there)X +3078(is)X +3159(suf\256cient)X +3485(information)X +3891(in)X +3981(the)X +4107(log)X +4237(to)X +555 4176(allow)N +761(us)X +860(to)X +3 f +950(undo)X +1 f +1150(the)X +1276(transaction)X +1656(\(go)X +1791(back)X +1971(to)X +2061(the)X +2187(state)X +2361(represented)X +2759(by)X +2866(the)X +2991(before-image\).)X +3514(However,)X +3876(if)X +3952(the)X +4077(system)X +555 4266(crashes)N +814(and)X +952(a)X +1010(committed)X +1374(transaction's)X +1806(changes)X +2087(have)X +2261(not)X +2385(reached)X +2658(the)X +2778(disk,)X +2953(we)X +3068(have)X +3241(no)X +3342(means)X +3568(to)X +3 f +3651(redo)X +1 f +3828(the)X +3947(transaction)X +555 4356(\(reapply)N +849(the)X +973(updates\).)X +1311(Therefore,)X +1675(logging)X +1945(only)X +2113(before-images)X +2599(necessitates)X +3004(forcing)X +3262(dirty)X +3439(pages)X +3648(at)X +3732(commit)X +4002(time.)X +4210(As)X +555 4446(mentioned)N +913(above,)X +1145(forcing)X +1397(pages)X +1600(at)X +1678(commit)X +1942(is)X +2015(considered)X +2383(too)X +2505(costly.)X +755 4569(If)N +834(we)X +953(log)X +1080(only)X +1247(after-images,)X +1694(then)X +1857(there)X +2043(is)X +2121(suf\256cient)X +2444(information)X +2847(in)X +2934(the)X +3057(log)X +3184(to)X +3271(allow)X +3474(us)X +3570(to)X +3657(redo)X +3825(the)X +3947(transaction)X +555 4659(\(go)N +687(forward)X +967(to)X +1054(the)X +1177(state)X +1348(represented)X +1743(by)X +1847(the)X +1969(after-image\),)X +2411(but)X +2537(we)X +2655(do)X +2759(not)X +2885(have)X +3061(the)X +3183(information)X +3585(required)X +3877(to)X +3963(undo)X +4147(tran-)X +555 4749(sactions)N +845(which)X +1073(aborted)X +1346(after)X +1526(dirty)X +1709(pages)X +1924(were)X +2113(written)X +2372(to)X +2466(disk.)X +2670(Therefore,)X +3039(logging)X +3314(only)X +3487(after-images)X +3920(necessitates)X +555 4839(holding)N +819(all)X +919(dirty)X +1090(buffers)X +1338(in)X +1420(main)X +1600(memory)X +1887(until)X +2053(commit)X +2317(or)X +2404(writing)X +2655(them)X +2835(to)X +2917(a)X +2973(temporary)X +3323(\256le.)X +755 4962(Since)N +956(neither)X +1202(constraint)X +1541(\(forcing)X +1823(pages)X +2029(on)X +2132(commit)X +2399(or)X +2489(buffering)X +2811(pages)X +3016(until)X +3184(commit\))X +3477(was)X +3624(feasible,)X +3916(we)X +4032(chose)X +4237(to)X +555 5052(log)N +683(both)X +851(before)X +1083(and)X +1225(after)X +1399(images.)X +1672(The)X +1823(only)X +1991(remaining)X +2342(consideration)X +2800(is)X +2879(when)X +3079(changes)X +3363(get)X +3486(written)X +3738(to)X +3825(disk.)X +4023(Changes)X +555 5142(affect)N +764(both)X +931(data)X +1090(pages)X +1298(and)X +1438(the)X +1560(log.)X +1726(If)X +1804(the)X +1926(changed)X +2218(data)X +2376(page)X +2552(is)X +2629(written)X +2880(before)X +3110(the)X +3232(log)X +3358(page,)X +3554(and)X +3694(the)X +3816(system)X +4062(crashes)X +555 5232(before)N +787(the)X +911(log)X +1039(page)X +1217(is)X +1296(written,)X +1569(the)X +1693(log)X +1820(will)X +1969(contain)X +2230(insuf\256cient)X +2615(information)X +3018(to)X +3105(undo)X +3290(the)X +3413(change.)X +3706(This)X +3873(violates)X +4147(tran-)X +555 5322(saction)N +803(semantics,)X +1160(since)X +1346(some)X +1536(changed)X +1825(data)X +1980(pages)X +2184(may)X +2343(not)X +2466(have)X +2638(been)X +2810(written,)X +3077(and)X +3213(the)X +3331(database)X +3628(cannot)X +3862(be)X +3958(restored)X +4237(to)X +555 5412(its)N +650(pre-transaction)X +1152(state.)X +755 5535(The)N +914(log)X +1050(record)X +1290(describing)X +1658(an)X +1768(update)X +2016(must)X +2205(be)X +2315(written)X +2576(to)X +2672(stable)X +2893(storage)X +3159(before)X +3398(the)X +3529(modi\256ed)X +3846(page.)X +4071(This)X +4246(is)X +3 f +555 5625(write-ahead)N +992(logging)X +1 f +1240(.)X +1307(If)X +1388(log)X +1517(records)X +1781(are)X +1907(safely)X +2126(written)X +2380(to)X +2469(disk,)X +2649(data)X +2810(pages)X +3020(may)X +3185(be)X +3288(written)X +3542(at)X +3627(any)X +3770(time)X +3939(afterwards.)X +555 5715(This)N +721(means)X +950(that)X +1094(the)X +1216(only)X +1382(\256le)X +1508(that)X +1652(ever)X +1815(needs)X +2022(to)X +2108(be)X +2208(forced)X +2438(to)X +2524(disk)X +2681(is)X +2758(the)X +2880(log.)X +3046(Since)X +3248(the)X +3370(log)X +3495(is)X +3571(append-only,)X +4015(modi\256ed)X + +4 p +%%Page: 4 4 +10 s 10 xH 0 xS 1 f +3 f +1 f +555 630(pages)N +760(always)X +1005(appear)X +1242(at)X +1322(the)X +1442(end)X +1580(and)X +1718(may)X +1878(be)X +1976(written)X +2224(to)X +2307(disk)X +2461(ef\256ciently)X +2807(in)X +2890(any)X +3027(\256le)X +3150(system)X +3393(that)X +3534(favors)X +3756(sequential)X +4102(order-)X +555 720(ing)N +677(\()X +2 f +704(e.g.)X +1 f +820(,)X +860(FFS,)X +1032(log-structured)X +1502(\256le)X +1624(system,)X +1886(or)X +1973(an)X +2069(extent-based)X +2495(system\).)X +3 f +555 906(3.1.2.)N +775(Concurrency)X +1245(Control)X +1 f +755 1029(The)N +918(concurrency)X +1354(control)X +1619(protocol)X +1923(is)X +2013(responsible)X +2415(for)X +2546(maintaining)X +2965(consistency)X +3376(in)X +3475(the)X +3610(presence)X +3929(of)X +4033(multiple)X +555 1119(accesses.)N +897(There)X +1114(are)X +1242(several)X +1499(alternative)X +1867(solutions)X +2183(such)X +2358(as)X +2453(locking,)X +2741(optimistic)X +3088(concurrency)X +3514(control)X +3769([KUNG81],)X +4183(and)X +555 1209(timestamp)N +912(ordering)X +1208([BERN80].)X +1619(Since)X +1821(optimistic)X +2164(methods)X +2459(and)X +2599(timestamp)X +2956(ordering)X +3252(are)X +3374(generally)X +3696(more)X +3884(complex)X +4183(and)X +555 1299(restrict)N +804(concurrency)X +1228(without)X +1498(eliminating)X +1888(starvation)X +2230(or)X +2323(deadlocks,)X +2690(we)X +2810(chose)X +3018(two-phase)X +3373(locking)X +3638(\(2PL\).)X +3890(Strict)X +4088(2PL)X +4246(is)X +555 1389(suboptimal)N +935(for)X +1054(certain)X +1297(data)X +1455(structures)X +1791(such)X +1962(as)X +2053(B-trees)X +2309(because)X +2588(it)X +2656(can)X +2792(limit)X +2966(concurrency,)X +3408(so)X +3503(we)X +3621(use)X +3752(a)X +3812(special)X +4059(locking)X +555 1479(protocol)N +842(based)X +1045(on)X +1145(one)X +1281(described)X +1609(in)X +1691([LEHM81].)X +755 1602(The)N +901(B-tree)X +1123(locking)X +1384(protocol)X +1672(we)X +1787(implemented)X +2226(releases)X +2502(locks)X +2691(at)X +2769(internal)X +3034(nodes)X +3241(in)X +3323(the)X +3441(tree)X +3582(as)X +3669(it)X +3733(descends.)X +4083(A)X +4161(lock)X +555 1692(on)N +658(an)X +757(internal)X +1025(page)X +1200(is)X +1276(always)X +1522(released)X +1808(before)X +2036(a)X +2094(lock)X +2254(on)X +2356(its)X +2453(child)X +2635(is)X +2710(obtained)X +3008(\(that)X +3177(is,)X +3272(locks)X +3463(are)X +3584(not)X +3 f +3708(coupled)X +1 f +3996([BAY77])X +555 1782(during)N +786(descent\).)X +1116(When)X +1330(a)X +1388(leaf)X +1531(\(or)X +1647(internal\))X +1941(page)X +2115(is)X +2190(split,)X +2369(a)X +2427(write)X +2614(lock)X +2774(is)X +2849(acquired)X +3148(on)X +3250(the)X +3370(parent)X +3593(before)X +3821(the)X +3941(lock)X +4100(on)X +4201(the)X +555 1872(just-split)N +855(page)X +1028(is)X +1102(released)X +1387(\(locks)X +1604(are)X +3 f +1724(coupled)X +1 f +2011(during)X +2241(ascent\).)X +2530(Write)X +2734(locks)X +2924(on)X +3025(internal)X +3291(pages)X +3495(are)X +3615(released)X +3899(immediately)X +555 1962(after)N +723(the)X +841(page)X +1013(is)X +1086(updated,)X +1380(but)X +1502(locks)X +1691(on)X +1791(leaf)X +1932(pages)X +2135(are)X +2254(held)X +2412(until)X +2578(the)X +2696(end)X +2832(of)X +2919(the)X +3037(transaction.)X +755 2085(Since)N +964(locks)X +1164(are)X +1294(released)X +1589(during)X +1828(descent,)X +2119(the)X +2247(structure)X +2558(of)X +2655(the)X +2783(tree)X +2934(may)X +3102(change)X +3360(above)X +3582(a)X +3648(node)X +3834(being)X +4042(used)X +4219(by)X +555 2175(some)N +752(process.)X +1061(If)X +1143(that)X +1291(process)X +1560(must)X +1743(later)X +1914(ascend)X +2161(the)X +2287(tree)X +2435(because)X +2717(of)X +2811(a)X +2874(page)X +3053(split,)X +3237(any)X +3380(such)X +3554(change)X +3809(must)X +3991(not)X +4120(cause)X +555 2265(confusion.)N +938(We)X +1077(use)X +1211(the)X +1336(technique)X +1675(described)X +2010(in)X +2099([LEHM81])X +2487(which)X +2710(exploits)X +2989(the)X +3113(ordering)X +3411(of)X +3504(data)X +3664(on)X +3770(a)X +3832(B-tree)X +4059(page)X +4237(to)X +555 2355(guarantee)N +888(that)X +1028(no)X +1128(process)X +1389(ever)X +1548(gets)X +1697(lost)X +1832(as)X +1919(a)X +1975(result)X +2173(of)X +2260(internal)X +2525(page)X +2697(updates)X +2962(made)X +3156(by)X +3256(other)X +3441(processes.)X +755 2478(If)N +836(a)X +899(transaction)X +1278(that)X +1425(updates)X +1697(a)X +1760(B-tree)X +1988(aborts,)X +2231(the)X +2356(user-visible)X +2757(changes)X +3043(to)X +3131(the)X +3255(tree)X +3402(must)X +3583(be)X +3685(rolled)X +3898(back.)X +4116(How-)X +555 2568(ever,)N +735(changes)X +1015(to)X +1097(the)X +1215(internal)X +1480(nodes)X +1687(of)X +1774(the)X +1892(tree)X +2033(need)X +2205(not)X +2327(be)X +2423(rolled)X +2630(back,)X +2822(since)X +3007(these)X +3192(pages)X +3395(contain)X +3651(no)X +3751(user-visible)X +4145(data.)X +555 2658(When)N +771(rolling)X +1008(back)X +1184(a)X +1244(transaction,)X +1640(we)X +1758(roll)X +1893(back)X +2069(all)X +2173(leaf)X +2318(page)X +2494(updates,)X +2783(but)X +2909(no)X +3013(internal)X +3281(insertions)X +3615(or)X +3705(page)X +3880(splits.)X +4111(In)X +4201(the)X +555 2748(worst)N +759(case,)X +944(this)X +1085(will)X +1235(leave)X +1431(a)X +1493(leaf)X +1640(page)X +1818(less)X +1964(than)X +2128(half)X +2279(full.)X +2456(This)X +2624(may)X +2788(cause)X +2993(poor)X +3166(space)X +3371(utilization,)X +3741(but)X +3869(does)X +4042(not)X +4170(lose)X +555 2838(user)N +709(data.)X +755 2961(Holding)N +1038(locks)X +1228(on)X +1329(leaf)X +1471(pages)X +1675(until)X +1842(transaction)X +2215(commit)X +2480(guarantees)X +2845(that)X +2986(no)X +3087(other)X +3273(process)X +3535(can)X +3668(insert)X +3866(or)X +3953(delete)X +4165(data)X +555 3051(that)N +711(has)X +854(been)X +1042(touched)X +1332(by)X +1448(this)X +1598(process.)X +1914(Rolling)X +2188(back)X +2375(insertions)X +2721(and)X +2872(deletions)X +3196(on)X +3311(leaf)X +3467(pages)X +3685(guarantees)X +4064(that)X +4219(no)X +555 3141(aborted)N +819(updates)X +1087(are)X +1209(ever)X +1371(visible)X +1607(to)X +1692(other)X +1880(transactions.)X +2326(Leaving)X +2612(page)X +2787(splits)X +2978(intact)X +3179(permits)X +3442(us)X +3536(to)X +3621(release)X +3867(internal)X +4134(write)X +555 3231(locks)N +744(early.)X +965(Thus)X +1145(transaction)X +1517(semantics)X +1853(are)X +1972(preserved,)X +2325(and)X +2461(locks)X +2650(are)X +2769(held)X +2927(for)X +3041(shorter)X +3284(periods.)X +755 3354(The)N +901(extra)X +1083(complexity)X +1464(introduced)X +1828(by)X +1929(this)X +2065(locking)X +2326(protocol)X +2614(appears)X +2881(substantial,)X +3264(but)X +3387(it)X +3452(is)X +3525(important)X +3856(for)X +3970(multi-user)X +555 3444(execution.)N +950(The)X +1118(bene\256ts)X +1410(of)X +1520(non-two-phase)X +2040(locking)X +2323(on)X +2446(B-trees)X +2721(are)X +2863(well)X +3044(established)X +3443(in)X +3548(the)X +3689(database)X +4009(literature)X +555 3534([BAY77],)N +899([LEHM81].)X +1320(If)X +1394(a)X +1450(process)X +1711(held)X +1869(locks)X +2058(until)X +2224(it)X +2288(committed,)X +2670(then)X +2828(a)X +2884(long-running)X +3322(update)X +3556(could)X +3754(lock)X +3912(out)X +4034(all)X +4134(other)X +555 3624(transactions)N +967(by)X +1076(preventing)X +1448(any)X +1593(other)X +1787(process)X +2057(from)X +2241(locking)X +2509(the)X +2635(root)X +2792(page)X +2972(of)X +3067(the)X +3193(tree.)X +3382(The)X +3535(B-tree)X +3764(locking)X +4032(protocol)X +555 3714(described)N +884(above)X +1096(guarantees)X +1460(that)X +1600(locks)X +1789(on)X +1889(internal)X +2154(pages)X +2357(are)X +2476(held)X +2634(for)X +2748(extremely)X +3089(short)X +3269(periods,)X +3545(thereby)X +3806(increasing)X +4156(con-)X +555 3804(currency.)N +3 f +555 3990(3.1.3.)N +775(Management)X +1245(of)X +1332(Shared)X +1596(Data)X +1 f +755 4113(Database)N +1075(systems)X +1353(permit)X +1587(many)X +1790(users)X +1980(to)X +2067(examine)X +2364(and)X +2505(update)X +2744(the)X +2866(same)X +3055(data)X +3213(concurrently.)X +3683(In)X +3774(order)X +3968(to)X +4054(provide)X +555 4203(this)N +702(concurrent)X +1078(access)X +1316(and)X +1464(enforce)X +1738(the)X +1868(write-ahead)X +2280(logging)X +2556(protocol)X +2855(described)X +3195(in)X +3289(section)X +3548(3.1.1,)X +3759(we)X +3884(use)X +4022(a)X +4089(shared)X +555 4293(memory)N +848(buffer)X +1071(manager.)X +1414(Not)X +1559(only)X +1726(does)X +1898(this)X +2038(provide)X +2308(the)X +2431(guarantees)X +2800(we)X +2919(require,)X +3192(but)X +3319(a)X +3380(user-level)X +3722(buffer)X +3944(manager)X +4246(is)X +555 4383(frequently)N +916(faster)X +1126(than)X +1295(using)X +1498(the)X +1626(\256le)X +1758(system)X +2010(buffer)X +2237(cache.)X +2491(Reads)X +2717(or)X +2814(writes)X +3040(involving)X +3376(the)X +3504(\256le)X +3636(system)X +3888(buffer)X +4115(cache)X +555 4473(often)N +746(require)X +1000(copying)X +1284(data)X +1444(between)X +1738(user)X +1898(and)X +2040(kernel)X +2266(space)X +2470(while)X +2673(a)X +2734(user-level)X +3076(buffer)X +3298(manager)X +3600(can)X +3737(return)X +3954(pointers)X +4237(to)X +555 4563(data)N +709(pages)X +912(directly.)X +1217(Additionally,)X +1661(if)X +1730(more)X +1915(than)X +2073(one)X +2209(process)X +2470(uses)X +2628(the)X +2746(same)X +2931(page,)X +3123(then)X +3281(fewer)X +3485(copies)X +3710(may)X +3868(be)X +3964(required.)X +3 f +555 4749(3.2.)N +715(Module)X +997(Architecture)X +1 f +755 4872(The)N +913(preceding)X +1262(sections)X +1552(described)X +1892(modules)X +2195(for)X +2321(managing)X +2669(the)X +2799(transaction)X +3183(log,)X +3337(locks,)X +3558(and)X +3706(a)X +3774(cache)X +3990(of)X +4089(shared)X +555 4962(buffers.)N +847(In)X +938(addition,)X +1244(we)X +1362(need)X +1538(to)X +1624(provide)X +1893(functionality)X +2326(for)X +2444(transaction)X +2 f +2819(begin)X +1 f +2997(,)X +2 f +3040(commit)X +1 f +3276(,)X +3319(and)X +2 f +3458(abort)X +1 f +3654(processing,)X +4040(necessi-)X +555 5052(tating)N +769(a)X +837(transaction)X +1221(manager.)X +1570(In)X +1669(order)X +1871(to)X +1965(arbitrate)X +2265(concurrent)X +2641(access)X +2879(to)X +2973(locks)X +3173(and)X +3320(buffers,)X +3599(we)X +3724(include)X +3991(a)X +4058(process)X +555 5142(management)N +995(module)X +1264(which)X +1489(manages)X +1799(a)X +1864(collection)X +2209(of)X +2305(semaphores)X +2713(used)X +2889(to)X +2980(block)X +3187(and)X +3332(release)X +3585(processes.)X +3962(Finally,)X +4237(in)X +555 5232(order)N +752(to)X +841(provide)X +1113(a)X +1176(simple,)X +1436(standard)X +1735(interface)X +2044(we)X +2165(have)X +2344(modi\256ed)X +2655(the)X +2780(database)X +3084(access)X +3317(routines)X +3602(\()X +3 f +3629(db)X +1 f +3717(\(3\)\).)X +3904(For)X +4041(the)X +4165(pur-)X +555 5322(poses)N +758(of)X +850(this)X +990(paper)X +1194(we)X +1313(call)X +1453(the)X +1575(modi\256ed)X +1883(package)X +2171(the)X +3 f +2293(Record)X +2567(Manager)X +1 f +2879(.)X +2943(Figure)X +3176(one)X +3316(shows)X +3540(the)X +3662(main)X +3846(interfaces)X +4183(and)X +555 5412(architecture)N +955(of)X +1042(LIBTP.)X + +5 p +%%Page: 5 5 +10 s 10 xH 0 xS 1 f +3 f +1 f +11 s +1851 1520(log_commit)N +2764 2077(buf_unpin)N +2764 1987(buf_get)N +3633 1408(buf_unpin)N +3633 1319(buf_pin)N +3633 1230(buf_get)N +3 f +17 s +1163 960(Txn)N +1430(M)X +1559(anager)X +2582(Record)X +3040(M)X +3169(anager)X +1 Dt +2363 726 MXY +0 355 Dl +1426 0 Dl +0 -355 Dl +-1426 0 Dl +3255 1616 MXY +0 535 Dl +534 0 Dl +0 -535 Dl +-534 0 Dl +2185 MX +0 535 Dl +535 0 Dl +0 -535 Dl +-535 0 Dl +1116 MX +0 535 Dl +534 0 Dl +0 -535 Dl +-534 0 Dl +726 MY +0 355 Dl +891 0 Dl +0 -355 Dl +-891 0 Dl +1 f +11 s +2207 1297(lock)N +2564 1386(log)N +865(unlock_all)X +1851 1609(log_unroll)N +1650 2508 MXY +0 178 Dl +1605 0 Dl +0 -178 Dl +-1605 0 Dl +1294 1616 MXY +19 -30 Dl +-19 11 Dl +-20 -11 Dl +20 30 Dl +0 -535 Dl +2319 2508 MXY +-22 -30 Dl +4 23 Dl +-18 14 Dl +36 -7 Dl +-936 -357 Dl +3277 2455(sleep_on)N +1405 1616 MXY +36 4 Dl +-18 -13 Dl +1 -22 Dl +-19 31 Dl +1070 -535 Dl +2631 2508 MXY +36 6 Dl +-18 -14 Dl +3 -22 Dl +-21 30 Dl +891 -357 Dl +1426 2455(sleep_on)N +3255 1884 MXY +-31 -20 Dl +11 20 Dl +-11 19 Dl +31 -19 Dl +-535 0 Dl +1554 2366(wake)N +3277(wake)X +2185 1884 MXY +-31 -20 Dl +12 20 Dl +-12 19 Dl +31 -19 Dl +-356 0 Dl +0 -803 Dl +3 f +17 s +1236 1851(Lock)N +1118 2030(M)N +1247(anager)X +2339 1851(Log)N +2187 2030(M)N +2316(anager)X +3333 1851(Buffer)N +3257 2030(M)N +3386(anager)X +3522 1616 MXY +20 -30 Dl +-20 11 Dl +-20 -11 Dl +20 30 Dl +0 -535 Dl +1950 2654(Process)N +2424(M)X +2553(anager)X +2542 1616 MXY +19 -30 Dl +-19 11 Dl +-20 -11 Dl +20 30 Dl +0 -535 Dl +1 f +11 s +2207 1364(unlock)N +2452 2508 MXY +20 -31 Dl +-20 11 Dl +-19 -11 Dl +19 31 Dl +0 -357 Dl +2497 2322(sleep_on)N +2497 2233(wake)N +3 Dt +-1 Ds +3 f +10 s +1790 2830(Figure)N +2037(1:)X +2144(Library)X +2435(module)X +2708(interfaces.)X +1 f +10 f +555 3010(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +3 f +555 3286(3.2.1.)N +775(The)X +928(Log)X +1081(Manager)X +1 f +755 3409(The)N +3 f +907(Log)X +1067(Manager)X +1 f +1406(enforces)X +1706(the)X +1831(write-ahead)X +2238(logging)X +2509(protocol.)X +2843(Its)X +2949(primitive)X +3268(operations)X +3628(are)X +2 f +3753(log)X +1 f +3855(,)X +2 f +3901(log_commit)X +1 f +4279(,)X +2 f +555 3499(log_read)N +1 f +844(,)X +2 f +889(log_roll)X +1 f +1171(and)X +2 f +1312(log_unroll)X +1 f +1649(.)X +1714(The)X +2 f +1864(log)X +1 f +1991(call)X +2132(performs)X +2447(a)X +2508(buffered)X +2806(write)X +2996(of)X +3088(the)X +3211(speci\256ed)X +3520(log)X +3646(record)X +3876(and)X +4016(returns)X +4263(a)X +555 3589(unique)N +809(log)X +947(sequence)X +1278(number)X +1559(\(LSN\).)X +1840(This)X +2017(LSN)X +2203(may)X +2376(then)X +2549(be)X +2660(used)X +2842(to)X +2939(retrieve)X +3220(a)X +3291(record)X +3532(from)X +3723(the)X +3856(log)X +3993(using)X +4201(the)X +2 f +555 3679(log_read)N +1 f +865(call.)X +1042(The)X +2 f +1188(log)X +1 f +1311(interface)X +1614(knows)X +1844(very)X +2008(little)X +2175(about)X +2374(the)X +2493(internal)X +2759(format)X +2993(of)X +3080(the)X +3198(log)X +3320(records)X +3577(it)X +3641(receives.)X +3965(Rather,)X +4219(all)X +555 3769(log)N +681(records)X +942(are)X +1065 0.4028(referenced)AX +1430(by)X +1534(a)X +1594(header)X +1833(structure,)X +2158(a)X +2218(log)X +2344(record)X +2574(type,)X +2756(and)X +2896(a)X +2956(character)X +3276(buffer)X +3497(containing)X +3859(the)X +3981(data)X +4138(to)X +4223(be)X +555 3859(logged.)N +834(The)X +980(log)X +1103(record)X +1330(type)X +1489(is)X +1563(used)X +1731(to)X +1814(call)X +1951(the)X +2070(appropriate)X +2457(redo)X +2621(and)X +2758(undo)X +2939(routines)X +3217(during)X +2 f +3446(abort)X +1 f +3639(and)X +2 f +3775(commit)X +1 f +4031(process-)X +555 3949(ing.)N +721(While)X +941(we)X +1059(have)X +1235(used)X +1406(the)X +3 f +1528(Log)X +1684(Manager)X +1 f +2019(to)X +2104(provide)X +2372(before)X +2601(and)X +2740(after)X +2911(image)X +3130(logging,)X +3417(it)X +3484(may)X +3645(also)X +3797(be)X +3896(used)X +4066(for)X +4183(any)X +555 4039(of)N +642(the)X +760(logging)X +1024(algorithms)X +1386(discussed.)X +755 4162(The)N +2 f +905(log_commit)X +1 f +1308(operation)X +1636(behaves)X +1920(exactly)X +2177(like)X +2322(the)X +2 f +2445(log)X +1 f +2572(operation)X +2900(but)X +3026(guarantees)X +3394(that)X +3538(the)X +3660(log)X +3786(has)X +3917(been)X +4093(forced)X +555 4252(to)N +643(disk)X +802(before)X +1034(returning.)X +1394(A)X +1478(discussion)X +1837(of)X +1930(our)X +2063(commit)X +2333(strategy)X +2613(appears)X +2884(in)X +2971(the)X +3094(implementation)X +3621(section)X +3873(\(section)X +4152(4.2\).)X +2 f +555 4342(Log_unroll)N +1 f +935(reads)X +1126(log)X +1249(records)X +1507(from)X +1684(the)X +1803(log,)X +1946(following)X +2278(backward)X +2611(transaction)X +2983(pointers)X +3261(and)X +3397(calling)X +3635(the)X +3753(appropriate)X +4139(undo)X +555 4432(routines)N +839(to)X +927(implement)X +1295(transaction)X +1673(abort.)X +1904(In)X +1997(a)X +2059(similar)X +2307(manner,)X +2 f +2594(log_roll)X +1 f +2877(reads)X +3073(log)X +3201(records)X +3464(sequentially)X +3877(forward,)X +4178(cal-)X +555 4522(ling)N +699(the)X +817(appropriate)X +1203(redo)X +1366(routines)X +1644(to)X +1726(recover)X +1988(committed)X +2350(transactions)X +2753(after)X +2921(a)X +2977(system)X +3219(crash.)X +3 f +555 4708(3.2.2.)N +775(The)X +928(Buffer)X +1171(Manager)X +1 f +755 4831(The)N +3 f +912(Buffer)X +1167(Manager)X +1 f +1511(uses)X +1681(a)X +1749(pool)X +1923(of)X +2022(shared)X +2264(memory)X +2563(to)X +2657(provide)X +2934(a)X +3002(least-recently-used)X +3641(\(LRU\))X +3886(block)X +4095(cache.)X +555 4921(Although)N +886(the)X +1013(current)X +1270(library)X +1513(provides)X +1818(an)X +1923(LRU)X +2112(cache,)X +2345(it)X +2418(would)X +2647(be)X +2752(simple)X +2994(to)X +3085(add)X +3229(alternate)X +3534(replacement)X +3955(policies)X +4232(as)X +555 5011(suggested)N +903(by)X +1015([CHOU85])X +1408(or)X +1507(to)X +1601(provide)X +1878(multiple)X +2176(buffer)X +2405(pools)X +2610(with)X +2784(different)X +3092(policies.)X +3412(Transactions)X +3853(request)X +4116(pages)X +555 5101(from)N +736(the)X +859(buffer)X +1081(manager)X +1383(and)X +1524(keep)X +1701(them)X +3 f +1886(pinned)X +1 f +2145(to)X +2232(ensure)X +2466(that)X +2610(they)X +2772(are)X +2895(not)X +3021(written)X +3272(to)X +3358(disk)X +3515(while)X +3717(they)X +3879(are)X +4002(in)X +4088(a)X +4148(logi-)X +555 5191(cally)N +732(inconsistent)X +1135(state.)X +1343(When)X +1556(page)X +1729(replacement)X +2143(is)X +2217(necessary,)X +2571(the)X +3 f +2689(Buffer)X +2932(Manager)X +1 f +3264(\256nds)X +3439(an)X +3535(unpinned)X +3853(page)X +4025(and)X +4161(then)X +555 5281(checks)N +794(with)X +956(the)X +3 f +1074(Log)X +1227(Manager)X +1 f +1559(to)X +1641(ensure)X +1871(that)X +2011(the)X +2129(write-ahead)X +2529(protocol)X +2816(is)X +2889(enforced.)X +3 f +555 5467(3.2.3.)N +775(The)X +928(Lock)X +1121(Manager)X +1 f +755 5590(The)N +3 f +901(Lock)X +1095(Manager)X +1 f +1428(supports)X +1720(general)X +1978(purpose)X +2253(locking)X +2514(\(single)X +2753(writer,)X +2986(multiple)X +3273(readers\))X +3553(which)X +3769(is)X +3842(currently)X +4152(used)X +555 5680(to)N +638(provide)X +904(two-phase)X +1254(locking)X +1514(and)X +1650(high)X +1812(concurrency)X +2230(B-tree)X +2451(locking.)X +2751(However,)X +3086(the)X +3204(general)X +3461(purpose)X +3735(nature)X +3956(of)X +4043(the)X +4161(lock)X + +6 p +%%Page: 6 6 +10 s 10 xH 0 xS 1 f +3 f +1 f +555 630(manager)N +857(provides)X +1158(the)X +1281(ability)X +1510(to)X +1597(support)X +1862(a)X +1923(variety)X +2171(of)X +2263(locking)X +2528(protocols.)X +2890(Currently,)X +3241(all)X +3345(locks)X +3538(are)X +3661(issued)X +3885(at)X +3967(the)X +4089(granu-)X +555 720(larity)N +747(of)X +837(a)X +896(page)X +1071(\(the)X +1219(size)X +1367(of)X +1457(a)X +1516(buffer)X +1736(in)X +1821(the)X +1942(buffer)X +2161(pool\))X +2352(which)X +2570(is)X +2645(identi\256ed)X +2969(by)X +3071(two)X +3213(4-byte)X +3440(integers)X +3716(\(a)X +3801(\256le)X +3925(id)X +4009(and)X +4147(page)X +555 810(number\).)N +898(This)X +1071(provides)X +1378(the)X +1507(necessary)X +1851(information)X +2259(to)X +2351(extend)X +2595(the)X +3 f +2723(Lock)X +2926(Manager)X +1 f +3268(to)X +3360(perform)X +3649(hierarchical)X +4059(locking)X +555 900([GRAY76].)N +982(The)X +1133(current)X +1387(implementation)X +1915(does)X +2088(not)X +2216(support)X +2482(locks)X +2677(at)X +2760(other)X +2950(granularities)X +3376(and)X +3517(does)X +3689(not)X +3816(promote)X +4108(locks;)X +555 990(these)N +740(are)X +859(obvious)X +1132(future)X +1344(additions)X +1657(to)X +1739(the)X +1857(system.)X +755 1113(If)N +831(an)X +929(incoming)X +1253(lock)X +1413(request)X +1667(cannot)X +1903(be)X +2001(granted,)X +2284(the)X +2404(requesting)X +2760(process)X +3023(is)X +3098(queued)X +3352(for)X +3467(the)X +3586(lock)X +3745(and)X +3882(descheduled.)X +555 1203(When)N +769(a)X +827(lock)X +987(is)X +1062(released,)X +1368(the)X +1488(wait)X +1647(queue)X +1860(is)X +1934(traversed)X +2250(and)X +2387(any)X +2524(newly)X +2741(compatible)X +3118(locks)X +3308(are)X +3428(granted.)X +3730(Locks)X +3947(are)X +4067(located)X +555 1293(via)N +680(a)X +743(\256le)X +872(and)X +1015(page)X +1194(hash)X +1368(table)X +1551(and)X +1694(are)X +1820(chained)X +2097(both)X +2266(by)X +2373(object)X +2595(and)X +2737(by)X +2843(transaction,)X +3241(facilitating)X +3614(rapid)X +3805(traversal)X +4108(of)X +4201(the)X +555 1383(lock)N +713(table)X +889(during)X +1118(transaction)X +1490(commit)X +1754(and)X +1890(abort.)X +755 1506(The)N +907(primary)X +1188(interfaces)X +1528(to)X +1617(the)X +1742(lock)X +1907(manager)X +2211(are)X +2 f +2337(lock)X +1 f +2471(,)X +2 f +2518(unlock)X +1 f +2732(,)X +2779(and)X +2 f +2922(lock_unlock_all)X +1 f +3434(.)X +2 f +3500(Lock)X +1 f +3682(obtains)X +3939(a)X +4001(new)X +4161(lock)X +555 1596(for)N +680(a)X +747(speci\256c)X +1023(object.)X +1290(There)X +1509(are)X +1638(also)X +1797(two)X +1947(variants)X +2231(of)X +2328(the)X +2 f +2456(lock)X +1 f +2620(request,)X +2 f +2902(lock_upgrade)X +1 f +3373(and)X +2 f +3519(lock_downgrade)X +1 f +4053(,)X +4103(which)X +555 1686(allow)N +755(the)X +875(caller)X +1076(to)X +1160(atomically)X +1519(trade)X +1701(a)X +1758(lock)X +1917(of)X +2005(one)X +2142(type)X +2301(for)X +2416(a)X +2473(lock)X +2632(of)X +2720(another.)X +2 f +3022(Unlock)X +1 f +3275(releases)X +3551(a)X +3608(speci\256c)X +3874(mode)X +4073(of)X +4161(lock)X +555 1776(on)N +655(a)X +711(speci\256c)X +976(object.)X +2 f +1232(Lock_unlock_all)X +1 f +1786(releases)X +2061(all)X +2161(the)X +2279(locks)X +2468(associated)X +2818(with)X +2980(a)X +3036(speci\256c)X +3301(transaction.)X +3 f +555 1962(3.2.4.)N +775(The)X +928(Process)X +1207(Manager)X +1 f +755 2085(The)N +3 f +900(Process)X +1179(Manager)X +1 f +1511(acts)X +1656(as)X +1743(a)X +1799(user-level)X +2136(scheduler)X +2464(to)X +2546(make)X +2740(processes)X +3068(wait)X +3226(on)X +3326(unavailable)X +3716(locks)X +3905(and)X +4041(pending)X +555 2175(buffer)N +778(cache)X +988(I/O.)X +1161(For)X +1297(each)X +1470(process,)X +1756(a)X +1817(semaphore)X +2190(is)X +2268(maintained)X +2649(upon)X +2834(which)X +3055(that)X +3200(process)X +3466(waits)X +3660(when)X +3859(it)X +3928(needs)X +4136(to)X +4223(be)X +555 2265(descheduled.)N +1014(When)X +1228(a)X +1286(process)X +1549(needs)X +1754(to)X +1838(be)X +1936(run,)X +2084(its)X +2180(semaphore)X +2549(is)X +2623(cleared,)X +2897(and)X +3034(the)X +3153(operating)X +3477(system)X +3720(reschedules)X +4116(it.)X +4201(No)X +555 2355(sophisticated)N +1002(scheduling)X +1378(algorithm)X +1718(is)X +1799(applied;)X +2085(if)X +2162(the)X +2288(lock)X +2454(for)X +2576(which)X +2800(a)X +2864(process)X +3133(was)X +3286(waiting)X +3554(becomes)X +3863(available,)X +4201(the)X +555 2445(process)N +824(is)X +905(made)X +1107(runnable.)X +1456(It)X +1533(would)X +1761(have)X +1941(been)X +2121(possible)X +2411(to)X +2501(change)X +2757(the)X +2883(kernel's)X +3170(process)X +3439(scheduler)X +3775(to)X +3865(interact)X +4134(more)X +555 2535(ef\256ciently)N +900(with)X +1062(the)X +1180(lock)X +1338(manager,)X +1655(but)X +1777(doing)X +1979(so)X +2070(would)X +2290(have)X +2462(compromised)X +2918(our)X +3045(commitment)X +3469(to)X +3551(a)X +3607(user-level)X +3944(package.)X +3 f +555 2721(3.2.5.)N +775(The)X +928(Transaction)X +1361(Manager)X +1 f +755 2844(The)N +3 f +901(Transaction)X +1335(Manager)X +1 f +1668(provides)X +1965(the)X +2084(standard)X +2377(interface)X +2680(of)X +2 f +2768(txn_begin)X +1 f +3084(,)X +2 f +3125(txn_commit)X +1 f +3499(,)X +3540(and)X +2 f +3676(txn_abort)X +1 f +3987(.)X +4047(It)X +4116(keeps)X +555 2934(track)N +742(of)X +835(all)X +941(active)X +1159(transactions,)X +1588(assigns)X +1845(unique)X +2089(transaction)X +2467(identi\256ers,)X +2833(and)X +2974(directs)X +3213(the)X +3336(abort)X +3526(and)X +3667(commit)X +3936(processing.)X +555 3024(When)N +772(a)X +2 f +833(txn_begin)X +1 f +1174(is)X +1252(issued,)X +1497(the)X +3 f +1620(Transaction)X +2058(Manager)X +1 f +2395(assigns)X +2651(the)X +2773(next)X +2935(available)X +3249(transaction)X +3625(identi\256er,)X +3958(allocates)X +4263(a)X +555 3114(per-process)N +948(transaction)X +1322(structure)X +1625(in)X +1709(shared)X +1941(memory,)X +2249(increments)X +2622(the)X +2741(count)X +2940(of)X +3028(active)X +3241(transactions,)X +3665(and)X +3802(returns)X +4046(the)X +4165(new)X +555 3204(transaction)N +937(identi\256er)X +1256(to)X +1348(the)X +1476(calling)X +1724(process.)X +2034(The)X +2188(in-memory)X +2573(transaction)X +2954(structure)X +3264(contains)X +3560(a)X +3625(pointer)X +3881(into)X +4034(the)X +4161(lock)X +555 3294(table)N +734(for)X +851(locks)X +1043(held)X +1204(by)X +1307(this)X +1445(transaction,)X +1840(the)X +1961(last)X +2095(log)X +2220(sequence)X +2538(number,)X +2826(a)X +2885(transaction)X +3260(state)X +3430(\()X +2 f +3457(idle)X +1 f +(,)S +2 f +3620(running)X +1 f +3873(,)X +2 f +3915(aborting)X +1 f +4190(,)X +4232(or)X +2 f +555 3384(committing\))N +1 f +942(,)X +982(an)X +1078(error)X +1255(code,)X +1447(and)X +1583(a)X +1639(semaphore)X +2007(identi\256er.)X +755 3507(At)N +859(commit,)X +1147(the)X +3 f +1269(Transaction)X +1706(Manager)X +1 f +2042(calls)X +2 f +2213(log_commit)X +1 f +2615(to)X +2700(record)X +2929(the)X +3050(end)X +3189(of)X +3279(transaction)X +3654(and)X +3793(to)X +3878(\257ush)X +4056(the)X +4177(log.)X +555 3597(Then)N +743(it)X +810(directs)X +1047(the)X +3 f +1168(Lock)X +1364(Manager)X +1 f +1699(to)X +1784(release)X +2031(all)X +2134(locks)X +2325(associated)X +2677(with)X +2841(the)X +2961(given)X +3161(transaction.)X +3575(If)X +3651(a)X +3709(transaction)X +4083(aborts,)X +555 3687(the)N +3 f +680(Transaction)X +1120(Manager)X +1 f +1459(calls)X +1633(on)X +2 f +1739(log_unroll)X +1 f +2102(to)X +2190(read)X +2355(the)X +2479(transaction's)X +2915(log)X +3043(records)X +3306(and)X +3448(undo)X +3634(any)X +3776(modi\256cations)X +4237(to)X +555 3777(the)N +673(database.)X +1010(As)X +1119(in)X +1201(the)X +1319(commit)X +1583(case,)X +1762(it)X +1826(then)X +1984(calls)X +2 f +2151(lock_unlock_all)X +1 f +2683(to)X +2765(release)X +3009(the)X +3127(transaction's)X +3557(locks.)X +3 f +555 3963(3.2.6.)N +775(The)X +928(Record)X +1198(Manager)X +1 f +755 4086(The)N +3 f +919(Record)X +1208(Manager)X +1 f +1559(supports)X +1869(the)X +2006(abstraction)X +2397(of)X +2503(reading)X +2783(and)X +2938(writing)X +3208(records)X +3484(to)X +3585(a)X +3660(database.)X +3996(We)X +4147(have)X +555 4176(modi\256ed)N +861(the)X +981(the)X +1101(database)X +1399(access)X +1626(routines)X +3 f +1905(db)X +1 f +1993(\(3\))X +2108([BSD91])X +2418(to)X +2501(call)X +2638(the)X +2757(log,)X +2900(lock,)X +3079(and)X +3216(buffer)X +3434(managers.)X +3803(In)X +3891(order)X +4082(to)X +4165(pro-)X +555 4266(vide)N +718(functionality)X +1152(to)X +1239(perform)X +1523(undo)X +1708(and)X +1849(redo,)X +2037(the)X +3 f +2160(Record)X +2434(Manager)X +1 f +2770(de\256nes)X +3021(a)X +3081(collection)X +3421(of)X +3512(log)X +3638(record)X +3868(types)X +4061(and)X +4201(the)X +555 4356(associated)N +920(undo)X +1115(and)X +1266(redo)X +1444(routines.)X +1777(The)X +3 f +1937(Log)X +2105(Manager)X +1 f +2452(performs)X +2777(a)X +2848(table)X +3039(lookup)X +3296(on)X +3411(the)X +3543(record)X +3783(type)X +3955(to)X +4051(call)X +4201(the)X +555 4446(appropriate)N +951(routines.)X +1299(For)X +1440(example,)X +1762(the)X +1890(B-tree)X +2121(access)X +2356(method)X +2625(requires)X +2913(two)X +3062(log)X +3193(record)X +3428(types:)X +3648(insert)X +3855(and)X +4000(delete.)X +4241(A)X +555 4536(replace)N +808(operation)X +1131(is)X +1204(implemented)X +1642(as)X +1729(a)X +1785(delete)X +1997(followed)X +2302(by)X +2402(an)X +2498(insert)X +2696(and)X +2832(is)X +2905(logged)X +3143(accordingly.)X +3 f +555 4722(3.3.)N +715(Application)X +1134(Architectures)X +1 f +755 4845(The)N +907(structure)X +1215(of)X +1309(LIBTP)X +1558(allows)X +1794(application)X +2177(designers)X +2507(to)X +2596(trade)X +2784(off)X +2905(performance)X +3339(and)X +3481(protection.)X +3872(Since)X +4076(a)X +4138(large)X +555 4935(portion)N +810(of)X +901(LIBTP's)X +1205(functionality)X +1638(is)X +1715(provided)X +2024(by)X +2128(managing)X +2468(structures)X +2804(in)X +2889(shared)X +3122(memory,)X +3432(its)X +3530(structures)X +3865(are)X +3987(subject)X +4237(to)X +555 5025(corruption)N +926(by)X +1043(applications)X +1467(when)X +1678(the)X +1813(library)X +2064(is)X +2154(linked)X +2391(directly)X +2673(with)X +2852(the)X +2987(application.)X +3420(For)X +3568(this)X +3720(reason,)X +3987(LIBTP)X +4246(is)X +555 5115(designed)N +864(to)X +950(allow)X +1152(compilation)X +1558(into)X +1706(a)X +1766(separate)X +2053(server)X +2273(process)X +2537(which)X +2756(may)X +2917(be)X +3016(accessed)X +3321(via)X +3442(a)X +3501(socket)X +3729(interface.)X +4094(In)X +4184(this)X +555 5205(way)N +712(LIBTP's)X +1015(data)X +1172(structures)X +1507(are)X +1629(protected)X +1951(from)X +2130(application)X +2509(code,)X +2704(but)X +2829(communication)X +3349(overhead)X +3666(is)X +3741(increased.)X +4107(When)X +555 5295(applications)N +975(are)X +1107(trusted,)X +1377(LIBTP)X +1631(may)X +1801(be)X +1909(compiled)X +2239(directly)X +2516(into)X +2672(the)X +2802(application)X +3190(providing)X +3533(improved)X +3872(performance.)X +555 5385(Figures)N +815(two)X +955(and)X +1091(three)X +1272(show)X +1461(the)X +1579(two)X +1719(alternate)X +2016(application)X +2392(architectures.)X +755 5508(There)N +964(are)X +1084(potentially)X +1447(two)X +1588(modes)X +1818(in)X +1901(which)X +2118(one)X +2255(might)X +2462(use)X +2590(LIBTP)X +2833(in)X +2916(a)X +2972(server)X +3189(based)X +3392(architecture.)X +3832(In)X +3919(the)X +4037(\256rst,)X +4201(the)X +555 5598(server)N +778(would)X +1004(provide)X +1275(the)X +1399(capability)X +1741(to)X +1829(respond)X +2109(to)X +2197(requests)X +2486(to)X +2574(each)X +2747(of)X +2839(the)X +2962(low)X +3107(level)X +3288(modules)X +3584(\(lock,)X +3794(log,)X +3941(buffer,)X +4183(and)X +555 5688(transaction)N +944(managers\).)X +1356(Unfortunately,)X +1863(the)X +1998(performance)X +2442(of)X +2546(such)X +2730(a)X +2803(system)X +3062(is)X +3152(likely)X +3371(to)X +3470(be)X +3583(blindingly)X +3947(slow)X +4134(since)X + +7 p +%%Page: 7 7 +10 s 10 xH 0 xS 1 f +3 f +1 f +1 Dt +1864 1125 MXY +15 -26 Dl +-15 10 Dl +-14 -10 Dl +14 26 Dl +0 -266 Dl +1315 1125 MXY +15 -26 Dl +-15 10 Dl +-14 -10 Dl +14 26 Dl +0 -266 Dl +3 Dt +1133 1125 MXY +0 798 Dl +931 0 Dl +0 -798 Dl +-931 0 Dl +1 Dt +1266 1257 MXY +0 133 Dl +665 0 Dl +0 -133 Dl +-665 0 Dl +3 f +8 s +1513 1351(driver)N +1502 1617(LIBTP)N +1266 1390 MXY +0 400 Dl +665 0 Dl +0 -400 Dl +-665 0 Dl +3 Dt +1133 726 MXY +0 133 Dl +931 0 Dl +0 -133 Dl +-931 0 Dl +1 f +1029 1098(txn_abort)N +964 1015(txn_commit)N +1018 932(txn_begin)N +1910 1015(db_ops)N +3 f +1308 820(Application)N +1645(Program)X +1398 1218(Server)N +1594(Process)X +1 f +1390 986(socket)N +1569(interface)X +1 Dt +1848 967 MXY +-23 -14 Dl +8 14 Dl +-8 15 Dl +23 -15 Dl +-50 0 Dl +1324 MX +23 15 Dl +-9 -15 Dl +9 -14 Dl +-23 14 Dl +50 0 Dl +3 Dt +2862 859 MXY +0 1064 Dl +932 0 Dl +0 -1064 Dl +-932 0 Dl +1 Dt +3178 1390 MXY +24 -12 Dl +-17 0 Dl +-8 -15 Dl +1 27 Dl +150 -265 Dl +3494 1390 MXY +0 -27 Dl +-8 15 Dl +-16 1 Dl +24 11 Dl +-166 -265 Dl +3 f +3232 1617(LIBTP)N +2995 1390 MXY +0 400 Dl +666 0 Dl +0 -400 Dl +-666 0 Dl +992 MY +0 133 Dl +666 0 Dl +0 -133 Dl +-666 0 Dl +3168 1086(Application)N +1 f +2939 1201(txn_begin)N +2885 1284(txn_commit)N +2950 1368(txn_abort)N +3465 1284(db_ops)N +3 f +3155 766(Single)N +3339(Process)X +3 Dt +-1 Ds +811 2100(Figure)N +1023(2:)X +1107(Server)X +1318(Architecture.)X +1 f +1727(In)X +1811(this)X +1934(con\256guration,)X +811 2190(the)N +916(library)X +1113(is)X +1183(loaded)X +1380(into)X +1507(a)X +1562(server)X +1744(process)X +1962(which)X +2145(is)X +2214(ac-)X +811 2280(cessed)N +993(via)X +1087(a)X +1131(socket)X +1310(interface.)X +3 f +2563 2100(Figure)N +2803(3:)X +2914(Single)X +3140(Process)X +3403(Architecture.)X +1 f +3839(In)X +3950(this)X +2563 2190(con\256guration,)N +2948(the)X +3053(library)X +3250(routines)X +3483(are)X +3587(loaded)X +3784(as)X +3864(part)X +3990(of)X +2563 2280(the)N +2657(application)X +2957(and)X +3065(accessed)X +3303(via)X +3397(a)X +3441(subroutine)X +3727(interface.)X +10 s +10 f +555 2403(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +1 f +555 2679(modifying)N +909(a)X +966(piece)X +1157(of)X +1245(data)X +1400(would)X +1621(require)X +1870(three)X +2051(or)X +2138(possibly)X +2424(four)X +2578(separate)X +2862(communications:)X +3433(one)X +3569(to)X +3651(lock)X +3809(the)X +3927(data,)X +4101(one)X +4237(to)X +555 2769(obtain)N +781(the)X +905(data,)X +1085(one)X +1227(to)X +1315(log)X +1443(the)X +1567(modi\256cation,)X +2017(and)X +2159(possibly)X +2451(one)X +2593(to)X +2681(transmit)X +2969(the)X +3093(modi\256ed)X +3403(data.)X +3583(Figure)X +3817(four)X +3976(shows)X +4201(the)X +555 2859(relative)N +826(performance)X +1263(for)X +1387(retrieving)X +1728(a)X +1793(single)X +2013(record)X +2248(using)X +2450(the)X +2577(record)X +2812(level)X +2997(call)X +3142(versus)X +3376(using)X +3578(the)X +3705(lower)X +3917(level)X +4102(buffer)X +555 2949(management)N +987(and)X +1125(locking)X +1387(calls.)X +1616(The)X +1763(2:1)X +1887(ratio)X +2056(observed)X +2367(in)X +2450(the)X +2569(single)X +2781(process)X +3043(case)X +3203(re\257ects)X +3456(the)X +3575(additional)X +3916(overhead)X +4232(of)X +555 3039(parsing)N +819(eight)X +1006(commands)X +1380(rather)X +1595(than)X +1760(one)X +1903(while)X +2108(the)X +2233(3:1)X +2362(ratio)X +2536(observed)X +2853(in)X +2942(the)X +3067(client/server)X +3491(architecture)X +3898(re\257ects)X +4157(both)X +555 3129(the)N +679(parsing)X +941(and)X +1083(the)X +1207(communication)X +1731(overheard.)X +2118(Although)X +2445(there)X +2631(may)X +2794(be)X +2895(applications)X +3307(which)X +3528(could)X +3731(tolerate)X +3997(such)X +4169(per-)X +555 3219(formance,)N +904(it)X +973(seems)X +1194(far)X +1309(more)X +1499(feasible)X +1774(to)X +1861(support)X +2126(a)X +2187(higher)X +2417(level)X +2597(interface,)X +2923(such)X +3094(as)X +3185(that)X +3329(provided)X +3638(by)X +3742(a)X +3802(query)X +4009(language)X +555 3309(\()N +2 f +582(e.g.)X +1 f +718(SQL)X +889([SQL86]\).)X +755 3432(Although)N +1081(LIBTP)X +1327(does)X +1498(not)X +1624(have)X +1800(an)X +1900(SQL)X +2075(parser,)X +2316(we)X +2433(have)X +2608(built)X +2777(a)X +2836(server)X +3056(application)X +3435(using)X +3631(the)X +3752(toolkit)X +3983(command)X +555 3522(language)N +882(\(TCL\))X +1124([OUST90].)X +1544(The)X +1706(server)X +1940(supports)X +2248(a)X +2321(command)X +2674(line)X +2831(interface)X +3150(similar)X +3409(to)X +3508(the)X +3643(subroutine)X +4017(interface)X +555 3612(de\256ned)N +811(in)X +3 f +893(db)X +1 f +981(\(3\).)X +1135(Since)X +1333(it)X +1397(is)X +1470(based)X +1673(on)X +1773(TCL,)X +1964(it)X +2028(provides)X +2324(control)X +2571(structures)X +2903(as)X +2990(well.)X +3 f +555 3798(4.)N +655(Implementation)X +1 f +3 f +555 3984(4.1.)N +715(Locking)X +1014(and)X +1162(Deadlock)X +1502(Detection)X +1 f +755 4107(LIBTP)N +1007(uses)X +1175(two-phase)X +1535(locking)X +1805(for)X +1929(user)X +2093(data.)X +2297(Strictly)X +2562(speaking,)X +2897(the)X +3024(two)X +3173(phases)X +3416(in)X +3507(two-phase)X +3866(locking)X +4135(are)X +4263(a)X +3 f +555 4197(grow)N +1 f +756(phase,)X +986(during)X +1221(which)X +1443(locks)X +1638(are)X +1763(acquired,)X +2086(and)X +2228(a)X +3 f +2290(shrink)X +1 f +2537(phase,)X +2766(during)X +3001(which)X +3223(locks)X +3418(are)X +3543(released.)X +3873(No)X +3997(lock)X +4161(may)X +555 4287(ever)N +720(be)X +822(acquired)X +1124(during)X +1358(the)X +1481(shrink)X +1706(phase.)X +1954(The)X +2104(grow)X +2294(phase)X +2502(lasts)X +2669(until)X +2840(the)X +2963(\256rst)X +3112(release,)X +3381(which)X +3602(marks)X +3823(the)X +3946(start)X +4109(of)X +4201(the)X +555 4377(shrink)N +780(phase.)X +1028(In)X +1120(practice,)X +1420(the)X +1543(grow)X +1733(phase)X +1941(lasts)X +2108(for)X +2227(the)X +2350(duration)X +2642(of)X +2734(a)X +2795(transaction)X +3172(in)X +3259(LIBTP)X +3506(and)X +3647(in)X +3734(commercial)X +4138(data-)X +555 4467(base)N +721(systems.)X +1037(The)X +1184(shrink)X +1406(phase)X +1611(takes)X +1798(place)X +1990(during)X +2221(transaction)X +2595(commit)X +2861(or)X +2950(abort.)X +3177(This)X +3341(means)X +3568(that)X +3710(locks)X +3901(are)X +4022(acquired)X +555 4557(on)N +655(demand)X +929(during)X +1158(the)X +1276(lifetime)X +1545(of)X +1632(a)X +1688(transaction,)X +2080(and)X +2216(held)X +2374(until)X +2540(commit)X +2804(time,)X +2986(at)X +3064(which)X +3280(point)X +3464(all)X +3564(locks)X +3753(are)X +3872(released.)X +755 4680(If)N +832(multiple)X +1121(transactions)X +1527(are)X +1649(active)X +1864(concurrently,)X +2313(deadlocks)X +2657(can)X +2792(occur)X +2994(and)X +3133(must)X +3311(be)X +3410(detected)X +3701(and)X +3840(resolved.)X +4174(The)X +555 4770(lock)N +715(table)X +893(can)X +1027(be)X +1125(thought)X +1391(of)X +1480(as)X +1569(a)X +1627(representation)X +2104(of)X +2193(a)X +2251(directed)X +2532(graph.)X +2777(The)X +2924(nodes)X +3133(in)X +3216(the)X +3335(graph)X +3539(are)X +3659(transactions.)X +4103(Edges)X +555 4860(represent)N +878(the)X +3 f +1004(waits-for)X +1 f +1340(relation)X +1613(between)X +1909(transactions;)X +2342(if)X +2419(transaction)X +2 f +2799(A)X +1 f +2876(is)X +2957(waiting)X +3225(for)X +3347(a)X +3411(lock)X +3577(held)X +3743(by)X +3851(transaction)X +2 f +4230(B)X +1 f +4279(,)X +555 4950(then)N +716(a)X +775(directed)X +1057(edge)X +1232(exists)X +1437(from)X +2 f +1616(A)X +1 f +1687(to)X +2 f +1771(B)X +1 f +1842(in)X +1926(the)X +2046(graph.)X +2291(A)X +2371(deadlock)X +2683(exists)X +2887(if)X +2958(a)X +3016(cycle)X +3208(appears)X +3476(in)X +3560(the)X +3680(graph.)X +3925(By)X +4040(conven-)X +555 5040(tion,)N +719(no)X +819(transaction)X +1191(ever)X +1350(waits)X +1539(for)X +1653(a)X +1709(lock)X +1867(it)X +1931(already)X +2188(holds,)X +2401(so)X +2492(re\257exive)X +2793(edges)X +2996(are)X +3115(impossible.)X +755 5163(A)N +836(distinguished)X +1285(process)X +1549(monitors)X +1856(the)X +1977(lock)X +2138(table,)X +2337(searching)X +2668(for)X +2785(cycles.)X +3048(The)X +3195(frequency)X +3539(with)X +3703(which)X +3921(this)X +4058(process)X +555 5253(runs)N +716(is)X +792(user-settable;)X +1243(for)X +1360(the)X +1481(multi-user)X +1833(tests)X +1998(discussed)X +2328(in)X +2413(section)X +2663(5.1.2,)X +2866(it)X +2933(has)X +3063(been)X +3238(set)X +3350(to)X +3435(wake)X +3628(up)X +3731(every)X +3932(second,)X +4197(but)X +555 5343(more)N +742(sophisticated)X +1182(schedules)X +1516(are)X +1636(certainly)X +1938(possible.)X +2261(When)X +2474(a)X +2531(cycle)X +2722(is)X +2796(detected,)X +3105(one)X +3242(of)X +3330(the)X +3449(transactions)X +3853(in)X +3936(the)X +4055(cycle)X +4246(is)X +555 5433(nominated)N +917(and)X +1057(aborted.)X +1362(When)X +1578(the)X +1700(transaction)X +2076(aborts,)X +2315(it)X +2382(rolls)X +2547(back)X +2722(its)X +2820(changes)X +3102(and)X +3241(releases)X +3519(its)X +3617(locks,)X +3829(thereby)X +4093(break-)X +555 5523(ing)N +677(the)X +795(cycle)X +985(in)X +1067(the)X +1185(graph.)X + +8 p +%%Page: 8 8 +10 s 10 xH 0 xS 1 f +3 f +1 f +4 Ds +1 Dt +1866 865 MXY +1338 0 Dl +1866 1031 MXY +1338 0 Dl +1866 1199 MXY +1338 0 Dl +1866 1366 MXY +1338 0 Dl +1866 1533 MXY +1338 0 Dl +1866 1701 MXY +1338 0 Dl +-1 Ds +5 Dt +1866 1868 MXY +1338 0 Dl +1 Dt +1 Di +2981 MX + 2981 1868 lineto + 2981 1575 lineto + 3092 1575 lineto + 3092 1868 lineto + 2981 1868 lineto +closepath 21 2981 1575 3092 1868 Dp +2646 MX + 2646 1868 lineto + 2646 949 lineto + 2758 949 lineto + 2758 1868 lineto + 2646 1868 lineto +closepath 14 2646 949 2758 1868 Dp +2312 MX + 2312 1868 lineto + 2312 1701 lineto + 2423 1701 lineto + 2423 1868 lineto + 2312 1868 lineto +closepath 3 2312 1701 2423 1868 Dp +1977 MX + 1977 1868 lineto + 1977 1512 lineto + 2089 1512 lineto + 2089 1868 lineto + 1977 1868 lineto +closepath 19 1977 1512 2089 1868 Dp +3 f +2640 2047(Client/Server)N +1957(Single)X +2185(Process)X +7 s +2957 1957(record)N +2570(component)X +2289(record)X +1890(components)X +1733 1724(.1)N +1733 1556(.2)N +1733 1389(.3)N +1733 1222(.4)N +1733 1055(.5)N +1733 889(.6)N +1590 726(Elapsed)N +1794(Time)X +1613 782(\(in)N +1693(seconds\))X +3 Dt +-1 Ds +8 s +555 2255(Figure)N +756(4:)X +829(Comparison)X +1187(of)X +1260(High)X +1416(and)X +1540(Low)X +1681(Level)X +1850(Interfaces.)X +1 f +2174(Elapsed)X +2395(time)X +2528(in)X +2597(seconds)X +2818(to)X +2887(perform)X +3111(a)X +3158(single)X +3330(record)X +3511(retrieval)X +3742(from)X +3885(a)X +3932(command)X +4203(line)X +555 2345(\(rather)N +751(than)X +888(a)X +943(procedural)X +1241(interface\))X +1510(is)X +1579(shown)X +1772(on)X +1862(the)X +1966(y)X +2024(axis.)X +2185(The)X +2310(``component'')X +2704(numbers)X +2950(re\257ect)X +3135(the)X +3239(timings)X +3458(when)X +3622(the)X +3726(record)X +3914(is)X +3983(retrieved)X +4235(by)X +555 2435(separate)N +785(calls)X +924(to)X +996(the)X +1096(lock)X +1228(manager)X +1469(and)X +1583(buffer)X +1760(manager)X +2001(while)X +2165(the)X +2264(``record'')X +2531(timings)X +2745(were)X +2889(obtained)X +3130(by)X +3215(using)X +3375(a)X +3424(single)X +3598(call)X +3711(to)X +3782(the)X +3881(record)X +4064(manager.)X +555 2525(The)N +674(2:1)X +776(ratio)X +913(observed)X +1163(for)X +1257(the)X +1355(single)X +1528(process)X +1739(case)X +1868(is)X +1930(a)X +1977(re\257ection)X +2237(of)X +2309(the)X +2406(parsing)X +2613(overhead)X +2865(for)X +2958(executing)X +3225(eight)X +3372(separate)X +3599(commands)X +3895(rather)X +4062(than)X +4191(one.)X +555 2615(The)N +673(additional)X +948(factor)X +1115(of)X +1187(one)X +1298(re\257ected)X +1536(in)X +1605(the)X +1702(3:1)X +1803(ratio)X +1939(for)X +2031(the)X +2127(client/server)X +2460(architecture)X +2794(is)X +2855(due)X +2965(to)X +3033(the)X +3129(communication)X +3545(overhead.)X +3828(The)X +3945(true)X +4062(ratios)X +4222(are)X +555 2705(actually)N +775(worse)X +945(since)X +1094(the)X +1190(component)X +1492(timings)X +1703(do)X +1785(not)X +1884(re\257ect)X +2060(the)X +2155(search)X +2334(times)X +2490(within)X +2671(each)X +2804(page)X +2941(or)X +3011(the)X +3106(time)X +3237(required)X +3466(to)X +3533(transmit)X +3760(the)X +3855(page)X +3992(between)X +4221(the)X +555 2795(two)N +667(processes.)X +10 s +10 f +555 2885(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +3 f +555 3161(4.2.)N +715(Group)X +961(Commit)X +1 f +755 3284(Since)N +959(the)X +1083(log)X +1211(must)X +1392(be)X +1494(\257ushed)X +1751(to)X +1839(disk)X +1997(at)X +2080(commit)X +2349(time,)X +2536(disk)X +2694(bandwidth)X +3057(fundamentally)X +3545(limits)X +3751(the)X +3874(rate)X +4020(at)X +4103(which)X +555 3374(transactions)N +959(complete.)X +1314(Since)X +1513(most)X +1688(transactions)X +2091(write)X +2276(only)X +2438(a)X +2494(few)X +2635(small)X +2828(records)X +3085(to)X +3167(the)X +3285(log,)X +3427(the)X +3545(last)X +3676(page)X +3848(of)X +3935(the)X +4053(log)X +4175(will)X +555 3464(be)N +658(\257ushed)X +916(once)X +1095(by)X +1202(every)X +1408(transaction)X +1787(which)X +2010(writes)X +2233(to)X +2322(it.)X +2433(In)X +2527(the)X +2652(naive)X +2853(implementation,)X +3402(these)X +3593(\257ushes)X +3841(would)X +4067(happen)X +555 3554(serially.)N +755 3677(LIBTP)N +1008(uses)X +3 f +1177(group)X +1412(commit)X +1 f +1702([DEWI84])X +2077(in)X +2170(order)X +2371(to)X +2464(amortize)X +2775(the)X +2903(cost)X +3062(of)X +3159(one)X +3305(synchronous)X +3740(disk)X +3903(write)X +4098(across)X +555 3767(multiple)N +851(transactions.)X +1304(Group)X +1539(commit)X +1812(provides)X +2117(a)X +2182(way)X +2345(for)X +2468(a)X +2533(group)X +2749(of)X +2845(transactions)X +3257(to)X +3348(commit)X +3621(simultaneously.)X +4174(The)X +555 3857(\256rst)N +709(several)X +967(transactions)X +1380(to)X +1472(commit)X +1745(write)X +1939(their)X +2115(changes)X +2403(to)X +2494(the)X +2621(in-memory)X +3006(log)X +3137(page,)X +3338(then)X +3505(sleep)X +3699(on)X +3808(a)X +3873(distinguished)X +555 3947(semaphore.)N +966(Later,)X +1179(a)X +1238(committing)X +1629(transaction)X +2004(\257ushes)X +2249(the)X +2370(page)X +2545(to)X +2630(disk,)X +2805(and)X +2943(wakes)X +3166(up)X +3268(all)X +3370(its)X +3467(sleeping)X +3756(peers.)X +3988(The)X +4135(point)X +555 4037(at)N +635(which)X +853(changes)X +1134(are)X +1255(actually)X +1531(written)X +1780(is)X +1855(determined)X +2238(by)X +2340(three)X +2523(thresholds.)X +2914(The)X +3061(\256rst)X +3207(is)X +3281(the)X +2 f +3400(group)X +3612(threshold)X +1 f +3935(and)X +4072(de\256nes)X +555 4127(the)N +674(minimum)X +1005(number)X +1271(of)X +1359(transactions)X +1763(which)X +1979(must)X +2154(be)X +2250(active)X +2462(in)X +2544(the)X +2662(system)X +2904(before)X +3130(transactions)X +3533(are)X +3652(forced)X +3878(to)X +3960(participate)X +555 4217(in)N +646(a)X +711(group)X +927(commit.)X +1240(The)X +1394(second)X +1646(is)X +1728(the)X +2 f +1855(wait)X +2021(threshold)X +1 f +2352(which)X +2577(is)X +2658(expressed)X +3003(as)X +3098(the)X +3224(percentage)X +3601(of)X +3696(active)X +3916(transactions)X +555 4307(waiting)N +826(to)X +919(be)X +1026(committed.)X +1439(The)X +1595(last)X +1737(is)X +1821(the)X +2 f +1950(logdelay)X +2257(threshold)X +1 f +2590(which)X +2816(indicates)X +3131(how)X +3299(much)X +3507(un\257ushed)X +3848(log)X +3980(should)X +4223(be)X +555 4397(allowed)N +829(to)X +911(accumulate)X +1297(before)X +1523(a)X +1579(waiting)X +1839(transaction's)X +2289(commit)X +2553(record)X +2779(is)X +2852(\257ushed.)X +755 4520(Group)N +981(commit)X +1246(can)X +1379(substantially)X +1803(improve)X +2090(performance)X +2517(for)X +2631(high-concurrency)X +3218(environments.)X +3714(If)X +3788(only)X +3950(a)X +4006(few)X +4147(tran-)X +555 4610(sactions)N +836(are)X +957(running,)X +1248(it)X +1314(is)X +1389(unlikely)X +1673(to)X +1757(improve)X +2046(things)X +2263(at)X +2343(all.)X +2485(The)X +2632(crossover)X +2962(point)X +3148(is)X +3223(the)X +3343(point)X +3529(at)X +3609(which)X +3827(the)X +3947(transaction)X +555 4700(commit)N +823(rate)X +968(is)X +1045(limited)X +1295(by)X +1399(the)X +1521(bandwidth)X +1883(of)X +1974(the)X +2096(device)X +2330(on)X +2434(which)X +2654(the)X +2776(log)X +2902(resides.)X +3189(If)X +3267(processes)X +3599(are)X +3722(trying)X +3937(to)X +4023(\257ush)X +4201(the)X +555 4790(log)N +677(faster)X +876(than)X +1034(the)X +1152(log)X +1274(disk)X +1427(can)X +1559(accept)X +1785(data,)X +1959(then)X +2117(group)X +2324(commit)X +2588(will)X +2732(increase)X +3016(the)X +3134(commit)X +3398(rate.)X +3 f +555 4976(4.3.)N +715(Kernel)X +971(Intervention)X +1418(for)X +1541(Synchronization)X +1 f +755 5099(Since)N +954(LIBTP)X +1197(uses)X +1356(data)X +1511(in)X +1594(shared)X +1825(memory)X +2113(\()X +2 f +2140(e.g.)X +1 f +2277(the)X +2395(lock)X +2553(table)X +2729(and)X +2865(buffer)X +3082(pool\))X +3271(it)X +3335(must)X +3510(be)X +3606(possible)X +3888(for)X +4002(a)X +4058(process)X +555 5189(to)N +640(acquire)X +900(exclusive)X +1226(access)X +1454(to)X +1538(shared)X +1770(data)X +1926(in)X +2010(order)X +2202(to)X +2286(prevent)X +2549(corruption.)X +2945(In)X +3034(addition,)X +3338(the)X +3458(process)X +3721(manager)X +4020(must)X +4197(put)X +555 5279(processes)N +886(to)X +971(sleep)X +1159(when)X +1356(the)X +1477(lock)X +1638(or)X +1728(buffer)X +1948(they)X +2109(request)X +2364(is)X +2440(in)X +2525(use)X +2655(by)X +2758(some)X +2950(other)X +3138(process.)X +3441(In)X +3530(the)X +3650(LIBTP)X +3894(implementa-)X +555 5385(tion)N +705(under)X +914(Ultrix)X +1131(4.0)X +7 s +5353(2)Y +10 s +5385(,)Y +1305(we)X +1424(use)X +1556(System)X +1816(V)X +1899(semaphores)X +2303(to)X +2390(provide)X +2660(this)X +2800(synchronization.)X +3377(Semaphores)X +3794(implemented)X +4237(in)X +555 5475(this)N +701(fashion)X +968(turn)X +1128(out)X +1261(to)X +1354(be)X +1461(an)X +1568(expensive)X +1920(choice)X +2161(for)X +2285(synchronization,)X +2847(because)X +3132(each)X +3310(access)X +3546(traps)X +3732(to)X +3824(the)X +3952(kernel)X +4183(and)X +8 s +10 f +555 5547(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)N +5 s +1 f +727 5625(2)N +8 s +763 5650(Ultrix)N +932(and)X +1040(DEC)X +1184(are)X +1277(trademarks)X +1576(of)X +1645(Digital)X +1839(Equipment)X +2136(Corporation.)X + +9 p +%%Page: 9 9 +8 s 8 xH 0 xS 1 f +10 s +3 f +1 f +555 630(executes)N +852(atomically)X +1210(there.)X +755 753(On)N +878(architectures)X +1314(that)X +1459(support)X +1724(atomic)X +1967(test-and-set,)X +2382(a)X +2443(much)X +2646(better)X +2854(choice)X +3089(would)X +3314(be)X +3415(to)X +3502(attempt)X +3767(to)X +3854(obtain)X +4079(a)X +4139(spin-)X +555 843(lock)N +714(with)X +877(a)X +934(test-and-set,)X +1345(and)X +1482(issue)X +1663(a)X +1720(system)X +1963(call)X +2100(only)X +2263(if)X +2333(the)X +2452(spinlock)X +2744(is)X +2818(unavailable.)X +3249(Since)X +3447(virtually)X +3738(all)X +3838(semaphores)X +4237(in)X +555 933(LIBTP)N +801(are)X +924(uncontested)X +1330(and)X +1469(are)X +1591(held)X +1752(for)X +1869(very)X +2035(short)X +2218(periods)X +2477(of)X +2567(time,)X +2752(this)X +2890(would)X +3113(improve)X +3403(performance.)X +3873(For)X +4007(example,)X +555 1023(processes)N +885(must)X +1062(acquire)X +1321(exclusive)X +1646(access)X +1874(to)X +1958(buffer)X +2177(pool)X +2341(metadata)X +2653(in)X +2737(order)X +2929(to)X +3013(\256nd)X +3159(and)X +3297(pin)X +3421(a)X +3479(buffer)X +3698(in)X +3781(shared)X +4012(memory.)X +555 1113(This)N +721(semaphore)X +1093(is)X +1170(requested)X +1502(most)X +1681(frequently)X +2034(in)X +2119(LIBTP.)X +2404(However,)X +2742(once)X +2917(it)X +2984(is)X +3060(acquired,)X +3380(only)X +3545(a)X +3604(few)X +3748(instructions)X +4144(must)X +555 1203(be)N +656(executed)X +966(before)X +1196(it)X +1264(is)X +1341(released.)X +1669(On)X +1791(one)X +1931(architecture)X +2335(for)X +2453(which)X +2673(we)X +2791(were)X +2972(able)X +3130(to)X +3216(gather)X +3441(detailed)X +3719(pro\256ling)X +4018(informa-)X +555 1293(tion,)N +729(the)X +857(cost)X +1015(of)X +1111(the)X +1238(semaphore)X +1615(calls)X +1791(accounted)X +2146(for)X +2269(25%)X +2445(of)X +2541(the)X +2668(total)X +2839(time)X +3010(spent)X +3208(updating)X +3517(the)X +3644(metadata.)X +4003(This)X +4174(was)X +555 1383(fairly)N +749(consistent)X +1089(across)X +1310(most)X +1485(of)X +1572(the)X +1690(critical)X +1933(sections.)X +755 1506(In)N +848(an)X +950(attempt)X +1216(to)X +1304(quantify)X +1597(the)X +1720(overhead)X +2040(of)X +2132(kernel)X +2358(synchronization,)X +2915(we)X +3034(ran)X +3162(tests)X +3329(on)X +3434(a)X +3495(version)X +3756(of)X +3848(4.3BSD-Reno)X +555 1596(which)N +786(had)X +937(been)X +1123(modi\256ed)X +1441(to)X +1537(support)X +1811(binary)X +2050(semaphore)X +2432(facilities)X +2742(similar)X +2998(to)X +3094(those)X +3297(described)X +3639(in)X +3735([POSIX91].)X +4174(The)X +555 1686(hardware)N +880(platform)X +1181(consisted)X +1504(of)X +1595(an)X +1695(HP300)X +1941(\(33MHz)X +2237(MC68030\))X +2612(workstation)X +3014(with)X +3180(16MBytes)X +3537(of)X +3628(main)X +3812(memory,)X +4123(and)X +4263(a)X +555 1776(600MByte)N +920(HP7959)X +1205(SCSI)X +1396(disk)X +1552(\(17)X +1682(ms)X +1798(average)X +2072(seek)X +2237(time\).)X +2468(We)X +2602(ran)X +2727(three)X +2910(sets)X +3052(of)X +3141(comparisons)X +3568(which)X +3786(are)X +3907(summarized)X +555 1866(in)N +645(\256gure)X +860(\256ve.)X +1028(In)X +1123(each)X +1299(comparison)X +1701(we)X +1823(ran)X +1954(two)X +2102(tests,)X +2292(one)X +2436(using)X +2637(hardware)X +2965(spinlocks)X +3295(and)X +3438(the)X +3563(other)X +3755(using)X +3955(kernel)X +4183(call)X +555 1956(synchronization.)N +1135(Since)X +1341(the)X +1467(test)X +1606(was)X +1758(run)X +1892(single-user,)X +2291(none)X +2474(of)X +2568(the)X +2693(the)X +2818(locks)X +3014(were)X +3198(contested.)X +3568(In)X +3662(the)X +3787(\256rst)X +3938(two)X +4085(sets)X +4232(of)X +555 2046(tests,)N +743(we)X +863(ran)X +992(the)X +1116(full)X +1253(transaction)X +1631(processing)X +2000(benchmark)X +2383(described)X +2717(in)X +2805(section)X +3058(5.1.)X +3223(In)X +3315(one)X +3456(case)X +3620(we)X +3739(ran)X +3867(with)X +4034(both)X +4201(the)X +555 2136(database)N +854(and)X +992(log)X +1116(on)X +1218(the)X +1338(same)X +1525(disk)X +1680(\(1)X +1769(Disk\))X +1969(and)X +2107(in)X +2191(the)X +2311(second,)X +2576(we)X +2692(ran)X +2817(with)X +2981(the)X +3101(database)X +3400(and)X +3538(log)X +3661(on)X +3762(separate)X +4047(disks)X +4232(\(2)X +555 2226(Disk\).)N +800(In)X +894(the)X +1019(last)X +1157(test,)X +1315(we)X +1436(wanted)X +1695(to)X +1784(create)X +2004(a)X +2067(CPU)X +2249(bound)X +2476(environment,)X +2928(so)X +3026(we)X +3146(used)X +3319(a)X +3381(database)X +3684(small)X +3883(enough)X +4145(to)X +4233(\256t)X +555 2316(completely)N +941(in)X +1033(the)X +1161(cache)X +1375(and)X +1521(issued)X +1751(read-only)X +2089(transactions.)X +2541(The)X +2695(results)X +2933(in)X +3024(\256gure)X +3240(\256ve)X +3389(express)X +3659(the)X +3786(kernel)X +4016(call)X +4161(syn-)X +555 2406(chronization)N +980(performance)X +1411(as)X +1502(a)X +1562(percentage)X +1935(of)X +2026(the)X +2148(spinlock)X +2443(performance.)X +2914(For)X +3049(example,)X +3365(in)X +3451(the)X +3573(1)X +3637(disk)X +3794(case,)X +3977(the)X +4098(kernel)X +555 2496(call)N +697(implementation)X +1225(achieved)X +1537(4.4)X +1662(TPS)X +1824(\(transactions)X +2259(per)X +2387(second\))X +2662(while)X +2865(the)X +2988(semaphore)X +3361(implementation)X +3888(achieved)X +4199(4.6)X +555 2586(TPS,)N +735(and)X +874(the)X +995(relative)X +1259(performance)X +1689(of)X +1779(the)X +1900(kernel)X +2123(synchronization)X +2657(is)X +2732(96%)X +2901(that)X +3043(of)X +3132(the)X +3252(spinlock)X +3545(\(100)X +3714(*)X +3776(4.4)X +3898(/)X +3942(4.6\).)X +4111(There)X +555 2676(are)N +674(two)X +814(striking)X +1078(observations)X +1503(from)X +1679(these)X +1864(results:)X +10 f +635 2799(g)N +1 f +755(even)X +927(when)X +1121(the)X +1239(system)X +1481(is)X +1554(disk)X +1707(bound,)X +1947(the)X +2065(CPU)X +2240(cost)X +2389(of)X +2476(synchronization)X +3008(is)X +3081(noticeable,)X +3451(and)X +10 f +635 2922(g)N +1 f +755(when)X +949(we)X +1063(are)X +1182(CPU)X +1357(bound,)X +1597(the)X +1715(difference)X +2062(is)X +2135(dramatic)X +2436(\(67%\).)X +3 f +555 3108(4.4.)N +715(Transaction)X +1148(Protected)X +1499(Access)X +1747(Methods)X +1 f +755 3231(The)N +903(B-tree)X +1127(and)X +1266(\256xed)X +1449(length)X +1671(recno)X +1872(\(record)X +2127(number\))X +2421(access)X +2649(methods)X +2942(have)X +3116(been)X +3290(modi\256ed)X +3596(to)X +3680(provide)X +3947(transaction)X +555 3321(protection.)N +941(Whereas)X +1244(the)X +1363(previously)X +1722(published)X +2054(interface)X +2357(to)X +2440(the)X +2559(access)X +2786(routines)X +3065(had)X +3202(separate)X +3487(open)X +3664(calls)X +3832(for)X +3946(each)X +4114(of)X +4201(the)X +10 f +555 3507(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +1 Dt +2978 5036 MXY + 2978 5036 lineto + 2978 4662 lineto + 3093 4662 lineto + 3093 5036 lineto + 2978 5036 lineto +closepath 21 2978 4662 3093 5036 Dp +2518 MX + 2518 5036 lineto + 2518 3960 lineto + 2633 3960 lineto + 2633 5036 lineto + 2518 5036 lineto +closepath 3 2518 3960 2633 5036 Dp +2059 MX + 2059 5036 lineto + 2059 3946 lineto + 2174 3946 lineto + 2174 5036 lineto + 2059 5036 lineto +closepath 1 2059 3946 2174 5036 Dp +3 f +7 s +2912 5141(Read-only)N +1426 3767(of)N +1487(Spinlock)X +1710(Throughput)X +1480 3710(Throughput)N +1786(as)X +1850(a)X +1892(%)X +11 s +1670 4843(20)N +1670 4614(40)N +1670 4384(60)N +1670 4155(80)N +1648 3925(100)N +7 s +2041 5141(1)N +2083(Disk)X +2490(2)X +2532(Disks)X +5 Dt +1829 5036 MXY +1494 0 Dl +4 Ds +1 Dt +1829 4806 MXY +1494 0 Dl +1829 4577 MXY +1494 0 Dl +1829 4347 MXY +1494 0 Dl +1829 4118 MXY +1494 0 Dl +1829 3888 MXY +1494 0 Dl +3 Dt +-1 Ds +8 s +555 5360(Figure)N +753(5:)X +823(Kernel)X +1028(Overhead)X +1315(for)X +1413(System)X +1625(Call)X +1756(Synchronization.)X +1 f +2254(The)X +2370(performance)X +2708(of)X +2778(the)X +2873(kernel)X +3049(call)X +3158(synchronization)X +3583(is)X +3643(expressed)X +3911(as)X +3980(a)X +4024(percentage)X +555 5450(of)N +625(the)X +720(spinlock)X +954(synchronization)X +1379(performance.)X +1749(In)X +1819(disk)X +1943(bound)X +2120(cases)X +2271(\(1)X +2341(Disk)X +2479(and)X +2588(2)X +2637(Disks\),)X +2837(we)X +2928(see)X +3026(that)X +3139(4-6%)X +3294(of)X +3364(the)X +3459(performance)X +3797(is)X +3857(lost)X +3966(due)X +4074(to)X +4140(kernel)X +555 5540(calls)N +688(while)X +846(in)X +912(the)X +1006(CPU)X +1147(bound)X +1323(case,)X +1464(we)X +1554(have)X +1690(lost)X +1799(67%)X +1932(of)X +2001(the)X +2095(performance)X +2432(due)X +2540(to)X +2606(kernel)X +2781(calls.)X + +10 p +%%Page: 10 10 +8 s 8 xH 0 xS 1 f +10 s +3 f +1 f +555 630(access)N +781(methods,)X +1092(we)X +1206(now)X +1364(have)X +1536(an)X +1632(integrated)X +1973(open)X +2149(call)X +2285(with)X +2447(the)X +2565(following)X +2896(calling)X +3134(conventions:)X +7 f +715 753(DB)N +859(*dbopen)X +1243(\(const)X +1579(char)X +1819(*file,)X +2155(int)X +2347(flags,)X +2683(int)X +2875(mode,)X +3163(DBTYPE)X +3499(type,)X +1291 843(int)N +1483(dbflags,)X +1915(const)X +2203(void)X +2443(*openinfo\))X +1 f +555 966(where)N +2 f +774(\256le)X +1 f +894(is)X +969(the)X +1089(name)X +1285(of)X +1374(the)X +1494(\256le)X +1618(being)X +1818(opened,)X +2 f +2092(\257ags)X +1 f +2265(and)X +2 f +2402(mode)X +1 f +2597(are)X +2717(the)X +2836(standard)X +3129(arguments)X +3484(to)X +3 f +3567(open)X +1 f +3731(\(2\),)X +2 f +3866(type)X +1 f +4021(is)X +4095(one)X +4232(of)X +555 1056(the)N +680(access)X +913(method)X +1180(types,)X +2 f +1396(db\257ags)X +1 f +1654(indicates)X +1966(the)X +2091(mode)X +2296(of)X +2390(the)X +2515(buffer)X +2739(pool)X +2907(and)X +3049(transaction)X +3427(protection,)X +3798(and)X +2 f +3940(openinfo)X +1 f +4246(is)X +555 1146(the)N +681(access)X +915(method)X +1183(speci\256c)X +1456(information.)X +1902(Currently,)X +2257(the)X +2383(possible)X +2673(values)X +2906(for)X +2 f +3028(db\257ags)X +1 f +3287(are)X +3414(DB_SHARED)X +3912(and)X +4055(DB_TP)X +555 1236(indicating)N +895(that)X +1035(buffers)X +1283(should)X +1516(be)X +1612(kept)X +1770(in)X +1852(a)X +1908(shared)X +2138(buffer)X +2355(pool)X +2517(and)X +2653(that)X +2793(the)X +2911(\256le)X +3033(should)X +3266(be)X +3362(transaction)X +3734(protected.)X +755 1359(The)N +900(modi\256cations)X +1355(required)X +1643(to)X +1725(add)X +1861(transaction)X +2233(protection)X +2578(to)X +2660(an)X +2756(access)X +2982(method)X +3242(are)X +3361(quite)X +3541(simple)X +3774(and)X +3910(localized.)X +715 1482(1.)N +795(Replace)X +1074(\256le)X +2 f +1196(open)X +1 f +1372(with)X +2 f +1534(buf_open)X +1 f +1832(.)X +715 1572(2.)N +795(Replace)X +1074(\256le)X +2 f +1196(read)X +1 f +1363(and)X +2 f +1499(write)X +1 f +1683(calls)X +1850(with)X +2012(buffer)X +2229(manager)X +2526(calls)X +2693(\()X +2 f +2720(buf_get)X +1 f +(,)S +2 f +3000(buf_unpin)X +1 f +3324(\).)X +715 1662(3.)N +795(Precede)X +1070(buffer)X +1287(manager)X +1584(calls)X +1751(with)X +1913(an)X +2009(appropriate)X +2395(\(read)X +2581(or)X +2668(write\))X +2880(lock)X +3038(call.)X +715 1752(4.)N +795(Before)X +1034(updates,)X +1319(issue)X +1499(a)X +1555(logging)X +1819(operation.)X +715 1842(5.)N +795(After)X +985(data)X +1139(have)X +1311(been)X +1483(accessed,)X +1805(release)X +2049(the)X +2167(buffer)X +2384(manager)X +2681(pin.)X +715 1932(6.)N +795(Provide)X +1064(undo/redo)X +1409(code)X +1581(for)X +1695(each)X +1863(type)X +2021(of)X +2108(log)X +2230(record)X +2456(de\256ned.)X +555 2071(The)N +702(following)X +1035(code)X +1209(fragments)X +1552(show)X +1743(how)X +1903(to)X +1987(transaction)X +2361(protect)X +2606(several)X +2856(updates)X +3123(to)X +3206(a)X +3263(B-tree.)X +7 s +3484 2039(3)N +10 s +3533 2071(In)N +3621(the)X +3740(unprotected)X +4140(case,)X +555 2161(an)N +652(open)X +829(call)X +966(is)X +1040(followed)X +1346(by)X +1447(a)X +1504(read)X +1664(call)X +1801(to)X +1884(obtain)X +2105(the)X +2224(meta-data)X +2562(for)X +2677(the)X +2796(B-tree.)X +3058(Instead,)X +3331(we)X +3446(issue)X +3627(an)X +3724(open)X +3901(to)X +3984(the)X +4102(buffer)X +555 2251(manager)N +852(to)X +934(obtain)X +1154(a)X +1210(\256le)X +1332(id)X +1414(and)X +1550(a)X +1606(buffer)X +1823(request)X +2075(to)X +2157(obtain)X +2377(the)X +2495(meta-data)X +2832(as)X +2919(shown)X +3148(below.)X +7 f +715 2374(char)N +955(*path;)X +715 2464(int)N +907(fid,)X +1147(flags,)X +1483(len,)X +1723(mode;)X +715 2644(/*)N +859(Obtain)X +1195(a)X +1291(file)X +1531(id)X +1675(with)X +1915(which)X +2203(to)X +2347(access)X +2683(the)X +2875(buffer)X +3211(pool)X +3451(*/)X +715 2734(fid)N +907(=)X +1003(buf_open\(path,)X +1723(flags,)X +2059(mode\);)X +715 2914(/*)N +859(Read)X +1099(the)X +1291(meta)X +1531(data)X +1771(\(page)X +2059(0\))X +2203(for)X +2395(the)X +2587(B-tree)X +2923(*/)X +715 3004(if)N +859(\(tp_lock\(fid,)X +1531(0,)X +1675(READ_LOCK\)\))X +1003 3094(return)N +1339(error;)X +715 3184(meta_data_ptr)N +1387(=)X +1483(buf_get\(fid,)X +2107(0,)X +2251(BF_PIN,)X +2635(&len\);)X +1 f +555 3307(The)N +714(BF_PIN)X +1014(argument)X +1350(to)X +2 f +1445(buf_get)X +1 f +1718(indicates)X +2036(that)X +2189(we)X +2316(wish)X +2500(to)X +2595(leave)X +2798(this)X +2946(page)X +3131(pinned)X +3382(in)X +3477(memory)X +3777(so)X +3881(that)X +4034(it)X +4111(is)X +4197(not)X +555 3397(swapped)N +862(out)X +990(while)X +1194(we)X +1314(are)X +1439(accessing)X +1772(it.)X +1881(The)X +2031(last)X +2167(argument)X +2495(to)X +2 f +2582(buf_get)X +1 f +2847(returns)X +3095(the)X +3218(number)X +3488(of)X +3580(bytes)X +3774(on)X +3879(the)X +4002(page)X +4179(that)X +555 3487(were)N +732(valid)X +912(so)X +1003(that)X +1143(the)X +1261(access)X +1487(method)X +1747(may)X +1905(initialize)X +2205(the)X +2323(page)X +2495(if)X +2564(necessary.)X +755 3610(Next,)N +955(consider)X +1251(inserting)X +1555(a)X +1615(record)X +1845(on)X +1949(a)X +2009(particular)X +2341(page)X +2517(of)X +2608(a)X +2668(B-tree.)X +2932(In)X +3022(the)X +3143(unprotected)X +3545(case,)X +3727(we)X +3844(read)X +4006(the)X +4127(page,)X +555 3700(call)N +2 f +693(_bt_insertat)X +1 f +1079(,)X +1121(and)X +1258(write)X +1444(the)X +1563(page.)X +1776(Instead,)X +2049(we)X +2164(lock)X +2323(the)X +2442(page,)X +2635(request)X +2888(the)X +3007(buffer,)X +3245(log)X +3368(the)X +3487(change,)X +3756(modify)X +4008(the)X +4127(page,)X +555 3790(and)N +691(release)X +935(the)X +1053(buffer.)X +7 f +715 3913(int)N +907(fid,)X +1147(len,)X +1387(pageno;)X +1867(/*)X +2011(Identifies)X +2539(the)X +2731(buffer)X +3067(*/)X +715 4003(int)N +907(index;)X +1867(/*)X +2011(Location)X +2443(at)X +2587(which)X +2875(to)X +3019(insert)X +3355(the)X +3547(new)X +3739(pair)X +3979(*/)X +715 4093(DBT)N +907(*keyp,)X +1243(*datap;)X +1867(/*)X +2011(Key/Data)X +2443(pair)X +2683(to)X +2827(be)X +2971(inserted)X +3403(*/)X +715 4183(DATUM)N +1003(*d;)X +1867(/*)X +2011(Key/data)X +2443(structure)X +2923(to)X +3067(insert)X +3403(*/)X +715 4363(/*)N +859(Lock)X +1099(and)X +1291(request)X +1675(the)X +1867(buffer)X +2203(*/)X +715 4453(if)N +859(\(tp_lock\(fid,)X +1531(pageno,)X +1915(WRITE_LOCK\)\))X +1003 4543(return)N +1339(error;)X +715 4633(buffer_ptr)N +1243(=)X +1339(buf_get\(fid,)X +1963(pageno,)X +2347(BF_PIN,)X +2731(&len\);)X +715 4813(/*)N +859(Log)X +1051(and)X +1243(perform)X +1627(the)X +1819(update)X +2155(*/)X +715 4903(log_insdel\(BTREE_INSERT,)N +1915(fid,)X +2155(pageno,)X +2539(keyp,)X +2827(datap\);)X +715 4993(_bt_insertat\(buffer_ptr,)N +1915(d,)X +2059(index\);)X +715 5083(buf_unpin\(buffer_ptr\);)N +1 f +555 5206(Succinctly,)N +942(the)X +1068(algorithm)X +1407(for)X +1529(turning)X +1788(unprotected)X +2195(code)X +2375(into)X +2527(protected)X +2854(code)X +3034(is)X +3115(to)X +3205(replace)X +3466(read)X +3633(operations)X +3995(with)X +2 f +4165(lock)X +1 f +555 5296(and)N +2 f +691(buf_get)X +1 f +951(operations)X +1305(and)X +1441(write)X +1626(operations)X +1980(with)X +2 f +2142(log)X +1 f +2264(and)X +2 f +2400(buf_unpin)X +1 f +2744(operations.)X +8 s +10 f +555 5458(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)N +5 s +1 f +727 5536(3)N +8 s +766 5561(The)N +884(following)X +1152(code)X +1291(fragments)X +1565(are)X +1661(examples,)X +1937(but)X +2038(do)X +2120(not)X +2220(de\256ne)X +2394(the)X +2490(\256nal)X +2622(interface.)X +2894(The)X +3011(\256nal)X +3143(interface)X +3383(will)X +3501(be)X +3579(determined)X +3884(after)X +4018(LIBTP)X +4214(has)X +555 5633(been)N +691(fully)X +828(integrated)X +1099(with)X +1229(the)X +1323(most)X +1464(recent)X +3 f +1635(db)X +1 f +1707(\(3\))X +1797(release)X +1989(from)X +2129(the)X +2223(Computer)X +2495(Systems)X +2725(Research)X +2974(Group)X +3153(at)X +3215(University)X +3501(of)X +3570(California,)X +3861(Berkeley.)X + +11 p +%%Page: 11 11 +8 s 8 xH 0 xS 1 f +10 s +3 f +555 630(5.)N +655(Performance)X +1 f +755 753(In)N +845(this)X +983(section,)X +1253(we)X +1370(present)X +1625(the)X +1746(results)X +1978(of)X +2067(two)X +2209(very)X +2374(different)X +2673(benchmarks.)X +3103(The)X +3250(\256rst)X +3396(is)X +3471(an)X +3569(online)X +3791(transaction)X +4165(pro-)X +555 843(cessing)N +824(benchmark,)X +1234(similar)X +1489(to)X +1584(the)X +1715(standard)X +2020(TPCB,)X +2272(but)X +2407(has)X +2547(been)X +2732(adapted)X +3015(to)X +3110(run)X +3250(in)X +3345(a)X +3414(desktop)X +3696(environment.)X +4174(The)X +555 933(second)N +798(emulates)X +1103(a)X +1159(computer-aided)X +1683(design)X +1912(environment)X +2337(and)X +2473(provides)X +2769(more)X +2954(complex)X +3250(query)X +3453(processing.)X +3 f +555 1119(5.1.)N +715(Transaction)X +1148(Processing)X +1533(Benchmark)X +1 f +755 1242(For)N +887(this)X +1023(section,)X +1291(all)X +1392(performance)X +1820(numbers)X +2117(shown)X +2346(except)X +2576(for)X +2690(the)X +2808(commercial)X +3207(database)X +3504(system)X +3746(were)X +3923(obtained)X +4219(on)X +555 1332(a)N +614(DECstation)X +1009(5000/200)X +1333(with)X +1497(32MBytes)X +1852(of)X +1941(memory)X +2230(running)X +2501(Ultrix)X +2714(V4.0,)X +2914(accessing)X +3244(a)X +3302(DEC)X +3484(RZ57)X +3688(1GByte)X +3959(disk)X +4114(drive.)X +555 1422(The)N +720(commercial)X +1139(relational)X +1482(database)X +1799(system)X +2061(tests)X +2242(were)X +2438(run)X +2584(on)X +2703(a)X +2778(comparable)X +3192(machine,)X +3523(a)X +3598(Sparcstation)X +4033(1+)X +4157(with)X +555 1512(32MBytes)N +915(memory)X +1209(and)X +1352(a)X +1415(1GByte)X +1691(external)X +1976(disk)X +2135(drive.)X +2366(The)X +2517(database,)X +2840(binaries)X +3120(and)X +3262(log)X +3390(resided)X +3648(on)X +3754(the)X +3878(same)X +4069(device.)X +555 1602(Reported)N +869(times)X +1062(are)X +1181(the)X +1299(means)X +1524(of)X +1611(\256ve)X +1751(tests)X +1913(and)X +2049(have)X +2221(standard)X +2513(deviations)X +2862(within)X +3086(two)X +3226(percent)X +3483(of)X +3570(the)X +3688(mean.)X +755 1725(The)N +905(test)X +1041(database)X +1343(was)X +1493(con\256gured)X +1861(according)X +2203(to)X +2290(the)X +2413(TPCB)X +2637(scaling)X +2889(rules)X +3070(for)X +3189(a)X +3250(10)X +3355(transaction)X +3732(per)X +3860(second)X +4108(\(TPS\))X +555 1815(system)N +817(with)X +999(1,000,000)X +1359(account)X +1649(records,)X +1946(100)X +2106(teller)X +2311(records,)X +2607(and)X +2762(10)X +2881(branch)X +3139(records.)X +3455(Where)X +3709(TPS)X +3885(numbers)X +4200(are)X +555 1905(reported,)N +865(we)X +981(are)X +1102(running)X +1373(a)X +1431(modi\256ed)X +1737(version)X +1995(of)X +2084(the)X +2203(industry)X +2486(standard)X +2779(transaction)X +3152(processing)X +3516(benchmark,)X +3914(TPCB.)X +4174(The)X +555 1995(TPCB)N +780(benchmark)X +1163(simulates)X +1491(a)X +1553(withdrawal)X +1940(performed)X +2301(by)X +2407(a)X +2469(hypothetical)X +2891(teller)X +3082(at)X +3166(a)X +3228(hypothetical)X +3650(bank.)X +3872(The)X +4022(database)X +555 2085(consists)N +831(of)X +921(relations)X +1220(\(\256les\))X +1430(for)X +1547(accounts,)X +1871(branches,)X +2200(tellers,)X +2439(and)X +2578(history.)X +2863(For)X +2997(each)X +3168(transaction,)X +3563(the)X +3684(account,)X +3976(teller,)X +4183(and)X +555 2175(branch)N +795(balances)X +1093(must)X +1269(be)X +1366(updated)X +1641(to)X +1724(re\257ect)X +1946(the)X +2065(withdrawal)X +2447(and)X +2584(a)X +2640(history)X +2882(record)X +3108(is)X +3181(written)X +3428(which)X +3644(contains)X +3931(the)X +4049(account)X +555 2265(id,)N +657(branch)X +896(id,)X +998(teller)X +1183(id,)X +1285(and)X +1421(the)X +1539(amount)X +1799(of)X +1886(the)X +2004(withdrawal)X +2385([TPCB90].)X +755 2388(Our)N +914(implementation)X +1450(of)X +1551(the)X +1683(benchmark)X +2074(differs)X +2317(from)X +2506(the)X +2637(speci\256cation)X +3075(in)X +3170(several)X +3431(aspects.)X +3736(The)X +3894(speci\256cation)X +555 2478(requires)N +840(that)X +985(the)X +1108(database)X +1410(keep)X +1587(redundant)X +1933(logs)X +2091(on)X +2196(different)X +2498(devices,)X +2784(but)X +2911(we)X +3030(use)X +3162(a)X +3223(single)X +3439(log.)X +3606(Furthermore,)X +4052(all)X +4157(tests)X +555 2568(were)N +734(run)X +863(on)X +965(a)X +1023(single,)X +1256(centralized)X +1631(system)X +1875(so)X +1968(there)X +2151(is)X +2226(no)X +2328(notion)X +2553(of)X +2641(remote)X +2885(accesses.)X +3219(Finally,)X +3486(we)X +3601(calculated)X +3948(throughput)X +555 2658(by)N +662(dividing)X +955(the)X +1080(total)X +1249(elapsed)X +1517(time)X +1686(by)X +1793(the)X +1918(number)X +2190(of)X +2284(transactions)X +2694(processed)X +3038(rather)X +3253(than)X +3418(by)X +3525(computing)X +3894(the)X +4018(response)X +555 2748(time)N +717(for)X +831(each)X +999(transaction.)X +755 2871(The)N +912(performance)X +1351(comparisons)X +1788(focus)X +1993(on)X +2104(traditional)X +2464(Unix)X +2655(techniques)X +3029(\(unprotected,)X +3486(using)X +3 f +3690(\257ock)X +1 f +3854(\(2\))X +3979(and)X +4126(using)X +3 f +555 2961(fsync)N +1 f +733(\(2\)\))X +884(and)X +1030(a)X +1096(commercial)X +1504(relational)X +1836(database)X +2142(system.)X +2433(Well-behaved)X +2913(applications)X +3329(using)X +3 f +3531(\257ock)X +1 f +3695(\(2\))X +3818(are)X +3946(guaranteed)X +555 3051(that)N +704(concurrent)X +1077(processes')X +1441(updates)X +1715(do)X +1824(not)X +1955(interact)X +2225(with)X +2396(one)X +2541(another,)X +2831(but)X +2962(no)X +3070(guarantees)X +3442(about)X +3648(atomicity)X +3978(are)X +4105(made.)X +555 3141(That)N +731(is,)X +833(if)X +911(the)X +1038(system)X +1289(crashes)X +1555(in)X +1646(mid-transaction,)X +2198(only)X +2369(parts)X +2554(of)X +2649(that)X +2797(transaction)X +3177(will)X +3329(be)X +3433(re\257ected)X +3738(in)X +3828(the)X +3954 0.3125(after-crash)AX +555 3231(state)N +725(of)X +815(the)X +936(database.)X +1276(The)X +1424(use)X +1554(of)X +3 f +1643(fsync)X +1 f +1821(\(2\))X +1937(at)X +2017(transaction)X +2391(commit)X +2657(time)X +2821(provides)X +3119(guarantees)X +3485(of)X +3574(durability)X +3907(after)X +4077(system)X +555 3321(failure.)N +825(However,)X +1160(there)X +1341(is)X +1414(no)X +1514(mechanism)X +1899(to)X +1981(perform)X +2260(transaction)X +2632(abort.)X +3 f +555 3507(5.1.1.)N +775(Single-User)X +1191(Tests)X +1 f +755 3630(These)N +978(tests)X +1151(compare)X +1459(LIBTP)X +1712(in)X +1804(a)X +1870(variety)X +2123(of)X +2220(con\256gurations)X +2708(to)X +2800(traditional)X +3159(UNIX)X +3390(solutions)X +3708(and)X +3854(a)X +3920(commercial)X +555 3720(relational)N +884(database)X +1187(system)X +1435(\(RDBMS\).)X +1814(To)X +1929(demonstrate)X +2347(the)X +2471(server)X +2694(architecture)X +3100(we)X +3220(built)X +3392(a)X +3454(front)X +3636(end)X +3777(test)X +3913(process)X +4179(that)X +555 3810(uses)N +732(TCL)X +922([OUST90])X +1304(to)X +1405(parse)X +1614(database)X +1930(access)X +2175(commands)X +2561(and)X +2716(call)X +2870(the)X +3006(database)X +3321(access)X +3565(routines.)X +3901(In)X +4006(one)X +4160(case)X +555 3900(\(SERVER\),)N +956(frontend)X +1249(and)X +1386(backend)X +1675(processes)X +2004(were)X +2181(created)X +2434(which)X +2650(communicated)X +3142(via)X +3260(an)X +3356(IP)X +3447(socket.)X +3712(In)X +3799(the)X +3917(second)X +4160(case)X +555 3990(\(TCL\),)N +802(a)X +860(single)X +1073(process)X +1336(read)X +1497(queries)X +1751(from)X +1929(standard)X +2223(input,)X +2429(parsed)X +2660(them,)X +2861(and)X +2998(called)X +3211(the)X +3330(database)X +3628(access)X +3855(routines.)X +4174(The)X +555 4080(performance)N +987(difference)X +1338(between)X +1630(the)X +1752(TCL)X +1927(and)X +2067(SERVER)X +2397(tests)X +2563(quanti\256es)X +2898(the)X +3020(communication)X +3542(overhead)X +3861(of)X +3952(the)X +4074(socket.)X +555 4170(The)N +732(RDBMS)X +1063(implementation)X +1617(used)X +1816(embedded)X +2198(SQL)X +2401(in)X +2515(C)X +2620(with)X +2814(stored)X +3062(database)X +3391(procedures.)X +3835(Therefore,)X +4224(its)X +555 4260(con\256guration)N +1003(is)X +1076(a)X +1132(hybrid)X +1361(of)X +1448(the)X +1566(single)X +1777(process)X +2038(architecture)X +2438(and)X +2574(the)X +2692(server)X +2909(architecture.)X +3349(The)X +3494(graph)X +3697(in)X +3779(\256gure)X +3986(six)X +4099(shows)X +555 4350(a)N +611(comparison)X +1005(of)X +1092(the)X +1210(following)X +1541(six)X +1654(con\256gurations:)X +1126 4506(LIBTP)N +1552(Uses)X +1728(the)X +1846(LIBTP)X +2088(library)X +2322(in)X +2404(a)X +2460(single)X +2671(application.)X +1126 4596(TCL)N +1552(Uses)X +1728(the)X +1846(LIBTP)X +2088(library)X +2322(in)X +2404(a)X +2460(single)X +2671(application,)X +3067(requires)X +3346(query)X +3549(parsing.)X +1126 4686(SERVER)N +1552(Uses)X +1728(the)X +1846(LIBTP)X +2088(library)X +2322(in)X +2404(a)X +2460(server)X +2677(con\256guration,)X +3144(requires)X +3423(query)X +3626(parsing.)X +1126 4776(NOTP)N +1552(Uses)X +1728(no)X +1828(locking,)X +2108(logging,)X +2392(or)X +2479(concurrency)X +2897(control.)X +1126 4866(FLOCK)N +1552(Uses)X +3 f +1728(\257ock)X +1 f +1892(\(2\))X +2006(for)X +2120(concurrency)X +2538(control)X +2785(and)X +2921(nothing)X +3185(for)X +3299(durability.)X +1126 4956(FSYNC)N +1552(Uses)X +3 f +1728(fsync)X +1 f +1906(\(2\))X +2020(for)X +2134(durability)X +2465(and)X +2601(nothing)X +2865(for)X +2979(concurrency)X +3397(control.)X +1126 5046(RDBMS)N +1552(Uses)X +1728(a)X +1784(commercial)X +2183(relational)X +2506(database)X +2803(system.)X +755 5235(The)N +902(results)X +1133(show)X +1324(that)X +1466(LIBTP,)X +1730(both)X +1894(in)X +1978(the)X +2098(procedural)X +2464(and)X +2602(parsed)X +2834(environments,)X +3312(is)X +3387(competitive)X +3787(with)X +3951(a)X +4009(commer-)X +555 5325(cial)N +692(system)X +935(\(comparing)X +1326(LIBTP,)X +1589(TCL,)X +1781(and)X +1917(RDBMS\).)X +2263(Compared)X +2617(to)X +2699(existing)X +2972(UNIX)X +3193(solutions,)X +3521(LIBTP)X +3763(is)X +3836(approximately)X +555 5415(15%)N +738(slower)X +988(than)X +1162(using)X +3 f +1371(\257ock)X +1 f +1535(\(2\))X +1665(or)X +1768(no)X +1884(protection)X +2245(but)X +2383(over)X +2562(80%)X +2745(better)X +2964(than)X +3137(using)X +3 f +3345(fsync)X +1 f +3523(\(2\))X +3652(\(comparing)X +4057(LIBTP,)X +555 5505(FLOCK,)N +857(NOTP,)X +1106(and)X +1242(FSYNC\).)X + +12 p +%%Page: 12 12 +10 s 10 xH 0 xS 1 f +3 f +8 s +3500 2184(RDBMS)N +1 Dt +3553 2085 MXY + 3553 2085 lineto + 3676 2085 lineto + 3676 1351 lineto + 3553 1351 lineto + 3553 2085 lineto +closepath 16 3553 1351 3676 2085 Dp +2018 2184(SERVER)N +1720 1168 MXY +0 917 Dl +122 0 Dl +0 -917 Dl +-122 0 Dl +1715 2184(TCL)N +2087 1534 MXY + 2087 1534 lineto + 2209 1534 lineto + 2209 2085 lineto + 2087 2085 lineto + 2087 1534 lineto +closepath 12 2087 1534 2209 2085 Dp +3187 MX + 3187 1534 lineto + 3309 1534 lineto + 3309 2085 lineto + 3187 2085 lineto + 3187 1534 lineto +closepath 19 3187 1534 3309 2085 Dp +3142 2184(FSYNC)N +2425(NOTP)X +2453 955 MXY + 2453 955 lineto + 2576 955 lineto + 2576 2085 lineto + 2453 2085 lineto + 2453 955 lineto +closepath 21 2453 955 2576 2085 Dp +2820 1000 MXY + 2820 1000 lineto + 2942 1000 lineto + 2942 2085 lineto + 2820 2085 lineto + 2820 1000 lineto +closepath 14 2820 1000 2942 2085 Dp +5 Dt +1231 2085 MXY +2567 0 Dl +4 Ds +1 Dt +1231 1840 MXY +2567 0 Dl +1231 1596 MXY +2567 0 Dl +1231 1351 MXY +2567 0 Dl +1231 1108 MXY +2567 0 Dl +1231 863 MXY +2567 0 Dl +11 s +1087 1877(2)N +1087 1633(4)N +1087 1388(6)N +1087 1145(8)N +1065 900(10)N +1028 763(TPS)N +-1 Ds +1353 2085 MXY + 1353 2085 lineto + 1353 1151 lineto + 1476 1151 lineto + 1476 2085 lineto + 1353 2085 lineto +closepath 3 1353 1151 1476 2085 Dp +8 s +1318 2184(LIBTP)N +2767(FLOCK)X +3 Dt +-1 Ds +10 s +1597 2399(Figure)N +1844(6:)X +1931(Single-User)X +2347(Performance)X +2814(Comparison.)X +1 f +10 f +555 2579(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +3 f +555 2855(5.1.2.)N +775(Multi-User)X +1174(Tests)X +1 f +755 2978(While)N +975(the)X +1097(single-user)X +1473(tests)X +1639(form)X +1819(a)X +1878(basis)X +2061(for)X +2178(comparing)X +2544(LIBTP)X +2789(to)X +2874(other)X +3062(systems,)X +3358(our)X +3488(goal)X +3649(in)X +3734(multi-user)X +4086(testing)X +555 3068(was)N +714(to)X +810(analyze)X +1089(its)X +1197(scalability.)X +1579(To)X +1701(this)X +1849(end,)X +2018(we)X +2145(have)X +2330(run)X +2470(the)X +2601(benchmark)X +2991(in)X +3086(three)X +3280(modes,)X +3542(the)X +3673(normal)X +3933(disk)X +4099(bound)X +555 3158(con\256guration)N +1010(\(\256gure)X +1252(seven\),)X +1510(a)X +1573(CPU)X +1755(bound)X +1982(con\256guration)X +2436(\(\256gure)X +2677(eight,)X +2884(READ-ONLY\),)X +3426(and)X +3569(lock)X +3734(contention)X +4099(bound)X +555 3248(\(\256gure)N +796(eight,)X +1003(NO_FSYNC\).)X +1510(Since)X +1715(the)X +1840(normal)X +2094(con\256guration)X +2548(is)X +2628(completely)X +3011(disk)X +3171(bound)X +3398(\(each)X +3600(transaction)X +3978(requires)X +4263(a)X +555 3354(random)N +823(read,)X +1005(a)X +1064(random)X +1332(write,)X +1540(and)X +1679(a)X +1738(sequential)X +2086(write)X +7 s +2251 3322(4)N +10 s +3354(\))Y +2329(we)X +2446(expect)X +2679(to)X +2764(see)X +2890(little)X +3059(performance)X +3489(improvement)X +3939(as)X +4028(the)X +4148(mul-)X +555 3444(tiprogramming)N +1064(level)X +1249(increases.)X +1613(In)X +1709(fact,)X +1879(\256gure)X +2095(seven)X +2307(reveals)X +2564(that)X +2713(we)X +2836(are)X +2964(able)X +3127(to)X +3218(overlap)X +3487(CPU)X +3670(and)X +3814(disk)X +3975(utilization)X +555 3534(slightly)N +825(producing)X +1181(approximately)X +1674(a)X +1740(10%)X +1917(performance)X +2354(improvement)X +2811(with)X +2983(two)X +3133(processes.)X +3511(After)X +3711(that)X +3861(point,)X +4075(perfor-)X +555 3624(mance)N +785(drops)X +983(off,)X +1117(and)X +1253(at)X +1331(a)X +1387(multi-programming)X +2038(level)X +2214(of)X +2301(4,)X +2381(we)X +2495(are)X +2614(performing)X +2995(worse)X +3207(than)X +3365(in)X +3447(the)X +3565(single)X +3776(process)X +4037(case.)X +755 3747(Similar)N +1021(behavior)X +1333(was)X +1489(reported)X +1787(on)X +1897(the)X +2025(commercial)X +2434(relational)X +2767(database)X +3074(system)X +3326(using)X +3529(the)X +3657(same)X +3852(con\256guration.)X +555 3837(The)N +707(important)X +1045(conclusion)X +1419(to)X +1508(draw)X +1696(from)X +1879(this)X +2021(is)X +2101(that)X +2248(you)X +2395(cannot)X +2636(attain)X +2841(good)X +3028(multi-user)X +3384(scaling)X +3638(on)X +3745(a)X +3808(badly)X +4013(balanced)X +555 3927(system.)N +839(If)X +915(multi-user)X +1266(performance)X +1695(on)X +1797(applications)X +2205(of)X +2293(this)X +2429(sort)X +2570(is)X +2644(important,)X +2996(one)X +3133(must)X +3309(have)X +3482(a)X +3539(separate)X +3824(logging)X +4089(device)X +555 4017(and)N +697(horizontally)X +1110(partition)X +1407(the)X +1531(database)X +1834(to)X +1921(allow)X +2124(a)X +2185(suf\256ciently)X +2570(high)X +2737(degree)X +2977(of)X +3069(multiprogramming)X +3698(that)X +3843(group)X +4055(commit)X +555 4107(can)N +687(amortize)X +988(the)X +1106(cost)X +1255(of)X +1342(log)X +1464(\257ushing.)X +755 4230(By)N +871(using)X +1067(a)X +1126(very)X +1292(small)X +1488(database)X +1788(\(one)X +1954(that)X +2097(can)X +2232(be)X +2331(entirely)X +2599(cached)X +2846(in)X +2930(main)X +3112(memory\))X +3428(and)X +3566(read-only)X +3896(transactions,)X +555 4320(we)N +670(generated)X +1004(a)X +1061(CPU)X +1236(bound)X +1456(environment.)X +1921(By)X +2034(using)X +2227(the)X +2345(same)X +2530(small)X +2723(database,)X +3040(the)X +3158(complete)X +3472(TPCB)X +3691(transaction,)X +4083(and)X +4219(no)X +3 f +555 4410(fsync)N +1 f +733(\(2\))X +862(on)X +977(the)X +1110(log)X +1247(at)X +1340(commit,)X +1639(we)X +1768(created)X +2036(a)X +2107(lock)X +2280(contention)X +2652(bound)X +2886(environment.)X +3365(The)X +3524(small)X +3731(database)X +4042(used)X +4223(an)X +555 4500(account)N +828(\256le)X +953(containing)X +1314(only)X +1479(1000)X +1662(records)X +1922(rather)X +2133(than)X +2294(the)X +2415(full)X +2549(1,000,000)X +2891(records)X +3150(and)X +3288(ran)X +3413(enough)X +3671(transactions)X +4076(to)X +4160(read)X +555 4590(the)N +677(entire)X +883(database)X +1183(into)X +1330(the)X +1451(buffer)X +1671(pool)X +1836(\(2000\))X +2073(before)X +2302(beginning)X +2645(measurements.)X +3147(The)X +3295(read-only)X +3626(transaction)X +4001(consisted)X +555 4680(of)N +646(three)X +831(database)X +1132(reads)X +1326(\(from)X +1533(the)X +1655(1000)X +1839(record)X +2069(account)X +2343(\256le,)X +2489(the)X +2611(100)X +2754(record)X +2983(teller)X +3171(\256le,)X +3316(and)X +3455(the)X +3576(10)X +3679(record)X +3908(branch)X +4150(\256le\).)X +555 4770(Since)N +759(no)X +865(data)X +1025(were)X +1208(modi\256ed)X +1518(and)X +1660(no)X +1766(history)X +2014(records)X +2277(were)X +2460(written,)X +2733(no)X +2839(log)X +2966(records)X +3228(were)X +3410(written.)X +3702(For)X +3838(the)X +3961(contention)X +555 4860(bound)N +780(con\256guration,)X +1252(we)X +1371(used)X +1543(the)X +1666(normal)X +1918(TPCB)X +2142(transaction)X +2519(\(against)X +2798(the)X +2920(small)X +3117(database\))X +3445(and)X +3585(disabled)X +3876(the)X +3998(log)X +4124(\257ush.)X +555 4950(Figure)N +784(eight)X +964(shows)X +1184(both)X +1346(of)X +1433(these)X +1618(results.)X +755 5073(The)N +902(read-only)X +1231(test)X +1363(indicates)X +1669(that)X +1810(we)X +1925(barely)X +2147(scale)X +2329(at)X +2408(all)X +2509(in)X +2592(the)X +2711(CPU)X +2887(bound)X +3108(case.)X +3308(The)X +3454(explanation)X +3849(for)X +3964(that)X +4105(is)X +4179(that)X +555 5163(even)N +735(with)X +905(a)X +969(single)X +1188(process,)X +1477(we)X +1599(are)X +1726(able)X +1888(to)X +1978(drive)X +2171(the)X +2297(CPU)X +2480(utilization)X +2832(to)X +2922(96%.)X +3137(As)X +3254(a)X +3317(result,)X +3542(that)X +3689(gives)X +3885(us)X +3983(very)X +4153(little)X +555 5253(room)N +753(for)X +876(improvement,)X +1352(and)X +1497(it)X +1570(takes)X +1764(a)X +1829(multiprogramming)X +2462(level)X +2647(of)X +2743(four)X +2906(to)X +2997(approach)X +3321(100%)X +3537(CPU)X +3721(saturation.)X +4106(In)X +4201(the)X +555 5343(case)N +718(where)X +939(we)X +1057(do)X +1161(perform)X +1444(writes,)X +1684(we)X +1802(are)X +1925(interested)X +2261(in)X +2347(detecting)X +2665(when)X +2863(lock)X +3025(contention)X +3387(becomes)X +3691(a)X +3750(dominant)X +4075(perfor-)X +555 5433(mance)N +787(factor.)X +1037(Contention)X +1414(will)X +1560(cause)X +1761(two)X +1903(phenomena;)X +2317(we)X +2433(will)X +2579(see)X +2704(transactions)X +3109(queueing)X +3425(behind)X +3665(frequently)X +4017(accessed)X +555 5523(data,)N +731(and)X +869(we)X +985(will)X +1131(see)X +1256(transaction)X +1629(abort)X +1815(rates)X +1988(increasing)X +2339(due)X +2476(to)X +2559(deadlock.)X +2910(Given)X +3127(that)X +3268(the)X +3387(branch)X +3627(\256le)X +3750(contains)X +4038(only)X +4201(ten)X +8 s +10 f +555 5595(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)N +5 s +1 f +727 5673(4)N +8 s +763 5698(Although)N +1021(the)X +1115(log)X +1213(is)X +1272(written)X +1469(sequentially,)X +1810(we)X +1900(do)X +1980(not)X +2078(get)X +2172(the)X +2266(bene\256t)X +2456(of)X +2525(sequentiality)X +2868(since)X +3015(the)X +3109(log)X +3207(and)X +3315(database)X +3550(reside)X +3718(on)X +3798(the)X +3892(same)X +4039(disk.)X + +13 p +%%Page: 13 13 +8 s 8 xH 0 xS 1 f +10 s +3 f +1 f +3187 2051 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3286 2028 MXY +0 17 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3384 1926 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3483 1910 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3581 1910 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3680 1832 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3778 1909 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3877 1883 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3975 1679 MXY +0 17 Dl +0 -8 Dl +9 0 Dl +-18 0 Dl +4074 1487 MXY +0 17 Dl +0 -8 Dl +9 0 Dl +-18 0 Dl +5 Dt +3187 2060 MXY +99 -24 Dl +98 -101 Dl +99 -16 Dl +98 0 Dl +99 -78 Dl +98 77 Dl +99 -26 Dl +98 -204 Dl +99 -192 Dl +3 f +6 s +4088 1516(SMALL)N +3 Dt +3187 2051 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3286 2051 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3384 2041 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3483 1990 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3581 1843 MXY +0 17 Dl +0 -8 Dl +9 0 Dl +-18 0 Dl +3680 1578 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3778 1496 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3877 1430 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +3975 1269 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +4074 1070 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1 Dt +3187 2060 MXY +99 0 Dl +98 -10 Dl +99 -51 Dl +98 -147 Dl +99 -265 Dl +98 -82 Dl +99 -66 Dl +98 -161 Dl +99 -199 Dl +4088 1099(LARGE)N +5 Dt +3089 2060 MXY +985 0 Dl +3089 MX +0 -1174 Dl +4 Ds +1 Dt +3581 2060 MXY +0 -1174 Dl +4074 2060 MXY +0 -1174 Dl +3089 1825 MXY +985 0 Dl +9 s +2993 1855(25)N +3089 1591 MXY +985 0 Dl +2993 1621(50)N +3089 1356 MXY +985 0 Dl +2993 1386(75)N +3089 1121 MXY +985 0 Dl +2957 1151(100)N +3089 886 MXY +985 0 Dl +2957 916(125)N +3281 2199(Multiprogramming)N +3071 2152(0)N +3569(5)X +4038(10)X +2859 787(Aborts)N +3089(per)X +3211(500)X +2901 847(transactions)N +-1 Ds +3 Dt +2037 1342 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2125 1358 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2213 1341 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2301 1191 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2388 1124 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-17 0 Dl +2476 1157 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2564 1157 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2652 1161 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2740 1153 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2828 1150 MXY +0 18 Dl +0 -9 Dl +8 0 Dl +-17 0 Dl +5 Dt +2037 1351 MXY +88 16 Dl +88 -17 Dl +88 -150 Dl +87 -67 Dl +88 33 Dl +88 0 Dl +88 4 Dl +88 -8 Dl +88 -3 Dl +6 s +2685 1234(READ-ONLY)N +3 Dt +2037 1464 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2125 1640 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2213 1854 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2301 1872 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2388 1871 MXY +0 17 Dl +0 -9 Dl +9 0 Dl +-17 0 Dl +2476 1933 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2564 1914 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2652 1903 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2740 1980 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +2828 2004 MXY +0 18 Dl +0 -9 Dl +8 0 Dl +-17 0 Dl +1 Dt +2037 1473 MXY +88 176 Dl +88 214 Dl +88 18 Dl +87 -2 Dl +88 63 Dl +88 -19 Dl +88 -11 Dl +88 77 Dl +88 24 Dl +2759 1997(NO-FSYNC)N +5 Dt +1949 2060 MXY +879 0 Dl +1949 MX +0 -1174 Dl +4 Ds +1 Dt +2388 2060 MXY +0 -1174 Dl +2828 2060 MXY +0 -1174 Dl +1949 1825 MXY +879 0 Dl +9 s +1842 1855(40)N +1949 1591 MXY +879 0 Dl +1842 1621(80)N +1949 1356 MXY +879 0 Dl +1806 1386(120)N +1949 1121 MXY +879 0 Dl +1806 1151(160)N +1949 886 MXY +879 0 Dl +1806 916(200)N +2088 2199(Multiprogramming)N +1844 863(in)N +1922(TPS)X +1761 792(Throughput)N +1931 2121(0)N +2370 2133(5)N +2792(10)X +6 s +1679 1833(LIBTP)N +-1 Ds +3 Dt +837 1019 MXY +0 17 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +929 878 MXY +0 17 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1021 939 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1113 1043 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1205 1314 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1297 1567 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1389 1665 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1481 1699 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1573 1828 MXY +0 18 Dl +0 -9 Dl +9 0 Dl +-18 0 Dl +1665 1804 MXY +0 18 Dl +0 -9 Dl +8 0 Dl +-17 0 Dl +5 Dt +837 1027 MXY +92 -141 Dl +92 62 Dl +92 104 Dl +92 271 Dl +92 253 Dl +92 98 Dl +92 34 Dl +92 129 Dl +92 -24 Dl +745 2060 MXY +920 0 Dl +745 MX +0 -1174 Dl +4 Ds +1 Dt +1205 2060 MXY +0 -1174 Dl +1665 2060 MXY +0 -1174 Dl +745 1766 MXY +920 0 Dl +9 s +673 1796(3)N +745 1473 MXY +920 0 Dl +673 1503(5)N +745 1180 MXY +920 0 Dl +673 1210(8)N +745 886 MXY +920 0 Dl +637 916(10)N +905 2199(Multiprogramming)N +622 851(in)N +700(TPS)X +575 792(Throughput)N +733 2152(0)N +1196(5)X +1629(10)X +3 Dt +-1 Ds +8 s +655 2441(Figure)N +872(7:)X +960(Multi-user)X +1286(Performance.)X +1 f +655 2531(Since)N +825(the)X +931(con\256guration)X +1300(is)X +1371(completely)X +655 2621(disk)N +790(bound,)X +994(we)X +1096(see)X +1204(only)X +1345(a)X +1400(small)X +1566(im-)X +655 2711(provement)N +964(by)X +1064(adding)X +1274(a)X +1337(second)X +1549(pro-)X +655 2801(cess.)N +849(Adding)X +1081(any)X +1213(more)X +1383(concurrent)X +655 2891(processes)N +935(causes)X +1137(performance)X +1493(degra-)X +655 2981(dation.)N +3 f +1927 2441(Figure)N +2149(8:)X +2243(Multi-user)X +2574(Performance)X +1927 2531(on)N +2021(a)X +2079(small)X +2251(database.)X +1 f +2551(With)X +2704(one)X +2821(pro-)X +1927 2621(cess,)N +2075(we)X +2174(are)X +2276(driving)X +2486(the)X +2589(CPU)X +2739(at)X +2810(96%)X +1927 2711(utilization)N +2215(leaving)X +2430(little)X +2575(room)X +2737(for)X +2838(im-)X +1927 2801(provement)N +2238(as)X +2328(the)X +2443(multiprogramming)X +1927 2891(level)N +2091(increases.)X +2396(In)X +2489(the)X +2607(NO-FSYNC)X +1927 2981(case,)N +2076(lock)X +2209(contention)X +2502(degrades)X +2751(perfor-)X +1927 3071(mance)N +2117(as)X +2194(soon)X +2339(as)X +2416(a)X +2468(second)X +2669(process)X +2884(is)X +1927 3161(added.)N +3 f +3199 2441(Figure)N +3405(9:)X +3482(Abort)X +3669(rates)X +3827(on)X +3919(the)X +4028(TPCB)X +3199 2531(Benchmark.)N +1 f +3589(The)X +3726(abort)X +3895(rate)X +4028(climbs)X +3199 2621(more)N +3366(quickly)X +3594(for)X +3704(the)X +3818(large)X +3980(database)X +3199 2711(test)N +3324(since)X +3491(processes)X +3771(are)X +3884(descheduled)X +3199 2801(more)N +3409(frequently,)X +3766(allowing)X +4068(more)X +3199 2891(processes)N +3459(to)X +3525(vie)X +3619(for)X +3709(the)X +3803(same)X +3950(locks.)X +10 s +10 f +555 3284(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +1 f +555 3560(records,)N +835(we)X +952(expect)X +1185(contention)X +1546(to)X +1631(become)X +1904(a)X +1963(factor)X +2174(quickly)X +2437(and)X +2576(the)X +2697(NO-FSYNC)X +3120(line)X +3263(in)X +3348(\256gure)X +3557(eight)X +3739(demonstrates)X +4184(this)X +555 3650(dramatically.)N +1022(Each)X +1209(additional)X +1555(process)X +1822(causes)X +2058(both)X +2226(more)X +2417(waiting)X +2682(and)X +2823(more)X +3013(deadlocking.)X +3470(Figure)X +3704(nine)X +3867(shows)X +4092(that)X +4237(in)X +555 3740(the)N +681(small)X +882(database)X +1187(case)X +1353(\(SMALL\),)X +1725(waiting)X +1992(is)X +2072(the)X +2197(dominant)X +2526(cause)X +2732(of)X +2826(declining)X +3151(performance)X +3585(\(the)X +3737(number)X +4009(of)X +4103(aborts)X +555 3830(increases)N +878(less)X +1026(steeply)X +1281(than)X +1447(the)X +1573(performance)X +2008(drops)X +2214(off)X +2336(in)X +2426(\256gure)X +2641(eight\),)X +2876(while)X +3082(in)X +3172(the)X +3298(large)X +3487(database)X +3792(case)X +3958(\(LARGE\),)X +555 3920(deadlocking)N +967(contributes)X +1343(more)X +1528(to)X +1610(the)X +1728(declining)X +2046(performance.)X +755 4043(Deadlocks)N +1116(are)X +1237(more)X +1424(likely)X +1628(to)X +1712(occur)X +1913(in)X +1997(the)X +2116(LARGE)X +2404(test)X +2536(than)X +2695(in)X +2778(the)X +2897(SMALL)X +3189(test)X +3321(because)X +3597(there)X +3779(are)X +3899(more)X +4085(oppor-)X +555 4133(tunities)N +814(to)X +900(wait.)X +1082(In)X +1173(the)X +1295(SMALL)X +1590(case,)X +1773(processes)X +2105(never)X +2307(do)X +2410(I/O)X +2540(and)X +2679(are)X +2801(less)X +2944(likely)X +3149(to)X +3234(be)X +3333(descheduled)X +3753(during)X +3985(a)X +4044(transac-)X +555 4223(tion.)N +740(In)X +828(the)X +947(LARGE)X +1235(case,)X +1415(processes)X +1744(will)X +1889(frequently)X +2240(be)X +2337(descheduled)X +2755(since)X +2941(they)X +3100(have)X +3273(to)X +3356(perform)X +3636(I/O.)X +3804(This)X +3967(provides)X +4263(a)X +555 4313(window)N +837(where)X +1058(a)X +1118(second)X +1365(process)X +1630(can)X +1766(request)X +2022(locks)X +2215(on)X +2318(already)X +2578(locked)X +2815(pages,)X +3041(thus)X +3197(increasing)X +3550(the)X +3671(likelihood)X +4018(of)X +4108(build-)X +555 4403(ing)N +677(up)X +777(long)X +939(chains)X +1164(of)X +1251(waiting)X +1511(processes.)X +1879(Eventually,)X +2266(this)X +2401(leads)X +2586(to)X +2668(deadlock.)X +3 f +555 4589(5.2.)N +715(The)X +868(OO1)X +1052(Benchmark)X +1 f +755 4712(The)N +903(TPCB)X +1125(benchmark)X +1505(described)X +1836(in)X +1921(the)X +2042(previous)X +2341(section)X +2591(measures)X +2913(performance)X +3343(under)X +3549(a)X +3608(conventional)X +4044(transac-)X +555 4802(tion)N +706(processing)X +1076(workload.)X +1446(Other)X +1656(application)X +2039(domains,)X +2357(such)X +2531(as)X +2625(computer-aided)X +3156(design,)X +3412(have)X +3591(substantially)X +4022(different)X +555 4892(access)N +786(patterns.)X +1105(In)X +1197(order)X +1392(to)X +1479(measure)X +1772(the)X +1895(performance)X +2327(of)X +2418(LIBTP)X +2664(under)X +2871(workloads)X +3229(of)X +3320(this)X +3459(type,)X +3641(we)X +3759(implemented)X +4201(the)X +555 4982(OO1)N +731(benchmark)X +1108(described)X +1436(in)X +1518([CATT91].)X +755 5105(The)N +908(database)X +1213(models)X +1472(a)X +1535(set)X +1651(of)X +1745(electronics)X +2120(components)X +2534(with)X +2703(connections)X +3113(among)X +3358(them.)X +3585(One)X +3746(table)X +3929(stores)X +4143(parts)X +555 5195(and)N +696(another)X +962(stores)X +1174(connections.)X +1622(There)X +1835(are)X +1959(three)X +2145(connections)X +2552(originating)X +2927(at)X +3009(any)X +3149(given)X +3351(part.)X +3540(Ninety)X +3782(percent)X +4043(of)X +4134(these)X +555 5285(connections)N +960(are)X +1081(to)X +1165(nearby)X +1406(parts)X +1584(\(those)X +1802(with)X +1966(nearby)X +2 f +2207(ids)X +1 f +2300(\))X +2348(to)X +2431(model)X +2652(the)X +2771(spatial)X +3001(locality)X +3262(often)X +3448(exhibited)X +3767(in)X +3850(CAD)X +4040(applica-)X +555 5375(tions.)N +779(Ten)X +933(percent)X +1198(of)X +1293(the)X +1419(connections)X +1830(are)X +1957(randomly)X +2292(distributed)X +2662(among)X +2908(all)X +3016(other)X +3209(parts)X +3393(in)X +3483(the)X +3609(database.)X +3954(Every)X +4174(part)X +555 5465(appears)N +829(exactly)X +1089(three)X +1278(times)X +1479(in)X +1569(the)X +2 f +1695(from)X +1 f +1874(\256eld)X +2043(of)X +2137(a)X +2200(connection)X +2579(record,)X +2832(and)X +2975(zero)X +3141(or)X +3235(more)X +3427(times)X +3627(in)X +3716(the)X +2 f +3841(to)X +1 f +3930(\256eld.)X +4139(Parts)X +555 5555(have)N +2 f +727(x)X +1 f +783(and)X +2 f +919(y)X +1 f +975(locations)X +1284(set)X +1393(randomly)X +1720(in)X +1802(an)X +1898(appropriate)X +2284(range.)X + +14 p +%%Page: 14 14 +10 s 10 xH 0 xS 1 f +3 f +1 f +755 630(The)N +900(intent)X +1102(of)X +1189(OO1)X +1365(is)X +1438(to)X +1520(measure)X +1808(the)X +1926(overall)X +2169(cost)X +2318(of)X +2405(a)X +2461(query)X +2664(mix)X +2808(characteristic)X +3257(of)X +3344(engineering)X +3743(database)X +4040(applica-)X +555 720(tions.)N +770(There)X +978(are)X +1097(three)X +1278(tests:)X +10 f +635 843(g)N +2 f +755(Lookup)X +1 f +1022(generates)X +1353(1,000)X +1560(random)X +1832(part)X +2 f +1984(ids)X +1 f +2077(,)X +2124(fetches)X +2378(the)X +2502(corresponding)X +2987(parts)X +3169(from)X +3351(the)X +3475(database,)X +3798(and)X +3940(calls)X +4113(a)X +4175(null)X +755 933(procedure)N +1097(in)X +1179(the)X +1297(host)X +1450(programming)X +1906(language)X +2216(with)X +2378(the)X +2496(parts')X +2 f +2699(x)X +1 f +2755(and)X +2 f +2891(y)X +1 f +2947(positions.)X +10 f +635 1056(g)N +2 f +755(Traverse)X +1 f +1067(retrieves)X +1371(a)X +1434(random)X +1706(part)X +1858(from)X +2041(the)X +2166(database)X +2470(and)X +2613(follows)X +2880(connections)X +3290(from)X +3473(it)X +3544(to)X +3632(other)X +3823(parts.)X +4045(Each)X +4232(of)X +755 1146(those)N +947(parts)X +1126(is)X +1202(retrieved,)X +1531(and)X +1670(all)X +1773(connections)X +2179(from)X +2358(it)X +2424(followed.)X +2771(This)X +2935(procedure)X +3279(is)X +3354(repeated)X +3649(depth-\256rst)X +4000(for)X +4116(seven)X +755 1236(hops)N +930(from)X +1110(the)X +1232(original)X +1505(part,)X +1674(for)X +1792(a)X +1852(total)X +2018(of)X +2109(3280)X +2293(parts.)X +2513(Backward)X +2862(traversal)X +3162(also)X +3314(exists,)X +3539(and)X +3678(follows)X +3941(all)X +4044(connec-)X +755 1326(tions)N +930(into)X +1074(a)X +1130(given)X +1328(part)X +1473(to)X +1555(their)X +1722(origin.)X +10 f +635 1449(g)N +2 f +755(Insert)X +1 f +962(adds)X +1129(100)X +1269(new)X +1423(parts)X +1599(and)X +1735(their)X +1902(connections.)X +755 1572(The)N +913(benchmark)X +1303(is)X +1389(single-user,)X +1794(but)X +1929(multi-user)X +2291(access)X +2530(controls)X +2821(\(locking)X +3120(and)X +3268(transaction)X +3652(protection\))X +4036(must)X +4223(be)X +555 1662(enforced.)N +898(It)X +968(is)X +1042(designed)X +1348(to)X +1431(be)X +1528(run)X +1656(on)X +1757(a)X +1814(database)X +2112(with)X +2275(20,000)X +2516(parts,)X +2713(and)X +2850(on)X +2951(one)X +3087(with)X +3249(200,000)X +3529(parts.)X +3745(Because)X +4033(we)X +4147(have)X +555 1752(insuf\256cient)N +935(disk)X +1088(space)X +1287(for)X +1401(the)X +1519(larger)X +1727(database,)X +2044(we)X +2158(report)X +2370(results)X +2599(only)X +2761(for)X +2875(the)X +2993(20,000)X +3233(part)X +3378(database.)X +3 f +555 1938(5.2.1.)N +775(Implementation)X +1 f +755 2061(The)N +920(LIBTP)X +1182(implementation)X +1724(of)X +1831(OO1)X +2027(uses)X +2205(the)X +2342(TCL)X +2532([OUST90])X +2914(interface)X +3235(described)X +3582(earlier.)X +3867(The)X +4031(backend)X +555 2151(accepts)N +813(commands)X +1181(over)X +1345(an)X +1442(IP)X +1534(socket)X +1760(and)X +1897(performs)X +2208(the)X +2327(requested)X +2656(database)X +2954(actions.)X +3242(The)X +3387(frontend)X +3679(opens)X +3886(and)X +4022(executes)X +555 2241(a)N +618(TCL)X +796(script.)X +1041(This)X +1210(script)X +1415(contains)X +1709(database)X +2013(accesses)X +2313(interleaved)X +2697(with)X +2866(ordinary)X +3165(program)X +3463(control)X +3716(statements.)X +4120(Data-)X +555 2331(base)N +718(commands)X +1085(are)X +1204(submitted)X +1539(to)X +1621(the)X +1739(backend)X +2027(and)X +2163(results)X +2392(are)X +2511(bound)X +2731(to)X +2813(program)X +3105(variables.)X +755 2454(The)N +903(parts)X +1082(table)X +1261(was)X +1409(stored)X +1628(as)X +1718(a)X +1776(B-tree)X +1999(indexed)X +2275(by)X +2 f +2377(id)X +1 f +2439(.)X +2501(The)X +2648(connection)X +3022(table)X +3200(was)X +3347(stored)X +3565(as)X +3654(a)X +3712(set)X +3823(of)X +3912(\256xed-length)X +555 2544(records)N +824(using)X +1029(the)X +1159(4.4BSD)X +1446(recno)X +1657(access)X +1895(method.)X +2207(In)X +2306(addition,)X +2620(two)X +2771(B-tree)X +3003(indices)X +3261(were)X +3449(maintained)X +3836(on)X +3947(connection)X +555 2634(table)N +732(entries.)X +1007(One)X +1162(index)X +1360(mapped)X +1634(the)X +2 f +1752(from)X +1 f +1923(\256eld)X +2085(to)X +2167(a)X +2223(connection)X +2595(record)X +2821(number,)X +3106(and)X +3242(the)X +3360(other)X +3545(mapped)X +3819(the)X +2 f +3937(to)X +1 f +4019(\256eld)X +4181(to)X +4263(a)X +555 2724(connection)N +932(record)X +1163(number.)X +1473(These)X +1690(indices)X +1941(support)X +2205(fast)X +2345(lookups)X +2622(on)X +2726(connections)X +3133(in)X +3219(both)X +3385(directions.)X +3765(For)X +3900(the)X +4022(traversal)X +555 2814(tests,)N +743(the)X +867(frontend)X +1165(does)X +1338(an)X +1439(index)X +1642(lookup)X +1889(to)X +1976(discover)X +2273(the)X +2396(connected)X +2747(part's)X +2 f +2955(id)X +1 f +3017(,)X +3062(and)X +3203(then)X +3366(does)X +3538(another)X +3804(lookup)X +4051(to)X +4138(fetch)X +555 2904(the)N +673(part)X +818(itself.)X +3 f +555 3090(5.2.2.)N +775(Performance)X +1242(Measurements)X +1766(for)X +1889(OO1)X +1 f +755 3213(We)N +888(compare)X +1186(LIBTP's)X +1487(OO1)X +1664(performance)X +2092(to)X +2174(that)X +2314(reported)X +2602(in)X +2684([CATT91].)X +3087(Those)X +3303(results)X +3532(were)X +3709(collected)X +4019(on)X +4119(a)X +4175(Sun)X +555 3303(3/280)N +759(\(25)X +888(MHz)X +1075(MC68020\))X +1448(with)X +1612(16)X +1714(MBytes)X +1989(of)X +2078(memory)X +2367(and)X +2505(two)X +2647(Hitachi)X +2904(892MByte)X +3267(disks)X +3452(\(15)X +3580(ms)X +3694(average)X +3966(seek)X +4130(time\))X +555 3393(behind)N +793(an)X +889(SMD-4)X +1149(controller.)X +1521(Frontends)X +1861(ran)X +1984(on)X +2084(an)X +2180(8MByte)X +2462(Sun)X +2606(3/260.)X +755 3516(In)N +844(order)X +1036(to)X +1120(measure)X +1410(performance)X +1839(on)X +1941(a)X +1999(machine)X +2293(of)X +2382(roughly)X +2653(equivalent)X +3009(processor)X +3339(power,)X +3582(we)X +3698(ran)X +3822(one)X +3959(set)X +4069(of)X +4157(tests)X +555 3606(on)N +666(a)X +733(standalone)X +1107(MC68030-based)X +1671(HP300)X +1923(\(33MHz)X +2225(MC68030\).)X +2646(The)X +2801(database)X +3108(was)X +3263(stored)X +3489(on)X +3599(a)X +3665(300MByte)X +4037(HP7959)X +555 3696(SCSI)N +744(disk)X +898(\(17)X +1026(ms)X +1139(average)X +1410(seek)X +1573(time\).)X +1802(Since)X +2000(this)X +2135(machine)X +2427(is)X +2500(not)X +2622(connected)X +2968(to)X +3050(a)X +3106(network,)X +3409(we)X +3523(ran)X +3646(local)X +3822(tests)X +3984(where)X +4201(the)X +555 3786(frontend)N +855(and)X +999(backend)X +1295(run)X +1430(on)X +1538(the)X +1664(same)X +1856(machine.)X +2195(We)X +2334(compare)X +2638(these)X +2830(measurements)X +3316(with)X +3485(Cattell's)X +3783(local)X +3966(Sun)X +4117(3/280)X +555 3876(numbers.)N +755 3999(Because)N +1051(the)X +1177(benchmark)X +1562(requires)X +1849(remote)X +2100(access,)X +2354(we)X +2476(ran)X +2607(another)X +2876(set)X +2993(of)X +3088(tests)X +3258(on)X +3365(a)X +3428(DECstation)X +3828(5000/200)X +4157(with)X +555 4089(32M)N +732(of)X +825(memory)X +1118(running)X +1393(Ultrix)X +1610(V4.0)X +1794(and)X +1936(a)X +1998(DEC)X +2184(1GByte)X +2459(RZ57)X +2666(SCSI)X +2859(disk.)X +3057(We)X +3194(compare)X +3496(the)X +3619(local)X +3800(performance)X +4232(of)X +555 4179(OO1)N +734(on)X +837(the)X +958(DECstation)X +1354(to)X +1439(its)X +1536(remote)X +1781(performance.)X +2250(For)X +2383(the)X +2503(remote)X +2748(case,)X +2929(we)X +3045(ran)X +3170(the)X +3290(frontend)X +3584(on)X +3686(a)X +3744(DECstation)X +4139(3100)X +555 4269(with)N +717(16)X +817(MBytes)X +1090(of)X +1177(main)X +1357(memory.)X +755 4392(The)N +900(databases)X +1228(tested)X +1435(in)X +1517([CATT91])X +1880(are)X +10 f +635 4515(g)N +1 f +755(INDEX,)X +1045(a)X +1101(highly-optimized)X +1672(access)X +1898(method)X +2158(package)X +2442(developed)X +2792(at)X +2870(Sun)X +3014(Microsystems.)X +10 f +635 4638(g)N +1 f +755(OODBMS,)X +1137(a)X +1193(beta)X +1347(release)X +1591(of)X +1678(a)X +1734(commercial)X +2133(object-oriented)X +2639(database)X +2936(management)X +3366(system.)X +10 f +635 4761(g)N +1 f +755(RDBMS,)X +1076(a)X +1133(UNIX-based)X +1565(commercial)X +1965(relational)X +2289(data)X +2444(manager)X +2742(at)X +2821(production)X +3189(release.)X +3474(The)X +3620(OO1)X +3797(implementation)X +755 4851(used)N +922(embedded)X +1272(SQL)X +1443(in)X +1525(C.)X +1638(Stored)X +1867(procedures)X +2240(were)X +2417(de\256ned)X +2673(to)X +2755(reduce)X +2990(client-server)X +3412(traf\256c.)X +755 4974(Table)N +974(two)X +1130(shows)X +1366(the)X +1500(measurements)X +1995(from)X +2187([CATT91])X +2566(and)X +2718(LIBTP)X +2976(for)X +3106(a)X +3178(local)X +3370(test)X +3517(on)X +3632(the)X +3765(MC680x0-based)X +555 5064(hardware.)N +915(All)X +1037(caches)X +1272(are)X +1391(cleared)X +1644(before)X +1870(each)X +2038(test.)X +2209(All)X +2331(times)X +2524(are)X +2643(in)X +2725(seconds.)X +755 5187(Table)N +960(two)X +1102(shows)X +1324(that)X +1466(LIBTP)X +1710(outperforms)X +2123(the)X +2242(commercial)X +2642(relational)X +2966(system,)X +3229(but)X +3352(is)X +3426(slower)X +3661(than)X +3820(OODBMS)X +4183(and)X +555 5277(INDEX.)N +872(Since)X +1077(the)X +1202(caches)X +1444(were)X +1628(cleared)X +1888(at)X +1973(the)X +2098(start)X +2263(of)X +2356(each)X +2530(test,)X +2687(disk)X +2846(throughput)X +3223(is)X +3302(critical)X +3551(in)X +3639(this)X +3780(test.)X +3957(The)X +4108(single)X +555 5367(SCSI)N +749(HP)X +877(drive)X +1068(used)X +1241(by)X +1347(LIBTP)X +1595(is)X +1674(approximately)X +2163(13%)X +2336(slower)X +2576(than)X +2739(the)X +2862(disks)X +3051(used)X +3223(in)X +3310([CATT91])X +3678(which)X +3899(accounts)X +4205(for)X +555 5457(part)N +700(of)X +787(the)X +905(difference.)X +755 5580(OODBMS)N +1118(and)X +1255(INDEX)X +1525(outperform)X +1906(LIBTP)X +2148(most)X +2323(dramatically)X +2744(on)X +2844(traversal.)X +3181(This)X +3343(is)X +3416(because)X +3691(we)X +3805(use)X +3932(index)X +4130(look-)X +555 5670(ups)N +689(to)X +774(\256nd)X +921(connections,)X +1347(whereas)X +1634(the)X +1755(other)X +1942(two)X +2084(systems)X +2359(use)X +2488(a)X +2546(link)X +2692(access)X +2920(method.)X +3222(The)X +3369(index)X +3569(requires)X +3850(us)X +3943(to)X +4027(examine)X + +15 p +%%Page: 15 15 +10 s 10 xH 0 xS 1 f +3 f +1 f +10 f +555 679(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)N +2 f +606 769(Measure)N +1 f +1019(INDEX)X +1389(OODBMS)X +1851(RDBMS)X +2250(LIBTP)X +10 f +555 771(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)N +555 787(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)N +1 f +595 869(Lookup)N +1114(5.4)X +1490(12.9)X +1950(27)X +2291(27.2)X +595 959(Traversal)N +1074(13)X +1530(9.8)X +1950(90)X +2291(47.3)X +595 1049(Insert)N +1114(7.4)X +1530(1.5)X +1950(22)X +2331(9.7)X +10 f +555 1059(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)N +555(c)X +999(c)Y +919(c)Y +839(c)Y +759(c)Y +959 1059(c)N +999(c)Y +919(c)Y +839(c)Y +759(c)Y +1329 1059(c)N +999(c)Y +919(c)Y +839(c)Y +759(c)Y +1791 1059(c)N +999(c)Y +919(c)Y +839(c)Y +759(c)Y +2190 1059(c)N +999(c)Y +919(c)Y +839(c)Y +759(c)Y +2512 1059(c)N +999(c)Y +919(c)Y +839(c)Y +759(c)Y +2618 679(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2 f +2829 769(Measure)N +3401(Cache)X +3726(Local)X +4028(Remote)X +1 f +10 f +2618 771(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2618 787(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2658 869(Lookup)N +3401(cold)X +3747(15.7)X +4078(20.6)X +3401 959(warm)N +3787(7.8)X +4078(12.4)X +10 f +2618 969(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2658 1059(Forward)N +2950(traversal)X +3401(cold)X +3747(28.4)X +4078(52.6)X +3401 1149(warm)N +3747(23.5)X +4078(47.4)X +10 f +2618 1159(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2658 1249(Backward)N +3004(traversal)X +3401(cold)X +3747(24.2)X +4078(47.4)X +3401 1339(warm)N +3747(24.3)X +4078(47.6)X +10 f +2618 1349(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +1 f +2658 1439(Insert)N +3401(cold)X +3787(7.5)X +4078(10.3)X +3401 1529(warm)N +3787(6.7)X +4078(10.9)X +10 f +2618 1539(i)N +2629(iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii)X +2618(c)X +1479(c)Y +1399(c)Y +1319(c)Y +1239(c)Y +1159(c)Y +1079(c)Y +999(c)Y +919(c)Y +839(c)Y +759(c)Y +3341 1539(c)N +1479(c)Y +1399(c)Y +1319(c)Y +1239(c)Y +1159(c)Y +1079(c)Y +999(c)Y +919(c)Y +839(c)Y +759(c)Y +3666 1539(c)N +1479(c)Y +1399(c)Y +1319(c)Y +1239(c)Y +1159(c)Y +1079(c)Y +999(c)Y +919(c)Y +839(c)Y +759(c)Y +3968 1539(c)N +1479(c)Y +1399(c)Y +1319(c)Y +1239(c)Y +1159(c)Y +1079(c)Y +999(c)Y +919(c)Y +839(c)Y +759(c)Y +4309 1539(c)N +1479(c)Y +1399(c)Y +1319(c)Y +1239(c)Y +1159(c)Y +1079(c)Y +999(c)Y +919(c)Y +839(c)Y +759(c)Y +3 f +587 1785(Table)N +823(2:)X +931(Local)X +1163(MC680x0)X +1538(Performance)X +2026(of)X +2133(Several)X +587 1875(Systems)N +883(on)X +987(OO1.)X +2667 1785(Table)N +2909(3:)X +3023(Local)X +3260(vs.)X +3397(Remote)X +3707(Performance)X +4200(of)X +2667 1875(LIBTP)N +2926(on)X +3030(OO1.)X +1 f +10 f +555 1998(h)N +579(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)X +1 f +555 2274(two)N +696(disk)X +850(pages,)X +1074(but)X +1197(the)X +1316(links)X +1492(require)X +1741(only)X +1904(one,)X +2061(regardless)X +2408(of)X +2496(database)X +2794(size.)X +2980(Cattell)X +3214(reports)X +3458(that)X +3599(lookups)X +3873(using)X +4067(B-trees)X +555 2364(instead)N +808(of)X +901(links)X +1082(makes)X +1313(traversal)X +1616(take)X +1776(twice)X +1976(as)X +2069(long)X +2237(in)X +2325(INDEX.)X +2641(Adding)X +2907(a)X +2969(link)X +3119(access)X +3351(method)X +3617(to)X +3 f +3704(db)X +1 f +3792(\(3\))X +3911(or)X +4003(using)X +4201(the)X +555 2454(existing)N +828(hash)X +995(method)X +1255(would)X +1475(apparently)X +1834(be)X +1930(a)X +1986(good)X +2166(idea.)X +755 2577(Both)N +936(OODBMS)X +1304(and)X +1446(INDEX)X +1722(issue)X +1908 0.1944(coarser-granularity)AX +2545(locks)X +2739(than)X +2902(LIBTP.)X +3189(This)X +3356(limits)X +3562(concurrency)X +3985(for)X +4104(multi-)X +555 2667(user)N +711(applications,)X +1140(but)X +1264(helps)X +1455(single-user)X +1829(applications.)X +2278(In)X +2367(addition,)X +2671(the)X +2791(fact)X +2934(that)X +3076(LIBTP)X +3319(releases)X +3595(B-tree)X +3817(locks)X +4007(early)X +4189(is)X +4263(a)X +555 2757(drawback)N +896(in)X +986(OO1.)X +1210(Since)X +1416(there)X +1605(is)X +1686(no)X +1793(concurrency)X +2218(in)X +2307(the)X +2432(benchmark,)X +2836(high-concurrency)X +3430(strategies)X +3760(only)X +3929(show)X +4125(up)X +4232(as)X +555 2847(increased)N +882(locking)X +1145(overhead.)X +1503(Finally,)X +1772(the)X +1892(architecture)X +2294(of)X +2383(the)X +2503(LIBTP)X +2747(implementation)X +3271(was)X +3418(substantially)X +3844(different)X +4143(from)X +555 2937(that)N +702(of)X +796(either)X +1006(OODBMS)X +1375(or)X +1469(INDEX.)X +1786(Both)X +1968(of)X +2062(those)X +2258(systems)X +2538(do)X +2645(the)X +2770(searches)X +3070(in)X +3159(the)X +3284(user's)X +3503(address)X +3771(space,)X +3997(and)X +4139(issue)X +555 3027(requests)N +844(for)X +964(pages)X +1173(to)X +1260(the)X +1383(server)X +1605(process.)X +1911(Pages)X +2123(are)X +2247(cached)X +2496(in)X +2583(the)X +2706(client,)X +2929(and)X +3070(many)X +3273(queries)X +3530(can)X +3667(be)X +3768(satis\256ed)X +4055(without)X +555 3117(contacting)N +910(the)X +1029(server)X +1247(at)X +1326(all.)X +1467(LIBTP)X +1710(submits)X +1979(all)X +2080(the)X +2199(queries)X +2452(to)X +2535(the)X +2653(server)X +2870(process,)X +3151(and)X +3287(receives)X +3571(database)X +3868(records)X +4125(back;)X +555 3207(it)N +619(does)X +786(no)X +886(client)X +1084(caching.)X +755 3330(The)N +911(RDBMS)X +1221(architecture)X +1632(is)X +1716(much)X +1925(closer)X +2148(to)X +2241(that)X +2392(of)X +2490(LIBTP.)X +2783(A)X +2872(server)X +3100(process)X +3372(receives)X +3667(queries)X +3930(and)X +4076(returns)X +555 3420(results)N +786(to)X +870(a)X +928(client.)X +1168(The)X +1315(timing)X +1545(results)X +1776(in)X +1860(table)X +2038(two)X +2180(clearly)X +2421(show)X +2612(that)X +2754(the)X +2874(conventional)X +3309(database)X +3607(client/server)X +4025(model)X +4246(is)X +555 3510(expensive.)N +941(LIBTP)X +1188(outperforms)X +1605(the)X +1728(RDBMS)X +2032(on)X +2136(traversal)X +2437(and)X +2577(insertion.)X +2921(We)X +3057(speculate)X +3380(that)X +3524(this)X +3663(is)X +3740(due)X +3880(in)X +3966(part)X +4115(to)X +4201(the)X +555 3600(overhead)N +870(of)X +957(query)X +1160(parsing,)X +1436(optimization,)X +1880(and)X +2016(repeated)X +2309(interpretation)X +2761(of)X +2848(the)X +2966(plan)X +3124(tree)X +3265(in)X +3347(the)X +3465(RDBMS')X +3791(query)X +3994(executor.)X +755 3723(Table)N +962(three)X +1147(shows)X +1371(the)X +1492(differences)X +1873(between)X +2164(local)X +2343(and)X +2482(remote)X +2728(execution)X +3063(of)X +3153(LIBTP's)X +3456(OO1)X +3635(implementation)X +4160(on)X +4263(a)X +555 3813(DECstation.)N +989(We)X +1122(measured)X +1451(performance)X +1879(with)X +2042(a)X +2099(populated)X +2436(\(warm\))X +2694(cache)X +2899(and)X +3036(an)X +3133(empty)X +3354(\(cold\))X +3567(cache.)X +3812(Reported)X +4126(times)X +555 3903(are)N +681(the)X +806(means)X +1037(of)X +1130(twenty)X +1374(tests,)X +1562(and)X +1704(are)X +1829(in)X +1917(seconds.)X +2237(Standard)X +2548(deviations)X +2903(were)X +3086(within)X +3316(seven)X +3525(percent)X +3788(of)X +3881(the)X +4005(mean)X +4205(for)X +555 3993(remote,)N +818(and)X +954(two)X +1094(percent)X +1351(of)X +1438(the)X +1556(mean)X +1750(for)X +1864(local.)X +755 4116(The)N +914(20ms)X +1121(overhead)X +1450(of)X +1551(TCP/IP)X +1824(on)X +1938(an)X +2048(Ethernet)X +2354(entirely)X +2633(accounts)X +2948(for)X +3076(the)X +3207(difference)X +3567(in)X +3662(speed.)X +3918(The)X +4076(remote)X +555 4206(traversal)N +857(times)X +1055(are)X +1179(nearly)X +1405(double)X +1648(the)X +1771(local)X +1952(times)X +2150(because)X +2430(we)X +2549(do)X +2653(index)X +2855(lookups)X +3132(and)X +3272(part)X +3421(fetches)X +3673(in)X +3759(separate)X +4047(queries.)X +555 4296(It)N +629(would)X +854(make)X +1053(sense)X +1252(to)X +1339(do)X +1444(indexed)X +1723(searches)X +2021(on)X +2126(the)X +2248(server,)X +2489(but)X +2615(we)X +2733(were)X +2914(unwilling)X +3244(to)X +3330(hard-code)X +3676(knowledge)X +4052(of)X +4143(OO1)X +555 4386(indices)N +803(into)X +948(our)X +1075(LIBTP)X +1317(TCL)X +1488(server.)X +1745(Cold)X +1920(and)X +2056(warm)X +2259(insertion)X +2559(times)X +2752(are)X +2871(identical)X +3167(since)X +3352(insertions)X +3683(do)X +3783(not)X +3905(bene\256t)X +4143(from)X +555 4476(caching.)N +755 4599(One)N +915(interesting)X +1279(difference)X +1632(shown)X +1867(by)X +1973(table)X +2155(three)X +2342(is)X +2421(the)X +2545(cost)X +2700(of)X +2793(forward)X +3074(versus)X +3305(backward)X +3644(traversal.)X +3987(When)X +4205(we)X +555 4689(built)N +725(the)X +847(database,)X +1168(we)X +1285(inserted)X +1562(parts)X +1741(in)X +1826(part)X +2 f +1974(id)X +1 f +2059(order.)X +2292(We)X +2427(built)X +2596(the)X +2717(indices)X +2967(at)X +3048(the)X +3169(same)X +3357(time.)X +3562(Therefore,)X +3923(the)X +4044(forward)X +555 4779(index)N +757(had)X +897(keys)X +1068(inserted)X +1346(in)X +1432(order,)X +1646(while)X +1848(the)X +1970(backward)X +2307(index)X +2509(had)X +2649(keys)X +2820(inserted)X +3098(more)X +3286(randomly.)X +3656(In-order)X +3943(insertion)X +4246(is)X +555 4885(pessimal)N +858(for)X +975(B-tree)X +1199(indices,)X +1469(so)X +1563(the)X +1684(forward)X +1962(index)X +2163(is)X +2239(much)X +2440(larger)X +2651(than)X +2812(the)X +2933(backward)X +3269(one)X +7 s +3385 4853(5)N +10 s +4885(.)Y +3476(This)X +3640(larger)X +3850(size)X +3997(shows)X +4219(up)X +555 4975(as)N +642(extra)X +823(disk)X +976(reads)X +1166(in)X +1248(the)X +1366(cold)X +1524(benchmark.)X +3 f +555 5161(6.)N +655(Conclusions)X +1 f +755 5284(LIBTP)N +1006(provides)X +1311(the)X +1438(basic)X +1632(building)X +1927(blocks)X +2165(to)X +2256(support)X +2525(transaction)X +2906(protection.)X +3300(In)X +3396(comparison)X +3799(with)X +3970(traditional)X +555 5374(Unix)N +746(libraries)X +1040(and)X +1187(commercial)X +1597(systems,)X +1900(it)X +1974(offers)X +2192(a)X +2258(variety)X +2511(of)X +2608(tradeoffs.)X +2964(Using)X +3185(complete)X +3509(transaction)X +3891(protection)X +4246(is)X +555 5464(more)N +747(complicated)X +1166(than)X +1331(simply)X +1575(adding)X +3 f +1820(fsync)X +1 f +1998(\(2\))X +2119(and)X +3 f +2262(\257ock)X +1 f +2426(\(2\))X +2547(calls)X +2721(to)X +2810(code,)X +3008(but)X +3136(it)X +3206(is)X +3285(faster)X +3490(in)X +3578(some)X +3773(cases)X +3969(and)X +4111(offers)X +8 s +10 f +555 5536(hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh)N +5 s +1 f +727 5614(5)N +8 s +763 5639(The)N +878(next)X +1004(release)X +1196(of)X +1265(the)X +1359(4.4BSD)X +1580(access)X +1758(method)X +1966(will)X +2082(automatically)X +2446(detect)X +2614(and)X +2722(compensate)X +3039(for)X +3129(in-order)X +3350(insertion,)X +3606(eliminating)X +3914(this)X +4023(problem.)X + +16 p +%%Page: 16 16 +8 s 8 xH 0 xS 1 f +10 s +3 f +1 f +555 630(stricter)N +801(guarantees)X +1168(\(atomicity,)X +1540(consistency,)X +1957(isolation,)X +2275(and)X +2414(durability\).)X +2815(If)X +2892(the)X +3013(data)X +3170(to)X +3255(be)X +3354(protected)X +3676(are)X +3798(already)X +4058(format-)X +555 720(ted)N +675(\()X +2 f +702(i.e.)X +1 f +821(use)X +949(one)X +1086(of)X +1174(the)X +1293(database)X +1591(access)X +1818(methods\),)X +2157(then)X +2316(adding)X +2555(transaction)X +2928(protection)X +3274(requires)X +3554(no)X +3655(additional)X +3996(complex-)X +555 810(ity,)N +679(but)X +801(incurs)X +1017(a)X +1073(performance)X +1500(penalty)X +1756(of)X +1843(approximately)X +2326(15%.)X +755 933(In)N +844(comparison)X +1240(with)X +1404(commercial)X +1805(database)X +2104(systems,)X +2399(the)X +2519(tradeoffs)X +2827(are)X +2948(more)X +3135(complex.)X +3473(LIBTP)X +3717(does)X +3886(not)X +4009(currently)X +555 1023(support)N +825(a)X +891(standard)X +1193(query)X +1406(language.)X +1766(The)X +1921(TCL-based)X +2312(server)X +2539(process)X +2810(allows)X +3049(a)X +3115(certain)X +3364(ease)X +3533(of)X +3630(use)X +3767(which)X +3993(would)X +4223(be)X +555 1113(enhanced)N +882(with)X +1047(a)X +1106(more)X +1294(user-friendly)X +1732(interface)X +2037(\()X +2 f +2064(e.g.)X +1 f +2203(a)X +2261(windows)X +2572(based)X +2777(query-by-form)X +3272(application\),)X +3697(for)X +3813(which)X +4031(we)X +4147(have)X +555 1203(a)N +620(working)X +916(prototype.)X +1292(When)X +1513(accesses)X +1815(do)X +1924(not)X +2055(require)X +2312(sophisticated)X +2758(query)X +2969(processing,)X +3360(the)X +3486(TCL)X +3665(interface)X +3975(is)X +4056(an)X +4160(ade-)X +555 1293(quate)N +756(solution.)X +1080(What)X +1281(LIBTP)X +1529(fails)X +1693(to)X +1781(provide)X +2052(in)X +2140(functionality,)X +2595(it)X +2665(makes)X +2896(up)X +3002(for)X +3122(in)X +3210(performance)X +3643(and)X +3785(\257exibility.)X +4161(Any)X +555 1383(application)N +931(may)X +1089(make)X +1283(use)X +1410(of)X +1497(its)X +1592(record)X +1818(interface)X +2120(or)X +2207(the)X +2325(more)X +2510(primitive)X +2823(log,)X +2965(lock,)X +3143(and)X +3279(buffer)X +3496(calls.)X +755 1506(Future)N +987(work)X +1175(will)X +1322(focus)X +1519(on)X +1621(overcoming)X +2026(some)X +2217(of)X +2306(the)X +2426(areas)X +2614(in)X +2698(which)X +2916(LIBTP)X +3160(is)X +3235(currently)X +3547(de\256cient)X +3845(and)X +3983(extending)X +555 1596(its)N +652(transaction)X +1026(model.)X +1288(The)X +1435(addition)X +1719(of)X +1808(an)X +1905(SQL)X +2077(parser)X +2295(and)X +2432(forms)X +2640(front)X +2817(end)X +2954(will)X +3099(improve)X +3387(the)X +3506(system's)X +3807(ease)X +3967(of)X +4055(use)X +4183(and)X +555 1686(make)N +750(it)X +815(more)X +1001(competitive)X +1400(with)X +1563(commercial)X +1963(systems.)X +2277(In)X +2365(the)X +2484(long)X +2647(term,)X +2835(we)X +2950(would)X +3170(like)X +3310(to)X +3392(add)X +3528(generalized)X +3919(hierarchical)X +555 1776(locking,)N +836(nested)X +1062(transactions,)X +1486(parallel)X +1748(transactions,)X +2171(passing)X +2431(of)X +2518(transactions)X +2921(between)X +3209(processes,)X +3557(and)X +3693(distributed)X +4055(commit)X +555 1866(handling.)N +900(In)X +992(the)X +1115(short)X +1300(term,)X +1492(the)X +1614(next)X +1776(step)X +1929(is)X +2006(to)X +2092(integrate)X +2397(LIBTP)X +2643(with)X +2809(the)X +2931(most)X +3110(recent)X +3331(release)X +3579(of)X +3670(the)X +3792(database)X +4093(access)X +555 1956(routines)N +833(and)X +969(make)X +1163(it)X +1227(freely)X +1435(available)X +1745(via)X +1863(anonymous)X +2252(ftp.)X +3 f +555 2142(7.)N +655(Acknowledgements)X +1 f +755 2265(We)N +888(would)X +1109(like)X +1250(to)X +1332(thank)X +1530(John)X +1701(Wilkes)X +1948(and)X +2084(Carl)X +2242(Staelin)X +2484(of)X +2571(Hewlett-Packard)X +3131(Laboratories)X +3557(and)X +3693(Jon)X +3824(Krueger.)X +4148(John)X +555 2355(and)N +694(Carl)X +855(provided)X +1162(us)X +1255(with)X +1419(an)X +1517(extra)X +1700(disk)X +1855(for)X +1971(the)X +2091(HP)X +2215(testbed)X +2464(less)X +2606(than)X +2766(24)X +2868(hours)X +3068(after)X +3238(we)X +3354(requested)X +3684(it.)X +3770(Jon)X +3903(spent)X +4094(count-)X +555 2445(less)N +699(hours)X +901(helping)X +1164(us)X +1258(understand)X +1633(the)X +1754(intricacies)X +2107(of)X +2197(commercial)X +2599(database)X +2899(products)X +3198(and)X +3337(their)X +3507(behavior)X +3811(under)X +4017(a)X +4076(variety)X +555 2535(of)N +642(system)X +884(con\256gurations.)X +3 f +555 2721(8.)N +655(References)X +1 f +555 2901([ANDR89])N +942(Andrade,)X +1265(J.,)X +1361(Carges,)X +1629(M.,)X +1765(Kovach,)X +2060(K.,)X +2183(``Building)X +2541(an)X +2642(On-Line)X +2939(Transaction)X +3343(Processing)X +3715(System)X +3975(On)X +4098(UNIX)X +727 2991(System)N +982(V'',)X +2 f +1134(CommUNIXations)X +1 f +1725(,)X +1765 0.2188(November/December)AX +2477(1989.)X +555 3171([BAY77])N +878(Bayer,)X +1110(R.,)X +1223(Schkolnick,)X +1623(M.,)X +1754(``Concurrency)X +2243(of)X +2330(Operations)X +2702(on)X +2802(B-Trees'',)X +2 f +3155(Acta)X +3322(Informatica)X +1 f +3700(,)X +3740(1977.)X +555 3351([BERN80])N +936(Bernstein,)X +1297(P.,)X +1415(Goodman,)X +1785(N.,)X +1917(``Timestamp)X +2365(Based)X +2595(Algorithms)X +2992(for)X +3119(Concurrency)X +3567(Control)X +3844(in)X +3939(Distributed)X +727 3441(Database)N +1042(Systems'',)X +2 f +1402(Proceedings)X +1823(6th)X +1945(International)X +2387(Conference)X +2777(on)X +2877(Very)X +3049(Large)X +3260(Data)X +3440(Bases)X +1 f +3627(,)X +3667(October)X +3946(1980.)X +555 3621([BSD91])N +864(DB\(3\),)X +2 f +1109(4.4BSD)X +1376(Unix)X +1552(Programmer's)X +2044(Manual)X +2313(Reference)X +2655(Guide)X +1 f +2851(,)X +2891(University)X +3249(of)X +3336(California,)X +3701(Berkeley,)X +4031(1991.)X +555 3801([CATT91])N +923(Cattell,)X +1181(R.G.G.,)X +1455(``An)X +1632(Engineering)X +2049(Database)X +2369(Benchmark'',)X +2 f +2838(The)X +2983(Benchmark)X +3373(Handbook)X +3731(for)X +3848(Database)X +4179(and)X +727 3891(Transaction)N +1133(Processing)X +1509(Systems)X +1 f +1763(,)X +1803(J.)X +1874(Gray,)X +2075(editor,)X +2302(Morgan)X +2576(Kaufman)X +2895(1991.)X +555 4071([CHEN91])N +929(Cheng,)X +1180(E.,)X +1291(Chang,)X +1542(E.,)X +1653(Klein,)X +1872(J.,)X +1964(Lee,)X +2126(D.,)X +2245(Lu,)X +2375(E.,)X +2485(Lutgardo,)X +2820(A.,)X +2939(Obermarck,)X +3342(R.,)X +3456(``An)X +3629(Open)X +3824(and)X +3961(Extensible)X +727 4161(Event-Based)N +1157(Transaction)X +1556(Manager'',)X +2 f +1936(Proceedings)X +2357(1991)X +2537(Summer)X +2820(Usenix)X +1 f +3043(,)X +3083(Nashville,)X +3430(TN,)X +3577(June)X +3744(1991.)X +555 4341([CHOU85])N +943(Chou,)X +1163(H.,)X +1288(DeWitt,)X +1570(D.,)X +1694(``An)X +1872(Evaluation)X +2245(of)X +2338(Buffer)X +2574(Management)X +3019(Strategies)X +3361(for)X +3481(Relational)X +3836(Database)X +4157(Sys-)X +727 4431(tems'',)N +2 f +972(Proceedings)X +1393(of)X +1475(the)X +1593(11th)X +1755(International)X +2197(Conference)X +2587(on)X +2687(Very)X +2859(Large)X +3070(Databases)X +1 f +3408(,)X +3448(1985.)X +555 4611([DEWI84])N +925(DeWitt,)X +1207(D.,)X +1331(Katz,)X +1529(R.,)X +1648(Olken,)X +1890(F.,)X +2000(Shapiro,)X +2295(L.,)X +2410(Stonebraker,)X +2843(M.,)X +2979(Wood,)X +3220(D.,)X +3343(``Implementation)X +3929(Techniques)X +727 4701(for)N +841(Main)X +1030(Memory)X +1326(Database)X +1641(Systems'',)X +2 f +2001(Proceedings)X +2422(of)X +2504(SIGMOD)X +1 f +2812(,)X +2852(pp.)X +2972(1-8,)X +3119(June)X +3286(1984.)X +555 4881([GRAY76])N +944(Gray,)X +1153(J.,)X +1252(Lorie,)X +1474(R.,)X +1595(Putzolu,)X +1887(F.,)X +1999(and)X +2143(Traiger,)X +2428(I.,)X +2522(``Granularity)X +2973(of)X +3067(locks)X +3263(and)X +3406(degrees)X +3679(of)X +3773(consistency)X +4174(in)X +4263(a)X +727 4971(large)N +909(shared)X +1140(data)X +1295(base'',)X +2 f +1533(Modeling)X +1861(in)X +1944(Data)X +2125(Base)X +2301(Management)X +2740(Systems)X +1 f +2994(,)X +3034(Elsevier)X +3317(North)X +3524(Holland,)X +3822(New)X +3994(York,)X +4199(pp.)X +727 5061(365-394.)N +555 5241([HAER83])N +931(Haerder,)X +1235(T.)X +1348(Reuter,)X +1606(A.)X +1728(``Principles)X +2126(of)X +2217(Transaction-Oriented)X +2928(Database)X +3246(Recovery'',)X +2 f +3651(Computing)X +4029(Surveys)X +1 f +4279(,)X +727 5331(15\(4\);)N +943(237-318,)X +1250(1983.)X +555 5511([KUNG81])N +943(Kung,)X +1162(H.)X +1261(T.,)X +1371(Richardson,)X +1777(J.,)X +1869(``On)X +2042(Optimistic)X +2400(Methods)X +2701(for)X +2816(Concurrency)X +3252(Control'',)X +2 f +3591(ACM)X +3781(Transactions)X +4219(on)X +727 5601(Database)N +1054(Systems)X +1 f +1328(6\(2\);)X +1504(213-226,)X +1811(1981.)X + +17 p +%%Page: 17 17 +10 s 10 xH 0 xS 1 f +3 f +1 f +555 630([LEHM81])N +939(Lehman,)X +1245(P.,)X +1352(Yao,)X +1529(S.,)X +1636(``Ef\256cient)X +1989(Locking)X +2279(for)X +2396(Concurrent)X +2780(Operations)X +3155(on)X +3258(B-trees'',)X +2 f +3587(ACM)X +3779(Transactions)X +4219(on)X +727 720(Database)N +1054(Systems)X +1 f +1308(,)X +1348(6\(4\),)X +1522(December)X +1873(1981.)X +555 900([MOHA91])N +964(Mohan,)X +1241(C.,)X +1364(Pirahesh,)X +1690(H.,)X +1818(``ARIES-RRH:)X +2366(Restricted)X +2721(Repeating)X +3076(of)X +3173(History)X +3442(in)X +3533(the)X +3660(ARIES)X +3920(Transaction)X +727 990(Recovery)N +1055(Method'',)X +2 f +1398(Proceedings)X +1819(7th)X +1941(International)X +2383(Conference)X +2773(on)X +2873(Data)X +3053(Engineering)X +1 f +3449(,)X +3489(Kobe,)X +3703(Japan,)X +3926(April)X +4115(1991.)X +555 1170([NODI90])N +914(Nodine,)X +1194(M.,)X +1328(Zdonik,)X +1602(S.,)X +1709(``Cooperative)X +2178(Transaction)X +2580(Hierarchies:)X +2996(A)X +3077(Transaction)X +3479(Model)X +3711(to)X +3796(Support)X +4072(Design)X +727 1260(Applications'',)N +2 f +1242(Proceedings)X +1675(16th)X +1849(International)X +2303(Conference)X +2704(on)X +2815(Very)X +2998(Large)X +3220(Data)X +3411(Bases)X +1 f +3598(,)X +3649(Brisbane,)X +3985(Australia,)X +727 1350(August)N +978(1990.)X +555 1530([OUST90])N +923(Ousterhout,)X +1324(J.,)X +1420(``Tcl:)X +1648(An)X +1771(Embeddable)X +2197(Command)X +2555(Language'',)X +2 f +2971(Proceedings)X +3396(1990)X +3580(Winter)X +3822(Usenix)X +1 f +4045(,)X +4089(Wash-)X +727 1620(ington,)N +971(D.C.,)X +1162(January)X +1432(1990.)X +555 1800([POSIX91])N +955(``Unapproved)X +1441(Draft)X +1645(for)X +1773(Realtime)X +2096(Extension)X +2450(for)X +2578(Portable)X +2879(Operating)X +3234(Systems'',)X +3608(Draft)X +3812(11,)X +3946(October)X +4239(7,)X +727 1890(1991,)N +927(IEEE)X +1121(Computer)X +1461(Society.)X +555 2070([ROSE91])N +925(Rosenblum,)X +1341(M.,)X +1484(Ousterhout,)X +1892(J.,)X +1995(``The)X +2206(Design)X +2464(and)X +2611(Implementation)X +3149(of)X +3247(a)X +3314(Log-Structured)X +3835(File)X +3990(System'',)X +2 f +727 2160(Proceedings)N +1148(of)X +1230(the)X +1348(13th)X +1510(Symposium)X +1895(on)X +1995(Operating)X +2344(Systems)X +2618(Principles)X +1 f +2947(,)X +2987(1991.)X +555 2340([SELT91])N +904(Seltzer,)X +1171(M.,)X +1306(Stonebraker,)X +1738(M.,)X +1873(``Read)X +2116(Optimized)X +2478(File)X +2626(Systems:)X +2938(A)X +3020(Performance)X +3454(Evaluation'',)X +2 f +3898(Proceedings)X +727 2430(7th)N +849(Annual)X +1100(International)X +1542(Conference)X +1932(on)X +2032(Data)X +2212(Engineering)X +1 f +2608(,)X +2648(Kobe,)X +2862(Japan,)X +3085(April)X +3274(1991.)X +555 2610([SPEC88])N +907(Spector,)X +1200(Rausch,)X +1484(Bruell,)X +1732(``Camelot:)X +2107(A)X +2192(Flexible,)X +2501(Distributed)X +2888(Transaction)X +3294(Processing)X +3668(System'',)X +2 f +4004(Proceed-)X +727 2700(ings)N +880(of)X +962(Spring)X +1195(COMPCON)X +1606(1988)X +1 f +(,)S +1806(February)X +2116(1988.)X +555 2880([SQL86])N +862(American)X +1201(National)X +1499(Standards)X +1836(Institute,)X +2139(``Database)X +2509(Language)X +2847(SQL'',)X +3093(ANSI)X +3301(X3.135-1986)X +3747(\(ISO)X +3924(9075\),)X +4152(May)X +727 2970(1986.)N +555 3150([STON81])N +919(Stonebraker,)X +1348(M.,)X +1480(``Operating)X +1876(System)X +2132(Support)X +2406(for)X +2520(Database)X +2835(Management'',)X +2 f +3348(Communications)X +3910(of)X +3992(the)X +4110(ACM)X +1 f +4279(,)X +727 3240(1981.)N +555 3420([SULL92])N +925(Sullivan,)X +1247(M.,)X +1394(Olson,)X +1641(M.,)X +1788(``An)X +1976(Index)X +2195(Implementation)X +2737(Supporting)X +3127(Fast)X +3295(Recovery)X +3638(for)X +3767(the)X +3900(POSTGRES)X +727 3510(Storage)N +1014(System'',)X +1365(to)X +1469(appear)X +1726(in)X +2 f +1830(Proceedings)X +2272(8th)X +2415(Annual)X +2687(International)X +3150(Conference)X +3561(on)X +3682(Data)X +3883(Engineering)X +1 f +4279(,)X +727 3600(Tempe,)N +990(Arizona,)X +1289(February)X +1599(1992.)X +555 3780([TPCB90])N +914(Transaction)X +1319(Processing)X +1692(Performance)X +2129(Council,)X +2428(``TPC)X +2653(Benchmark)X +3048(B'',)X +3200(Standard)X +3510(Speci\256cation,)X +3973(Waterside)X +727 3870(Associates,)N +1110(Fremont,)X +1421(CA.,)X +1592(1990.)X +555 4050([YOUN91])N +947(Young,)X +1211(M.)X +1328(W.,)X +1470(Thompson,)X +1858(D.)X +1962(S.,)X +2072(Jaffe,)X +2274(E.,)X +2388(``A)X +2525(Modular)X +2826(Architecture)X +3253(for)X +3372(Distributed)X +3757(Transaction)X +4161(Pro-)X +727 4140(cessing'',)N +2 f +1057(Proceedings)X +1478(1991)X +1658(Winter)X +1896(Usenix)X +1 f +2119(,)X +2159(Dallas,)X +2404(TX,)X +2551(January)X +2821(1991.)X +3 f +755 4263(Margo)N +1008(I.)X +1080(Seltzer)X +1 f +1338(is)X +1411(a)X +1467(Ph.D.)X +1669(student)X +1920(in)X +2002(the)X +2120(Department)X +2519(of)X +2606(Electrical)X +2934(Engineering)X +3346(and)X +3482(Computer)X +3822(Sciences)X +4123(at)X +4201(the)X +555 4353(University)N +919(of)X +1012(California,)X +1383(Berkeley.)X +1739(Her)X +1886(research)X +2181(interests)X +2474(include)X +2735(\256le)X +2862(systems,)X +3160(databases,)X +3513(and)X +3654(transaction)X +4031(process-)X +555 4443(ing)N +686(systems.)X +1008(She)X +1157(spent)X +1355(several)X +1612(years)X +1811(working)X +2107(at)X +2194(startup)X +2441(companies)X +2813(designing)X +3153(and)X +3298(implementing)X +3771(\256le)X +3902(systems)X +4183(and)X +555 4533(transaction)N +929(processing)X +1294(software)X +1592(and)X +1729(designing)X +2061(microprocessors.)X +2648(Ms.)X +2791(Seltzer)X +3035(received)X +3329(her)X +3453(AB)X +3585(in)X +3668(Applied)X +3947(Mathemat-)X +555 4623(ics)N +664(from)X +840 0.1953(Harvard/Radcliffe)AX +1445(College)X +1714(in)X +1796(1983.)X +755 4746(In)N +845(her)X +971(spare)X +1163(time,)X +1347(Margo)X +1583(can)X +1717(usually)X +1970(be)X +2068(found)X +2277(preparing)X +2607(massive)X +2887(quantities)X +3220(of)X +3309(food)X +3478(for)X +3594(hungry)X +3843(hordes,)X +4099(study-)X +555 4836(ing)N +677(Japanese,)X +1003(or)X +1090(playing)X +1350(soccer)X +1576(with)X +1738(an)X +1834(exciting)X +2112(Bay)X +2261(Area)X +2438(Women's)X +2770(Soccer)X +3009(team,)X +3205(the)X +3323(Berkeley)X +3633(Bruisers.)X +3 f +755 5049(Michael)N +1056(A.)X +1159(Olson)X +1 f +1383(is)X +1461(a)X +1522(Master's)X +1828(student)X +2084(in)X +2170(the)X +2292(Department)X +2695(of)X +2786(Electrical)X +3118(Engineering)X +3534(and)X +3674(Computer)X +4018(Sciences)X +555 5139(at)N +645(the)X +774(University)X +1143(of)X +1241(California,)X +1617(Berkeley.)X +1978(His)X +2120(primary)X +2405(interests)X +2703(are)X +2833(database)X +3141(systems)X +3425(and)X +3572(mass)X +3763(storage)X +4026(systems.)X +555 5229(Mike)N +759(spent)X +963(two)X +1118(years)X +1323(working)X +1625(for)X +1754(a)X +1825(commercial)X +2239(database)X +2551(system)X +2808(vendor)X +3066(before)X +3307(joining)X +3567(the)X +3699(Postgres)X +4004(Research)X +555 5319(Group)N +780(at)X +858(Berkeley)X +1168(in)X +1250(1988.)X +1470(He)X +1584(received)X +1877(his)X +1990(B.A.)X +2161(in)X +2243(Computer)X +2583(Science)X +2853(from)X +3029(Berkeley)X +3339(in)X +3421(May)X +3588(1991.)X +755 5442(Mike)N +945(only)X +1108(recently)X +1388(transferred)X +1758(into)X +1903(Sin)X +2030(City,)X +2208(but)X +2330(is)X +2403(rapidly)X +2650(adopting)X +2950(local)X +3126(customs)X +3408(and)X +3544(coloration.)X +3929(In)X +4016(his)X +4129(spare)X +555 5532(time,)N +742(he)X +843(organizes)X +1176(informal)X +1477(Friday)X +1711(afternoon)X +2043(study)X +2240(groups)X +2482(to)X +2568(discuss)X +2823(recent)X +3044(technical)X +3358(and)X +3498(economic)X +3834(developments.)X +555 5622(Among)N +815(his)X +928(hobbies)X +1197(are)X +1316(Charles)X +1581(Dickens,)X +1884(Red)X +2033(Rock,)X +2242(and)X +2378(speaking)X +2683(Dutch)X +2899(to)X +2981(anyone)X +3233(who)X +3391(will)X +3535(permit)X +3764(it.)X + +17 p +%%Trailer +xt + +xs + diff --git a/lib/libc/db/hash/Makefile.inc b/lib/libc/db/hash/Makefile.inc new file mode 100644 index 0000000..cac96fe --- /dev/null +++ b/lib/libc/db/hash/Makefile.inc @@ -0,0 +1,6 @@ +# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93 + +.PATH: ${.CURDIR}/db/hash + +SRCS+= hash.c hash_bigkey.c hash_buf.c hash_func.c hash_log2.c \ + hash_page.c hsearch.c ndbm.c diff --git a/lib/libc/db/hash/README b/lib/libc/db/hash/README new file mode 100644 index 0000000..f29ccf7 --- /dev/null +++ b/lib/libc/db/hash/README @@ -0,0 +1,72 @@ +# @(#)README 8.1 (Berkeley) 6/4/93 + +This package implements a superset of the hsearch and dbm/ndbm libraries. + +Test Programs: + All test programs which need key/data pairs expect them entered + with key and data on separate lines + + tcreat3.c + Takes + bucketsize (bsize), + fill factor (ffactor), and + initial number of elements (nelem). + Creates a hash table named hashtest containing the + keys/data pairs entered from standard in. + thash4.c + Takes + bucketsize (bsize), + fill factor (ffactor), + initial number of elements (nelem) + bytes of cache (ncached), and + file from which to read data (fname) + Creates a table from the key/data pairs on standard in and + then does a read of each key/data in fname + tdel.c + Takes + bucketsize (bsize), and + fill factor (ffactor). + file from which to read data (fname) + Reads each key/data pair from fname and deletes the + key from the hash table hashtest + tseq.c + Reads the key/data pairs in the file hashtest and writes them + to standard out. + tread2.c + Takes + butes of cache (ncached). + Reads key/data pairs from standard in and looks them up + in the file hashtest. + tverify.c + Reads key/data pairs from standard in, looks them up + in the file hashtest, and verifies that the data is + correct. + +NOTES: + +The file search.h is provided for using the hsearch compatible interface +on BSD systems. On System V derived systems, search.h should appear in +/usr/include. + +The man page ../man/db.3 explains the interface to the hashing system. +The file hash.ps is a postscript copy of a paper explaining +the history, implementation, and performance of the hash package. + +"bugs" or idiosyncracies + +If you have a lot of overflows, it is possible to run out of overflow +pages. Currently, this will cause a message to be printed on stderr. +Eventually, this will be indicated by a return error code. + +If you are using the ndbm interface and exit without flushing or closing the +file, you may lose updates since the package buffers all writes. Also, +the db interface only creates a single database file. To avoid overwriting +the user's original file, the suffix ".db" is appended to the file name +passed to dbm_open. Additionally, if your code "knows" about the historic +.dir and .pag files, it will break. + +There is a fundamental difference between this package and the old hsearch. +Hsearch requires the user to maintain the keys and data in the application's +allocated memory while hash takes care of all storage management. The down +side is that the byte strings passed in the ENTRY structure must be null +terminated (both the keys and the data). diff --git a/lib/libc/db/hash/extern.h b/lib/libc/db/hash/extern.h new file mode 100644 index 0000000..b7ef37a --- /dev/null +++ b/lib/libc/db/hash/extern.h @@ -0,0 +1,65 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)extern.h 8.2 (Berkeley) 2/21/94 + */ + +BUFHEAD *__add_ovflpage __P((HTAB *, BUFHEAD *)); +int __addel __P((HTAB *, BUFHEAD *, const DBT *, const DBT *)); +int __big_delete __P((HTAB *, BUFHEAD *)); +int __big_insert __P((HTAB *, BUFHEAD *, const DBT *, const DBT *)); +int __big_keydata __P((HTAB *, BUFHEAD *, DBT *, DBT *, int)); +int __big_return __P((HTAB *, BUFHEAD *, int, DBT *, int)); +int __big_split __P((HTAB *, BUFHEAD *, BUFHEAD *, BUFHEAD *, + int, u_int, SPLIT_RETURN *)); +int __buf_free __P((HTAB *, int, int)); +void __buf_init __P((HTAB *, int)); +u_int __call_hash __P((HTAB *, char *, int)); +int __delpair __P((HTAB *, BUFHEAD *, int)); +int __expand_table __P((HTAB *)); +int __find_bigpair __P((HTAB *, BUFHEAD *, int, char *, int)); +u_short __find_last_page __P((HTAB *, BUFHEAD **)); +void __free_ovflpage __P((HTAB *, BUFHEAD *)); +BUFHEAD *__get_buf __P((HTAB *, u_int, BUFHEAD *, int)); +int __get_page __P((HTAB *, char *, u_int, int, int, int)); +int __init_bitmap __P((HTAB *, int, int, int)); +u_int __log2 __P((u_int)); +int __put_page __P((HTAB *, char *, u_int, int, int)); +void __reclaim_buf __P((HTAB *, BUFHEAD *)); +int __split_page __P((HTAB *, u_int, u_int)); + +/* Default hash routine. */ +extern u_int32_t (*__default_hash) __P((const void *, size_t)); + +#ifdef HASH_STATISTICS +extern long hash_accesses, hash_collisions, hash_expansions, hash_overflows; +#endif diff --git a/lib/libc/db/hash/hash.c b/lib/libc/db/hash/hash.c new file mode 100644 index 0000000..6a23f3d --- /dev/null +++ b/lib/libc/db/hash/hash.c @@ -0,0 +1,994 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)hash.c 8.7 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/param.h> +#include <sys/stat.h> + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#ifdef DEBUG +#include <assert.h> +#endif + +#include <db.h> +#include "hash.h" +#include "page.h" +#include "extern.h" + +static int alloc_segs __P((HTAB *, int)); +static int flush_meta __P((HTAB *)); +static int hash_access __P((HTAB *, ACTION, DBT *, DBT *)); +static int hash_close __P((DB *)); +static int hash_delete __P((const DB *, const DBT *, u_int)); +static int hash_fd __P((const DB *)); +static int hash_get __P((const DB *, const DBT *, DBT *, u_int)); +static int hash_put __P((const DB *, DBT *, const DBT *, u_int)); +static void *hash_realloc __P((SEGMENT **, int, int)); +static int hash_seq __P((const DB *, DBT *, DBT *, u_int)); +static int hash_sync __P((const DB *, u_int)); +static int hdestroy __P((HTAB *)); +static HTAB *init_hash __P((HTAB *, const char *, HASHINFO *)); +static int init_htab __P((HTAB *, int)); +#if BYTE_ORDER == LITTLE_ENDIAN +static void swap_header __P((HTAB *)); +static void swap_header_copy __P((HASHHDR *, HASHHDR *)); +#endif + +/* Fast arithmetic, relying on powers of 2, */ +#define MOD(x, y) ((x) & ((y) - 1)) + +#define RETURN_ERROR(ERR, LOC) { save_errno = ERR; goto LOC; } + +/* Return values */ +#define SUCCESS (0) +#define ERROR (-1) +#define ABNORMAL (1) + +#ifdef HASH_STATISTICS +long hash_accesses, hash_collisions, hash_expansions, hash_overflows; +#endif + +/************************** INTERFACE ROUTINES ***************************/ +/* OPEN/CLOSE */ + +extern DB * +__hash_open(file, flags, mode, info, dflags) + const char *file; + int flags, mode, dflags; + const HASHINFO *info; /* Special directives for create */ +{ + HTAB *hashp; + struct stat statbuf; + DB *dbp; + int bpages, hdrsize, new_table, nsegs, save_errno; + + if ((flags & O_ACCMODE) == O_WRONLY) { + errno = EINVAL; + return (NULL); + } + + if (!(hashp = (HTAB *)calloc(1, sizeof(HTAB)))) + return (NULL); + hashp->fp = -1; + + /* + * Even if user wants write only, we need to be able to read + * the actual file, so we need to open it read/write. But, the + * field in the hashp structure needs to be accurate so that + * we can check accesses. + */ + hashp->flags = flags; + + new_table = 0; + if (!file || (flags & O_TRUNC) || + (stat(file, &statbuf) && (errno == ENOENT))) { + if (errno == ENOENT) + errno = 0; /* Just in case someone looks at errno */ + new_table = 1; + } + if (file) { + if ((hashp->fp = open(file, flags, mode)) == -1) + RETURN_ERROR(errno, error0); + (void)fcntl(hashp->fp, F_SETFD, 1); + } + if (new_table) { + if (!(hashp = init_hash(hashp, file, (HASHINFO *)info))) + RETURN_ERROR(errno, error1); + } else { + /* Table already exists */ + if (info && info->hash) + hashp->hash = info->hash; + else + hashp->hash = __default_hash; + + hdrsize = read(hashp->fp, &hashp->hdr, sizeof(HASHHDR)); +#if BYTE_ORDER == LITTLE_ENDIAN + swap_header(hashp); +#endif + if (hdrsize == -1) + RETURN_ERROR(errno, error1); + if (hdrsize != sizeof(HASHHDR)) + RETURN_ERROR(EFTYPE, error1); + /* Verify file type, versions and hash function */ + if (hashp->MAGIC != HASHMAGIC) + RETURN_ERROR(EFTYPE, error1); +#define OLDHASHVERSION 1 + if (hashp->VERSION != HASHVERSION && + hashp->VERSION != OLDHASHVERSION) + RETURN_ERROR(EFTYPE, error1); + if (hashp->hash(CHARKEY, sizeof(CHARKEY)) != hashp->H_CHARKEY) + RETURN_ERROR(EFTYPE, error1); + /* + * Figure out how many segments we need. Max_Bucket is the + * maximum bucket number, so the number of buckets is + * max_bucket + 1. + */ + nsegs = (hashp->MAX_BUCKET + 1 + hashp->SGSIZE - 1) / + hashp->SGSIZE; + hashp->nsegs = 0; + if (alloc_segs(hashp, nsegs)) + /* + * If alloc_segs fails, table will have been destroyed + * and errno will have been set. + */ + return (NULL); + /* Read in bitmaps */ + bpages = (hashp->SPARES[hashp->OVFL_POINT] + + (hashp->BSIZE << BYTE_SHIFT) - 1) >> + (hashp->BSHIFT + BYTE_SHIFT); + + hashp->nmaps = bpages; + (void)memset(&hashp->mapp[0], 0, bpages * sizeof(u_long *)); + } + + /* Initialize Buffer Manager */ + if (info && info->cachesize) + __buf_init(hashp, info->cachesize); + else + __buf_init(hashp, DEF_BUFSIZE); + + hashp->new_file = new_table; + hashp->save_file = file && (hashp->flags & O_RDWR); + hashp->cbucket = -1; + if (!(dbp = (DB *)malloc(sizeof(DB)))) { + save_errno = errno; + hdestroy(hashp); + errno = save_errno; + return (NULL); + } + dbp->internal = hashp; + dbp->close = hash_close; + dbp->del = hash_delete; + dbp->fd = hash_fd; + dbp->get = hash_get; + dbp->put = hash_put; + dbp->seq = hash_seq; + dbp->sync = hash_sync; + dbp->type = DB_HASH; + +#ifdef DEBUG + (void)fprintf(stderr, +"%s\n%s%x\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n", + "init_htab:", + "TABLE POINTER ", hashp, + "BUCKET SIZE ", hashp->BSIZE, + "BUCKET SHIFT ", hashp->BSHIFT, + "DIRECTORY SIZE ", hashp->DSIZE, + "SEGMENT SIZE ", hashp->SGSIZE, + "SEGMENT SHIFT ", hashp->SSHIFT, + "FILL FACTOR ", hashp->FFACTOR, + "MAX BUCKET ", hashp->MAX_BUCKET, + "OVFL POINT ", hashp->OVFL_POINT, + "LAST FREED ", hashp->LAST_FREED, + "HIGH MASK ", hashp->HIGH_MASK, + "LOW MASK ", hashp->LOW_MASK, + "NSEGS ", hashp->nsegs, + "NKEYS ", hashp->NKEYS); +#endif +#ifdef HASH_STATISTICS + hash_overflows = hash_accesses = hash_collisions = hash_expansions = 0; +#endif + return (dbp); + +error1: + if (hashp != NULL) + (void)close(hashp->fp); + +error0: + free(hashp); + errno = save_errno; + return (NULL); +} + +static int +hash_close(dbp) + DB *dbp; +{ + HTAB *hashp; + int retval; + + if (!dbp) + return (ERROR); + + hashp = (HTAB *)dbp->internal; + retval = hdestroy(hashp); + free(dbp); + return (retval); +} + +static int +hash_fd(dbp) + const DB *dbp; +{ + HTAB *hashp; + + if (!dbp) + return (ERROR); + + hashp = (HTAB *)dbp->internal; + if (hashp->fp == -1) { + errno = ENOENT; + return (-1); + } + return (hashp->fp); +} + +/************************** LOCAL CREATION ROUTINES **********************/ +static HTAB * +init_hash(hashp, file, info) + HTAB *hashp; + const char *file; + HASHINFO *info; +{ + struct stat statbuf; + int nelem; + + nelem = 1; + hashp->NKEYS = 0; + hashp->LORDER = BYTE_ORDER; + hashp->BSIZE = DEF_BUCKET_SIZE; + hashp->BSHIFT = DEF_BUCKET_SHIFT; + hashp->SGSIZE = DEF_SEGSIZE; + hashp->SSHIFT = DEF_SEGSIZE_SHIFT; + hashp->DSIZE = DEF_DIRSIZE; + hashp->FFACTOR = DEF_FFACTOR; + hashp->hash = __default_hash; + memset(hashp->SPARES, 0, sizeof(hashp->SPARES)); + memset(hashp->BITMAPS, 0, sizeof (hashp->BITMAPS)); + + /* Fix bucket size to be optimal for file system */ + if (file != NULL) { + if (stat(file, &statbuf)) + return (NULL); + hashp->BSIZE = statbuf.st_blksize; + hashp->BSHIFT = __log2(hashp->BSIZE); + } + + if (info) { + if (info->bsize) { + /* Round pagesize up to power of 2 */ + hashp->BSHIFT = __log2(info->bsize); + hashp->BSIZE = 1 << hashp->BSHIFT; + if (hashp->BSIZE > MAX_BSIZE) { + errno = EINVAL; + return (NULL); + } + } + if (info->ffactor) + hashp->FFACTOR = info->ffactor; + if (info->hash) + hashp->hash = info->hash; + if (info->nelem) + nelem = info->nelem; + if (info->lorder) { + if (info->lorder != BIG_ENDIAN && + info->lorder != LITTLE_ENDIAN) { + errno = EINVAL; + return (NULL); + } + hashp->LORDER = info->lorder; + } + } + /* init_htab should destroy the table and set errno if it fails */ + if (init_htab(hashp, nelem)) + return (NULL); + else + return (hashp); +} +/* + * This calls alloc_segs which may run out of memory. Alloc_segs will destroy + * the table and set errno, so we just pass the error information along. + * + * Returns 0 on No Error + */ +static int +init_htab(hashp, nelem) + HTAB *hashp; + int nelem; +{ + register int nbuckets, nsegs; + int l2; + + /* + * Divide number of elements by the fill factor and determine a + * desired number of buckets. Allocate space for the next greater + * power of two number of buckets. + */ + nelem = (nelem - 1) / hashp->FFACTOR + 1; + + l2 = __log2(MAX(nelem, 2)); + nbuckets = 1 << l2; + + hashp->SPARES[l2] = l2 + 1; + hashp->SPARES[l2 + 1] = l2 + 1; + hashp->OVFL_POINT = l2; + hashp->LAST_FREED = 2; + + /* First bitmap page is at: splitpoint l2 page offset 1 */ + if (__init_bitmap(hashp, OADDR_OF(l2, 1), l2 + 1, 0)) + return (-1); + + hashp->MAX_BUCKET = hashp->LOW_MASK = nbuckets - 1; + hashp->HIGH_MASK = (nbuckets << 1) - 1; + hashp->HDRPAGES = ((MAX(sizeof(HASHHDR), MINHDRSIZE) - 1) >> + hashp->BSHIFT) + 1; + + nsegs = (nbuckets - 1) / hashp->SGSIZE + 1; + nsegs = 1 << __log2(nsegs); + + if (nsegs > hashp->DSIZE) + hashp->DSIZE = nsegs; + return (alloc_segs(hashp, nsegs)); +} + +/********************** DESTROY/CLOSE ROUTINES ************************/ + +/* + * Flushes any changes to the file if necessary and destroys the hashp + * structure, freeing all allocated space. + */ +static int +hdestroy(hashp) + HTAB *hashp; +{ + int i, save_errno; + + save_errno = 0; + +#ifdef HASH_STATISTICS + (void)fprintf(stderr, "hdestroy: accesses %ld collisions %ld\n", + hash_accesses, hash_collisions); + (void)fprintf(stderr, "hdestroy: expansions %ld\n", + hash_expansions); + (void)fprintf(stderr, "hdestroy: overflows %ld\n", + hash_overflows); + (void)fprintf(stderr, "keys %ld maxp %d segmentcount %d\n", + hashp->NKEYS, hashp->MAX_BUCKET, hashp->nsegs); + + for (i = 0; i < NCACHED; i++) + (void)fprintf(stderr, + "spares[%d] = %d\n", i, hashp->SPARES[i]); +#endif + /* + * Call on buffer manager to free buffers, and if required, + * write them to disk. + */ + if (__buf_free(hashp, 1, hashp->save_file)) + save_errno = errno; + if (hashp->dir) { + free(*hashp->dir); /* Free initial segments */ + /* Free extra segments */ + while (hashp->exsegs--) + free(hashp->dir[--hashp->nsegs]); + free(hashp->dir); + } + if (flush_meta(hashp) && !save_errno) + save_errno = errno; + /* Free Bigmaps */ + for (i = 0; i < hashp->nmaps; i++) + if (hashp->mapp[i]) + free(hashp->mapp[i]); + + if (hashp->fp != -1) + (void)close(hashp->fp); + + free(hashp); + + if (save_errno) { + errno = save_errno; + return (ERROR); + } + return (SUCCESS); +} +/* + * Write modified pages to disk + * + * Returns: + * 0 == OK + * -1 ERROR + */ +static int +hash_sync(dbp, flags) + const DB *dbp; + u_int flags; +{ + HTAB *hashp; + + if (flags != 0) { + errno = EINVAL; + return (ERROR); + } + + if (!dbp) + return (ERROR); + + hashp = (HTAB *)dbp->internal; + if (!hashp->save_file) + return (0); + if (__buf_free(hashp, 0, 1) || flush_meta(hashp)) + return (ERROR); + hashp->new_file = 0; + return (0); +} + +/* + * Returns: + * 0 == OK + * -1 indicates that errno should be set + */ +static int +flush_meta(hashp) + HTAB *hashp; +{ + HASHHDR *whdrp; +#if BYTE_ORDER == LITTLE_ENDIAN + HASHHDR whdr; +#endif + int fp, i, wsize; + + if (!hashp->save_file) + return (0); + hashp->MAGIC = HASHMAGIC; + hashp->VERSION = HASHVERSION; + hashp->H_CHARKEY = hashp->hash(CHARKEY, sizeof(CHARKEY)); + + fp = hashp->fp; + whdrp = &hashp->hdr; +#if BYTE_ORDER == LITTLE_ENDIAN + whdrp = &whdr; + swap_header_copy(&hashp->hdr, whdrp); +#endif + if ((lseek(fp, (off_t)0, SEEK_SET) == -1) || + ((wsize = write(fp, whdrp, sizeof(HASHHDR))) == -1)) + return (-1); + else + if (wsize != sizeof(HASHHDR)) { + errno = EFTYPE; + hashp->errno = errno; + return (-1); + } + for (i = 0; i < NCACHED; i++) + if (hashp->mapp[i]) + if (__put_page(hashp, (char *)hashp->mapp[i], + hashp->BITMAPS[i], 0, 1)) + return (-1); + return (0); +} + +/*******************************SEARCH ROUTINES *****************************/ +/* + * All the access routines return + * + * Returns: + * 0 on SUCCESS + * 1 to indicate an external ERROR (i.e. key not found, etc) + * -1 to indicate an internal ERROR (i.e. out of memory, etc) + */ +static int +hash_get(dbp, key, data, flag) + const DB *dbp; + const DBT *key; + DBT *data; + u_int flag; +{ + HTAB *hashp; + + hashp = (HTAB *)dbp->internal; + if (flag) { + hashp->errno = errno = EINVAL; + return (ERROR); + } + return (hash_access(hashp, HASH_GET, (DBT *)key, data)); +} + +static int +hash_put(dbp, key, data, flag) + const DB *dbp; + DBT *key; + const DBT *data; + u_int flag; +{ + HTAB *hashp; + + hashp = (HTAB *)dbp->internal; + if (flag && flag != R_NOOVERWRITE) { + hashp->errno = errno = EINVAL; + return (ERROR); + } + if ((hashp->flags & O_ACCMODE) == O_RDONLY) { + hashp->errno = errno = EPERM; + return (ERROR); + } + return (hash_access(hashp, flag == R_NOOVERWRITE ? + HASH_PUTNEW : HASH_PUT, (DBT *)key, (DBT *)data)); +} + +static int +hash_delete(dbp, key, flag) + const DB *dbp; + const DBT *key; + u_int flag; /* Ignored */ +{ + HTAB *hashp; + + hashp = (HTAB *)dbp->internal; + if (flag && flag != R_CURSOR) { + hashp->errno = errno = EINVAL; + return (ERROR); + } + if ((hashp->flags & O_ACCMODE) == O_RDONLY) { + hashp->errno = errno = EPERM; + return (ERROR); + } + return (hash_access(hashp, HASH_DELETE, (DBT *)key, NULL)); +} + +/* + * Assume that hashp has been set in wrapper routine. + */ +static int +hash_access(hashp, action, key, val) + HTAB *hashp; + ACTION action; + DBT *key, *val; +{ + register BUFHEAD *rbufp; + BUFHEAD *bufp, *save_bufp; + register u_short *bp; + register int n, ndx, off, size; + register char *kp; + u_short pageno; + +#ifdef HASH_STATISTICS + hash_accesses++; +#endif + + off = hashp->BSIZE; + size = key->size; + kp = (char *)key->data; + rbufp = __get_buf(hashp, __call_hash(hashp, kp, size), NULL, 0); + if (!rbufp) + return (ERROR); + save_bufp = rbufp; + + /* Pin the bucket chain */ + rbufp->flags |= BUF_PIN; + for (bp = (u_short *)rbufp->page, n = *bp++, ndx = 1; ndx < n;) + if (bp[1] >= REAL_KEY) { + /* Real key/data pair */ + if (size == off - *bp && + memcmp(kp, rbufp->page + *bp, size) == 0) + goto found; + off = bp[1]; +#ifdef HASH_STATISTICS + hash_collisions++; +#endif + bp += 2; + ndx += 2; + } else if (bp[1] == OVFLPAGE) { + rbufp = __get_buf(hashp, *bp, rbufp, 0); + if (!rbufp) { + save_bufp->flags &= ~BUF_PIN; + return (ERROR); + } + /* FOR LOOP INIT */ + bp = (u_short *)rbufp->page; + n = *bp++; + ndx = 1; + off = hashp->BSIZE; + } else if (bp[1] < REAL_KEY) { + if ((ndx = + __find_bigpair(hashp, rbufp, ndx, kp, size)) > 0) + goto found; + if (ndx == -2) { + bufp = rbufp; + if (!(pageno = + __find_last_page(hashp, &bufp))) { + ndx = 0; + rbufp = bufp; + break; /* FOR */ + } + rbufp = __get_buf(hashp, pageno, bufp, 0); + if (!rbufp) { + save_bufp->flags &= ~BUF_PIN; + return (ERROR); + } + /* FOR LOOP INIT */ + bp = (u_short *)rbufp->page; + n = *bp++; + ndx = 1; + off = hashp->BSIZE; + } else { + save_bufp->flags &= ~BUF_PIN; + return (ERROR); + } + } + + /* Not found */ + switch (action) { + case HASH_PUT: + case HASH_PUTNEW: + if (__addel(hashp, rbufp, key, val)) { + save_bufp->flags &= ~BUF_PIN; + return (ERROR); + } else { + save_bufp->flags &= ~BUF_PIN; + return (SUCCESS); + } + case HASH_GET: + case HASH_DELETE: + default: + save_bufp->flags &= ~BUF_PIN; + return (ABNORMAL); + } + +found: + switch (action) { + case HASH_PUTNEW: + save_bufp->flags &= ~BUF_PIN; + return (ABNORMAL); + case HASH_GET: + bp = (u_short *)rbufp->page; + if (bp[ndx + 1] < REAL_KEY) { + if (__big_return(hashp, rbufp, ndx, val, 0)) + return (ERROR); + } else { + val->data = (u_char *)rbufp->page + (int)bp[ndx + 1]; + val->size = bp[ndx] - bp[ndx + 1]; + } + break; + case HASH_PUT: + if ((__delpair(hashp, rbufp, ndx)) || + (__addel(hashp, rbufp, key, val))) { + save_bufp->flags &= ~BUF_PIN; + return (ERROR); + } + break; + case HASH_DELETE: + if (__delpair(hashp, rbufp, ndx)) + return (ERROR); + break; + default: + abort(); + } + save_bufp->flags &= ~BUF_PIN; + return (SUCCESS); +} + +static int +hash_seq(dbp, key, data, flag) + const DB *dbp; + DBT *key, *data; + u_int flag; +{ + register u_int bucket; + register BUFHEAD *bufp; + HTAB *hashp; + u_short *bp, ndx; + + hashp = (HTAB *)dbp->internal; + if (flag && flag != R_FIRST && flag != R_NEXT) { + hashp->errno = errno = EINVAL; + return (ERROR); + } +#ifdef HASH_STATISTICS + hash_accesses++; +#endif + if ((hashp->cbucket < 0) || (flag == R_FIRST)) { + hashp->cbucket = 0; + hashp->cndx = 1; + hashp->cpage = NULL; + } + + for (bp = NULL; !bp || !bp[0]; ) { + if (!(bufp = hashp->cpage)) { + for (bucket = hashp->cbucket; + bucket <= hashp->MAX_BUCKET; + bucket++, hashp->cndx = 1) { + bufp = __get_buf(hashp, bucket, NULL, 0); + if (!bufp) + return (ERROR); + hashp->cpage = bufp; + bp = (u_short *)bufp->page; + if (bp[0]) + break; + } + hashp->cbucket = bucket; + if (hashp->cbucket > hashp->MAX_BUCKET) { + hashp->cbucket = -1; + return (ABNORMAL); + } + } else + bp = (u_short *)hashp->cpage->page; + +#ifdef DEBUG + assert(bp); + assert(bufp); +#endif + while (bp[hashp->cndx + 1] == OVFLPAGE) { + bufp = hashp->cpage = + __get_buf(hashp, bp[hashp->cndx], bufp, 0); + if (!bufp) + return (ERROR); + bp = (u_short *)(bufp->page); + hashp->cndx = 1; + } + if (!bp[0]) { + hashp->cpage = NULL; + ++hashp->cbucket; + } + } + ndx = hashp->cndx; + if (bp[ndx + 1] < REAL_KEY) { + if (__big_keydata(hashp, bufp, key, data, 1)) + return (ERROR); + } else { + key->data = (u_char *)hashp->cpage->page + bp[ndx]; + key->size = (ndx > 1 ? bp[ndx - 1] : hashp->BSIZE) - bp[ndx]; + data->data = (u_char *)hashp->cpage->page + bp[ndx + 1]; + data->size = bp[ndx] - bp[ndx + 1]; + ndx += 2; + if (ndx > bp[0]) { + hashp->cpage = NULL; + hashp->cbucket++; + hashp->cndx = 1; + } else + hashp->cndx = ndx; + } + return (SUCCESS); +} + +/********************************* UTILITIES ************************/ + +/* + * Returns: + * 0 ==> OK + * -1 ==> Error + */ +extern int +__expand_table(hashp) + HTAB *hashp; +{ + u_int old_bucket, new_bucket; + int dirsize, new_segnum, spare_ndx; + +#ifdef HASH_STATISTICS + hash_expansions++; +#endif + new_bucket = ++hashp->MAX_BUCKET; + old_bucket = (hashp->MAX_BUCKET & hashp->LOW_MASK); + + new_segnum = new_bucket >> hashp->SSHIFT; + + /* Check if we need a new segment */ + if (new_segnum >= hashp->nsegs) { + /* Check if we need to expand directory */ + if (new_segnum >= hashp->DSIZE) { + /* Reallocate directory */ + dirsize = hashp->DSIZE * sizeof(SEGMENT *); + if (!hash_realloc(&hashp->dir, dirsize, dirsize << 1)) + return (-1); + hashp->DSIZE = dirsize << 1; + } + if ((hashp->dir[new_segnum] = + (SEGMENT)calloc(hashp->SGSIZE, sizeof(SEGMENT))) == NULL) + return (-1); + hashp->exsegs++; + hashp->nsegs++; + } + /* + * If the split point is increasing (MAX_BUCKET's log base 2 + * * increases), we need to copy the current contents of the spare + * split bucket to the next bucket. + */ + spare_ndx = __log2(hashp->MAX_BUCKET + 1); + if (spare_ndx > hashp->OVFL_POINT) { + hashp->SPARES[spare_ndx] = hashp->SPARES[hashp->OVFL_POINT]; + hashp->OVFL_POINT = spare_ndx; + } + + if (new_bucket > hashp->HIGH_MASK) { + /* Starting a new doubling */ + hashp->LOW_MASK = hashp->HIGH_MASK; + hashp->HIGH_MASK = new_bucket | hashp->LOW_MASK; + } + /* Relocate records to the new bucket */ + return (__split_page(hashp, old_bucket, new_bucket)); +} + +/* + * If realloc guarantees that the pointer is not destroyed if the realloc + * fails, then this routine can go away. + */ +static void * +hash_realloc(p_ptr, oldsize, newsize) + SEGMENT **p_ptr; + int oldsize, newsize; +{ + register void *p; + + if (p = malloc(newsize)) { + memmove(p, *p_ptr, oldsize); + memset((char *)p + oldsize, 0, newsize - oldsize); + free(*p_ptr); + *p_ptr = p; + } + return (p); +} + +extern u_int +__call_hash(hashp, k, len) + HTAB *hashp; + char *k; + int len; +{ + int n, bucket; + + n = hashp->hash(k, len); + bucket = n & hashp->HIGH_MASK; + if (bucket > hashp->MAX_BUCKET) + bucket = bucket & hashp->LOW_MASK; + return (bucket); +} + +/* + * Allocate segment table. On error, destroy the table and set errno. + * + * Returns 0 on success + */ +static int +alloc_segs(hashp, nsegs) + HTAB *hashp; + int nsegs; +{ + register int i; + register SEGMENT store; + + int save_errno; + + if ((hashp->dir = + (SEGMENT *)calloc(hashp->DSIZE, sizeof(SEGMENT *))) == NULL) { + save_errno = errno; + (void)hdestroy(hashp); + errno = save_errno; + return (-1); + } + /* Allocate segments */ + if ((store = + (SEGMENT)calloc(nsegs << hashp->SSHIFT, sizeof(SEGMENT))) == NULL) { + save_errno = errno; + (void)hdestroy(hashp); + errno = save_errno; + return (-1); + } + for (i = 0; i < nsegs; i++, hashp->nsegs++) + hashp->dir[i] = &store[i << hashp->SSHIFT]; + return (0); +} + +#if BYTE_ORDER == LITTLE_ENDIAN +/* + * Hashp->hdr needs to be byteswapped. + */ +static void +swap_header_copy(srcp, destp) + HASHHDR *srcp, *destp; +{ + int i; + + P_32_COPY(srcp->magic, destp->magic); + P_32_COPY(srcp->version, destp->version); + P_32_COPY(srcp->lorder, destp->lorder); + P_32_COPY(srcp->bsize, destp->bsize); + P_32_COPY(srcp->bshift, destp->bshift); + P_32_COPY(srcp->dsize, destp->dsize); + P_32_COPY(srcp->ssize, destp->ssize); + P_32_COPY(srcp->sshift, destp->sshift); + P_32_COPY(srcp->ovfl_point, destp->ovfl_point); + P_32_COPY(srcp->last_freed, destp->last_freed); + P_32_COPY(srcp->max_bucket, destp->max_bucket); + P_32_COPY(srcp->high_mask, destp->high_mask); + P_32_COPY(srcp->low_mask, destp->low_mask); + P_32_COPY(srcp->ffactor, destp->ffactor); + P_32_COPY(srcp->nkeys, destp->nkeys); + P_32_COPY(srcp->hdrpages, destp->hdrpages); + P_32_COPY(srcp->h_charkey, destp->h_charkey); + for (i = 0; i < NCACHED; i++) { + P_32_COPY(srcp->spares[i], destp->spares[i]); + P_16_COPY(srcp->bitmaps[i], destp->bitmaps[i]); + } +} + +static void +swap_header(hashp) + HTAB *hashp; +{ + HASHHDR *hdrp; + int i; + + hdrp = &hashp->hdr; + + M_32_SWAP(hdrp->magic); + M_32_SWAP(hdrp->version); + M_32_SWAP(hdrp->lorder); + M_32_SWAP(hdrp->bsize); + M_32_SWAP(hdrp->bshift); + M_32_SWAP(hdrp->dsize); + M_32_SWAP(hdrp->ssize); + M_32_SWAP(hdrp->sshift); + M_32_SWAP(hdrp->ovfl_point); + M_32_SWAP(hdrp->last_freed); + M_32_SWAP(hdrp->max_bucket); + M_32_SWAP(hdrp->high_mask); + M_32_SWAP(hdrp->low_mask); + M_32_SWAP(hdrp->ffactor); + M_32_SWAP(hdrp->nkeys); + M_32_SWAP(hdrp->hdrpages); + M_32_SWAP(hdrp->h_charkey); + for (i = 0; i < NCACHED; i++) { + M_32_SWAP(hdrp->spares[i]); + M_16_SWAP(hdrp->bitmaps[i]); + } +} +#endif diff --git a/lib/libc/db/hash/hash.h b/lib/libc/db/hash/hash.h new file mode 100644 index 0000000..cfbedaa --- /dev/null +++ b/lib/libc/db/hash/hash.h @@ -0,0 +1,284 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)hash.h 8.2 (Berkeley) 2/21/94 + */ + +/* Operations */ +typedef enum { + HASH_GET, HASH_PUT, HASH_PUTNEW, HASH_DELETE, HASH_FIRST, HASH_NEXT +} ACTION; + +/* Buffer Management structures */ +typedef struct _bufhead BUFHEAD; + +struct _bufhead { + BUFHEAD *prev; /* LRU links */ + BUFHEAD *next; /* LRU links */ + BUFHEAD *ovfl; /* Overflow page buffer header */ + u_int addr; /* Address of this page */ + char *page; /* Actual page data */ + char flags; +#define BUF_MOD 0x0001 +#define BUF_DISK 0x0002 +#define BUF_BUCKET 0x0004 +#define BUF_PIN 0x0008 +}; + +#define IS_BUCKET(X) ((X) & BUF_BUCKET) + +typedef BUFHEAD **SEGMENT; + +/* Hash Table Information */ +typedef struct hashhdr { /* Disk resident portion */ + int magic; /* Magic NO for hash tables */ + int version; /* Version ID */ + long lorder; /* Byte Order */ + int bsize; /* Bucket/Page Size */ + int bshift; /* Bucket shift */ + int dsize; /* Directory Size */ + int ssize; /* Segment Size */ + int sshift; /* Segment shift */ + int ovfl_point; /* Where overflow pages are being allocated */ + int last_freed; /* Last overflow page freed */ + int max_bucket; /* ID of Maximum bucket in use */ + int high_mask; /* Mask to modulo into entire table */ + int low_mask; /* Mask to modulo into lower half of table */ + int ffactor; /* Fill factor */ + int nkeys; /* Number of keys in hash table */ + int hdrpages; /* Size of table header */ + int h_charkey; /* value of hash(CHARKEY) */ +#define NCACHED 32 /* number of bit maps and spare points */ + int spares[NCACHED];/* spare pages for overflow */ + u_short bitmaps[NCACHED]; /* address of overflow page bitmaps */ +} HASHHDR; + +typedef struct htab { /* Memory resident data structure */ + HASHHDR hdr; /* Header */ + int nsegs; /* Number of allocated segments */ + int exsegs; /* Number of extra allocated segments */ + u_int32_t /* Hash function */ + (*hash)__P((const void *, size_t)); + int flags; /* Flag values */ + int fp; /* File pointer */ + char *tmp_buf; /* Temporary Buffer for BIG data */ + char *tmp_key; /* Temporary Buffer for BIG keys */ + BUFHEAD *cpage; /* Current page */ + int cbucket; /* Current bucket */ + int cndx; /* Index of next item on cpage */ + int errno; /* Error Number -- for DBM compatability */ + int new_file; /* Indicates if fd is backing store or no */ + int save_file; /* Indicates whether we need to flush file at + * exit */ + u_long *mapp[NCACHED]; /* Pointers to page maps */ + int nmaps; /* Initial number of bitmaps */ + int nbufs; /* Number of buffers left to allocate */ + BUFHEAD bufhead; /* Header of buffer lru list */ + SEGMENT *dir; /* Hash Bucket directory */ +} HTAB; + +/* + * Constants + */ +#define MAX_BSIZE 65536 /* 2^16 */ +#define MIN_BUFFERS 6 +#define MINHDRSIZE 512 +#define DEF_BUFSIZE 65536 /* 64 K */ +#define DEF_BUCKET_SIZE 4096 +#define DEF_BUCKET_SHIFT 12 /* log2(BUCKET) */ +#define DEF_SEGSIZE 256 +#define DEF_SEGSIZE_SHIFT 8 /* log2(SEGSIZE) */ +#define DEF_DIRSIZE 256 +#define DEF_FFACTOR 65536 +#define MIN_FFACTOR 4 +#define SPLTMAX 8 +#define CHARKEY "%$sniglet^&" +#define NUMKEY 1038583 +#define BYTE_SHIFT 3 +#define INT_TO_BYTE 2 +#define INT_BYTE_SHIFT 5 +#define ALL_SET ((u_int)0xFFFFFFFF) +#define ALL_CLEAR 0 + +#define PTROF(X) ((BUFHEAD *)((u_int)(X)&~0x3)) +#define ISMOD(X) ((u_int)(X)&0x1) +#define DOMOD(X) ((X) = (char *)((u_int)(X)|0x1)) +#define ISDISK(X) ((u_int)(X)&0x2) +#define DODISK(X) ((X) = (char *)((u_int)(X)|0x2)) + +#define BITS_PER_MAP 32 + +/* Given the address of the beginning of a big map, clear/set the nth bit */ +#define CLRBIT(A, N) ((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP))) +#define SETBIT(A, N) ((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP))) +#define ISSET(A, N) ((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP))) + +/* Overflow management */ +/* + * Overflow page numbers are allocated per split point. At each doubling of + * the table, we can allocate extra pages. So, an overflow page number has + * the top 5 bits indicate which split point and the lower 11 bits indicate + * which page at that split point is indicated (pages within split points are + * numberered starting with 1). + */ + +#define SPLITSHIFT 11 +#define SPLITMASK 0x7FF +#define SPLITNUM(N) (((u_int)(N)) >> SPLITSHIFT) +#define OPAGENUM(N) ((N) & SPLITMASK) +#define OADDR_OF(S,O) ((u_int)((u_int)(S) << SPLITSHIFT) + (O)) + +#define BUCKET_TO_PAGE(B) \ + (B) + hashp->HDRPAGES + ((B) ? hashp->SPARES[__log2((B)+1)-1] : 0) +#define OADDR_TO_PAGE(B) \ + BUCKET_TO_PAGE ( (1 << SPLITNUM((B))) -1 ) + OPAGENUM((B)); + +/* + * page.h contains a detailed description of the page format. + * + * Normally, keys and data are accessed from offset tables in the top of + * each page which point to the beginning of the key and data. There are + * four flag values which may be stored in these offset tables which indicate + * the following: + * + * + * OVFLPAGE Rather than a key data pair, this pair contains + * the address of an overflow page. The format of + * the pair is: + * OVERFLOW_PAGE_NUMBER OVFLPAGE + * + * PARTIAL_KEY This must be the first key/data pair on a page + * and implies that page contains only a partial key. + * That is, the key is too big to fit on a single page + * so it starts on this page and continues on the next. + * The format of the page is: + * KEY_OFF PARTIAL_KEY OVFL_PAGENO OVFLPAGE + * + * KEY_OFF -- offset of the beginning of the key + * PARTIAL_KEY -- 1 + * OVFL_PAGENO - page number of the next overflow page + * OVFLPAGE -- 0 + * + * FULL_KEY This must be the first key/data pair on the page. It + * is used in two cases. + * + * Case 1: + * There is a complete key on the page but no data + * (because it wouldn't fit). The next page contains + * the data. + * + * Page format it: + * KEY_OFF FULL_KEY OVFL_PAGENO OVFL_PAGE + * + * KEY_OFF -- offset of the beginning of the key + * FULL_KEY -- 2 + * OVFL_PAGENO - page number of the next overflow page + * OVFLPAGE -- 0 + * + * Case 2: + * This page contains no key, but part of a large + * data field, which is continued on the next page. + * + * Page format it: + * DATA_OFF FULL_KEY OVFL_PAGENO OVFL_PAGE + * + * KEY_OFF -- offset of the beginning of the data on + * this page + * FULL_KEY -- 2 + * OVFL_PAGENO - page number of the next overflow page + * OVFLPAGE -- 0 + * + * FULL_KEY_DATA + * This must be the first key/data pair on the page. + * There are two cases: + * + * Case 1: + * This page contains a key and the beginning of the + * data field, but the data field is continued on the + * next page. + * + * Page format is: + * KEY_OFF FULL_KEY_DATA OVFL_PAGENO DATA_OFF + * + * KEY_OFF -- offset of the beginning of the key + * FULL_KEY_DATA -- 3 + * OVFL_PAGENO - page number of the next overflow page + * DATA_OFF -- offset of the beginning of the data + * + * Case 2: + * This page contains the last page of a big data pair. + * There is no key, only the tail end of the data + * on this page. + * + * Page format is: + * DATA_OFF FULL_KEY_DATA <OVFL_PAGENO> <OVFLPAGE> + * + * DATA_OFF -- offset of the beginning of the data on + * this page + * FULL_KEY_DATA -- 3 + * OVFL_PAGENO - page number of the next overflow page + * OVFLPAGE -- 0 + * + * OVFL_PAGENO and OVFLPAGE are optional (they are + * not present if there is no next page). + */ + +#define OVFLPAGE 0 +#define PARTIAL_KEY 1 +#define FULL_KEY 2 +#define FULL_KEY_DATA 3 +#define REAL_KEY 4 + +/* Short hands for accessing structure */ +#define BSIZE hdr.bsize +#define BSHIFT hdr.bshift +#define DSIZE hdr.dsize +#define SGSIZE hdr.ssize +#define SSHIFT hdr.sshift +#define LORDER hdr.lorder +#define OVFL_POINT hdr.ovfl_point +#define LAST_FREED hdr.last_freed +#define MAX_BUCKET hdr.max_bucket +#define FFACTOR hdr.ffactor +#define HIGH_MASK hdr.high_mask +#define LOW_MASK hdr.low_mask +#define NKEYS hdr.nkeys +#define HDRPAGES hdr.hdrpages +#define SPARES hdr.spares +#define BITMAPS hdr.bitmaps +#define VERSION hdr.version +#define MAGIC hdr.magic +#define NEXT_FREE hdr.next_free +#define H_CHARKEY hdr.h_charkey diff --git a/lib/libc/db/hash/hash_bigkey.c b/lib/libc/db/hash/hash_bigkey.c new file mode 100644 index 0000000..b747fbc --- /dev/null +++ b/lib/libc/db/hash/hash_bigkey.c @@ -0,0 +1,667 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)hash_bigkey.c 8.2 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +/* + * PACKAGE: hash + * DESCRIPTION: + * Big key/data handling for the hashing package. + * + * ROUTINES: + * External + * __big_keydata + * __big_split + * __big_insert + * __big_return + * __big_delete + * __find_last_page + * Internal + * collect_key + * collect_data + */ + +#include <sys/param.h> + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifdef DEBUG +#include <assert.h> +#endif + +#include <db.h> +#include "hash.h" +#include "page.h" +#include "extern.h" + +static int collect_key __P((HTAB *, BUFHEAD *, int, DBT *, int)); +static int collect_data __P((HTAB *, BUFHEAD *, int, int)); + +/* + * Big_insert + * + * You need to do an insert and the key/data pair is too big + * + * Returns: + * 0 ==> OK + *-1 ==> ERROR + */ +extern int +__big_insert(hashp, bufp, key, val) + HTAB *hashp; + BUFHEAD *bufp; + const DBT *key, *val; +{ + register u_short *p; + int key_size, n, val_size; + u_short space, move_bytes, off; + char *cp, *key_data, *val_data; + + cp = bufp->page; /* Character pointer of p. */ + p = (u_short *)cp; + + key_data = (char *)key->data; + key_size = key->size; + val_data = (char *)val->data; + val_size = val->size; + + /* First move the Key */ + for (space = FREESPACE(p) - BIGOVERHEAD; key_size; + space = FREESPACE(p) - BIGOVERHEAD) { + move_bytes = MIN(space, key_size); + off = OFFSET(p) - move_bytes; + memmove(cp + off, key_data, move_bytes); + key_size -= move_bytes; + key_data += move_bytes; + n = p[0]; + p[++n] = off; + p[0] = ++n; + FREESPACE(p) = off - PAGE_META(n); + OFFSET(p) = off; + p[n] = PARTIAL_KEY; + bufp = __add_ovflpage(hashp, bufp); + if (!bufp) + return (-1); + n = p[0]; + if (!key_size) + if (FREESPACE(p)) { + move_bytes = MIN(FREESPACE(p), val_size); + off = OFFSET(p) - move_bytes; + p[n] = off; + memmove(cp + off, val_data, move_bytes); + val_data += move_bytes; + val_size -= move_bytes; + p[n - 2] = FULL_KEY_DATA; + FREESPACE(p) = FREESPACE(p) - move_bytes; + OFFSET(p) = off; + } else + p[n - 2] = FULL_KEY; + p = (u_short *)bufp->page; + cp = bufp->page; + bufp->flags |= BUF_MOD; + } + + /* Now move the data */ + for (space = FREESPACE(p) - BIGOVERHEAD; val_size; + space = FREESPACE(p) - BIGOVERHEAD) { + move_bytes = MIN(space, val_size); + /* + * Here's the hack to make sure that if the data ends on the + * same page as the key ends, FREESPACE is at least one. + */ + if (space == val_size && val_size == val->size) + move_bytes--; + off = OFFSET(p) - move_bytes; + memmove(cp + off, val_data, move_bytes); + val_size -= move_bytes; + val_data += move_bytes; + n = p[0]; + p[++n] = off; + p[0] = ++n; + FREESPACE(p) = off - PAGE_META(n); + OFFSET(p) = off; + if (val_size) { + p[n] = FULL_KEY; + bufp = __add_ovflpage(hashp, bufp); + if (!bufp) + return (-1); + cp = bufp->page; + p = (u_short *)cp; + } else + p[n] = FULL_KEY_DATA; + bufp->flags |= BUF_MOD; + } + return (0); +} + +/* + * Called when bufp's page contains a partial key (index should be 1) + * + * All pages in the big key/data pair except bufp are freed. We cannot + * free bufp because the page pointing to it is lost and we can't get rid + * of its pointer. + * + * Returns: + * 0 => OK + *-1 => ERROR + */ +extern int +__big_delete(hashp, bufp) + HTAB *hashp; + BUFHEAD *bufp; +{ + register BUFHEAD *last_bfp, *rbufp; + u_short *bp, pageno; + int key_done, n; + + rbufp = bufp; + last_bfp = NULL; + bp = (u_short *)bufp->page; + pageno = 0; + key_done = 0; + + while (!key_done || (bp[2] != FULL_KEY_DATA)) { + if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) + key_done = 1; + + /* + * If there is freespace left on a FULL_KEY_DATA page, then + * the data is short and fits entirely on this page, and this + * is the last page. + */ + if (bp[2] == FULL_KEY_DATA && FREESPACE(bp)) + break; + pageno = bp[bp[0] - 1]; + rbufp->flags |= BUF_MOD; + rbufp = __get_buf(hashp, pageno, rbufp, 0); + if (last_bfp) + __free_ovflpage(hashp, last_bfp); + last_bfp = rbufp; + if (!rbufp) + return (-1); /* Error. */ + bp = (u_short *)rbufp->page; + } + + /* + * If we get here then rbufp points to the last page of the big + * key/data pair. Bufp points to the first one -- it should now be + * empty pointing to the next page after this pair. Can't free it + * because we don't have the page pointing to it. + */ + + /* This is information from the last page of the pair. */ + n = bp[0]; + pageno = bp[n - 1]; + + /* Now, bp is the first page of the pair. */ + bp = (u_short *)bufp->page; + if (n > 2) { + /* There is an overflow page. */ + bp[1] = pageno; + bp[2] = OVFLPAGE; + bufp->ovfl = rbufp->ovfl; + } else + /* This is the last page. */ + bufp->ovfl = NULL; + n -= 2; + bp[0] = n; + FREESPACE(bp) = hashp->BSIZE - PAGE_META(n); + OFFSET(bp) = hashp->BSIZE - 1; + + bufp->flags |= BUF_MOD; + if (rbufp) + __free_ovflpage(hashp, rbufp); + if (last_bfp != rbufp) + __free_ovflpage(hashp, last_bfp); + + hashp->NKEYS--; + return (0); +} +/* + * Returns: + * 0 = key not found + * -1 = get next overflow page + * -2 means key not found and this is big key/data + * -3 error + */ +extern int +__find_bigpair(hashp, bufp, ndx, key, size) + HTAB *hashp; + BUFHEAD *bufp; + int ndx; + char *key; + int size; +{ + register u_short *bp; + register char *p; + int ksize; + u_short bytes; + char *kkey; + + bp = (u_short *)bufp->page; + p = bufp->page; + ksize = size; + kkey = key; + + for (bytes = hashp->BSIZE - bp[ndx]; + bytes <= size && bp[ndx + 1] == PARTIAL_KEY; + bytes = hashp->BSIZE - bp[ndx]) { + if (memcmp(p + bp[ndx], kkey, bytes)) + return (-2); + kkey += bytes; + ksize -= bytes; + bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0); + if (!bufp) + return (-3); + p = bufp->page; + bp = (u_short *)p; + ndx = 1; + } + + if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) { +#ifdef HASH_STATISTICS + ++hash_collisions; +#endif + return (-2); + } else + return (ndx); +} + +/* + * Given the buffer pointer of the first overflow page of a big pair, + * find the end of the big pair + * + * This will set bpp to the buffer header of the last page of the big pair. + * It will return the pageno of the overflow page following the last page + * of the pair; 0 if there isn't any (i.e. big pair is the last key in the + * bucket) + */ +extern u_short +__find_last_page(hashp, bpp) + HTAB *hashp; + BUFHEAD **bpp; +{ + BUFHEAD *bufp; + u_short *bp, pageno; + int n; + + bufp = *bpp; + bp = (u_short *)bufp->page; + for (;;) { + n = bp[0]; + + /* + * This is the last page if: the tag is FULL_KEY_DATA and + * either only 2 entries OVFLPAGE marker is explicit there + * is freespace on the page. + */ + if (bp[2] == FULL_KEY_DATA && + ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)))) + break; + + pageno = bp[n - 1]; + bufp = __get_buf(hashp, pageno, bufp, 0); + if (!bufp) + return (0); /* Need to indicate an error! */ + bp = (u_short *)bufp->page; + } + + *bpp = bufp; + if (bp[0] > 2) + return (bp[3]); + else + return (0); +} + +/* + * Return the data for the key/data pair that begins on this page at this + * index (index should always be 1). + */ +extern int +__big_return(hashp, bufp, ndx, val, set_current) + HTAB *hashp; + BUFHEAD *bufp; + int ndx; + DBT *val; + int set_current; +{ + BUFHEAD *save_p; + u_short *bp, len, off, save_addr; + char *tp; + + bp = (u_short *)bufp->page; + while (bp[ndx + 1] == PARTIAL_KEY) { + bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); + if (!bufp) + return (-1); + bp = (u_short *)bufp->page; + ndx = 1; + } + + if (bp[ndx + 1] == FULL_KEY) { + bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); + if (!bufp) + return (-1); + bp = (u_short *)bufp->page; + save_p = bufp; + save_addr = save_p->addr; + off = bp[1]; + len = 0; + } else + if (!FREESPACE(bp)) { + /* + * This is a hack. We can't distinguish between + * FULL_KEY_DATA that contains complete data or + * incomplete data, so we require that if the data + * is complete, there is at least 1 byte of free + * space left. + */ + off = bp[bp[0]]; + len = bp[1] - off; + save_p = bufp; + save_addr = bufp->addr; + bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); + if (!bufp) + return (-1); + bp = (u_short *)bufp->page; + } else { + /* The data is all on one page. */ + tp = (char *)bp; + off = bp[bp[0]]; + val->data = (u_char *)tp + off; + val->size = bp[1] - off; + if (set_current) { + if (bp[0] == 2) { /* No more buckets in + * chain */ + hashp->cpage = NULL; + hashp->cbucket++; + hashp->cndx = 1; + } else { + hashp->cpage = __get_buf(hashp, + bp[bp[0] - 1], bufp, 0); + if (!hashp->cpage) + return (-1); + hashp->cndx = 1; + if (!((u_short *) + hashp->cpage->page)[0]) { + hashp->cbucket++; + hashp->cpage = NULL; + } + } + } + return (0); + } + + val->size = collect_data(hashp, bufp, (int)len, set_current); + if (val->size == -1) + return (-1); + if (save_p->addr != save_addr) { + /* We are pretty short on buffers. */ + errno = EINVAL; /* OUT OF BUFFERS */ + return (-1); + } + memmove(hashp->tmp_buf, (save_p->page) + off, len); + val->data = (u_char *)hashp->tmp_buf; + return (0); +} +/* + * Count how big the total datasize is by recursing through the pages. Then + * allocate a buffer and copy the data as you recurse up. + */ +static int +collect_data(hashp, bufp, len, set) + HTAB *hashp; + BUFHEAD *bufp; + int len, set; +{ + register u_short *bp; + register char *p; + BUFHEAD *xbp; + u_short save_addr; + int mylen, totlen; + + p = bufp->page; + bp = (u_short *)p; + mylen = hashp->BSIZE - bp[1]; + save_addr = bufp->addr; + + if (bp[2] == FULL_KEY_DATA) { /* End of Data */ + totlen = len + mylen; + if (hashp->tmp_buf) + free(hashp->tmp_buf); + if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL) + return (-1); + if (set) { + hashp->cndx = 1; + if (bp[0] == 2) { /* No more buckets in chain */ + hashp->cpage = NULL; + hashp->cbucket++; + } else { + hashp->cpage = + __get_buf(hashp, bp[bp[0] - 1], bufp, 0); + if (!hashp->cpage) + return (-1); + else if (!((u_short *)hashp->cpage->page)[0]) { + hashp->cbucket++; + hashp->cpage = NULL; + } + } + } + } else { + xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); + if (!xbp || ((totlen = + collect_data(hashp, xbp, len + mylen, set)) < 1)) + return (-1); + } + if (bufp->addr != save_addr) { + errno = EINVAL; /* Out of buffers. */ + return (-1); + } + memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen); + return (totlen); +} + +/* + * Fill in the key and data for this big pair. + */ +extern int +__big_keydata(hashp, bufp, key, val, set) + HTAB *hashp; + BUFHEAD *bufp; + DBT *key, *val; + int set; +{ + key->size = collect_key(hashp, bufp, 0, val, set); + if (key->size == -1) + return (-1); + key->data = (u_char *)hashp->tmp_key; + return (0); +} + +/* + * Count how big the total key size is by recursing through the pages. Then + * collect the data, allocate a buffer and copy the key as you recurse up. + */ +static int +collect_key(hashp, bufp, len, val, set) + HTAB *hashp; + BUFHEAD *bufp; + int len; + DBT *val; + int set; +{ + BUFHEAD *xbp; + char *p; + int mylen, totlen; + u_short *bp, save_addr; + + p = bufp->page; + bp = (u_short *)p; + mylen = hashp->BSIZE - bp[1]; + + save_addr = bufp->addr; + totlen = len + mylen; + if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */ + if (hashp->tmp_key != NULL) + free(hashp->tmp_key); + if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL) + return (-1); + if (__big_return(hashp, bufp, 1, val, set)) + return (-1); + } else { + xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); + if (!xbp || ((totlen = + collect_key(hashp, xbp, totlen, val, set)) < 1)) + return (-1); + } + if (bufp->addr != save_addr) { + errno = EINVAL; /* MIS -- OUT OF BUFFERS */ + return (-1); + } + memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen); + return (totlen); +} + +/* + * Returns: + * 0 => OK + * -1 => error + */ +extern int +__big_split(hashp, op, np, big_keyp, addr, obucket, ret) + HTAB *hashp; + BUFHEAD *op; /* Pointer to where to put keys that go in old bucket */ + BUFHEAD *np; /* Pointer to new bucket page */ + /* Pointer to first page containing the big key/data */ + BUFHEAD *big_keyp; + int addr; /* Address of big_keyp */ + u_int obucket;/* Old Bucket */ + SPLIT_RETURN *ret; +{ + register BUFHEAD *tmpp; + register u_short *tp; + BUFHEAD *bp; + DBT key, val; + u_int change; + u_short free_space, n, off; + + bp = big_keyp; + + /* Now figure out where the big key/data goes */ + if (__big_keydata(hashp, big_keyp, &key, &val, 0)) + return (-1); + change = (__call_hash(hashp, key.data, key.size) != obucket); + + if (ret->next_addr = __find_last_page(hashp, &big_keyp)) { + if (!(ret->nextp = + __get_buf(hashp, ret->next_addr, big_keyp, 0))) + return (-1);; + } else + ret->nextp = NULL; + + /* Now make one of np/op point to the big key/data pair */ +#ifdef DEBUG + assert(np->ovfl == NULL); +#endif + if (change) + tmpp = np; + else + tmpp = op; + + tmpp->flags |= BUF_MOD; +#ifdef DEBUG1 + (void)fprintf(stderr, + "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr, + (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0)); +#endif + tmpp->ovfl = bp; /* one of op/np point to big_keyp */ + tp = (u_short *)tmpp->page; +#ifdef DEBUG + assert(FREESPACE(tp) >= OVFLSIZE); +#endif + n = tp[0]; + off = OFFSET(tp); + free_space = FREESPACE(tp); + tp[++n] = (u_short)addr; + tp[++n] = OVFLPAGE; + tp[0] = n; + OFFSET(tp) = off; + FREESPACE(tp) = free_space - OVFLSIZE; + + /* + * Finally, set the new and old return values. BIG_KEYP contains a + * pointer to the last page of the big key_data pair. Make sure that + * big_keyp has no following page (2 elements) or create an empty + * following page. + */ + + ret->newp = np; + ret->oldp = op; + + tp = (u_short *)big_keyp->page; + big_keyp->flags |= BUF_MOD; + if (tp[0] > 2) { + /* + * There may be either one or two offsets on this page. If + * there is one, then the overflow page is linked on normally + * and tp[4] is OVFLPAGE. If there are two, tp[4] contains + * the second offset and needs to get stuffed in after the + * next overflow page is added. + */ + n = tp[4]; + free_space = FREESPACE(tp); + off = OFFSET(tp); + tp[0] -= 2; + FREESPACE(tp) = free_space + OVFLSIZE; + OFFSET(tp) = off; + tmpp = __add_ovflpage(hashp, big_keyp); + if (!tmpp) + return (-1); + tp[4] = n; + } else + tmpp = big_keyp; + + if (change) + ret->newp = tmpp; + else + ret->oldp = tmpp; + return (0); +} diff --git a/lib/libc/db/hash/hash_buf.c b/lib/libc/db/hash/hash_buf.c new file mode 100644 index 0000000..1362c83 --- /dev/null +++ b/lib/libc/db/hash/hash_buf.c @@ -0,0 +1,347 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)hash_buf.c 8.2 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +/* + * PACKAGE: hash + * + * DESCRIPTION: + * Contains buffer management + * + * ROUTINES: + * External + * __buf_init + * __get_buf + * __buf_free + * __reclaim_buf + * Internal + * newbuf + */ + +#include <sys/param.h> + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#ifdef DEBUG +#include <assert.h> +#endif + +#include <db.h> +#include "hash.h" +#include "page.h" +#include "extern.h" + +static BUFHEAD *newbuf __P((HTAB *, u_int, BUFHEAD *)); + +/* Unlink B from its place in the lru */ +#define BUF_REMOVE(B) { \ + (B)->prev->next = (B)->next; \ + (B)->next->prev = (B)->prev; \ +} + +/* Insert B after P */ +#define BUF_INSERT(B, P) { \ + (B)->next = (P)->next; \ + (B)->prev = (P); \ + (P)->next = (B); \ + (B)->next->prev = (B); \ +} + +#define MRU hashp->bufhead.next +#define LRU hashp->bufhead.prev + +#define MRU_INSERT(B) BUF_INSERT((B), &hashp->bufhead) +#define LRU_INSERT(B) BUF_INSERT((B), LRU) + +/* + * We are looking for a buffer with address "addr". If prev_bp is NULL, then + * address is a bucket index. If prev_bp is not NULL, then it points to the + * page previous to an overflow page that we are trying to find. + * + * CAVEAT: The buffer header accessed via prev_bp's ovfl field may no longer + * be valid. Therefore, you must always verify that its address matches the + * address you are seeking. + */ +extern BUFHEAD * +__get_buf(hashp, addr, prev_bp, newpage) + HTAB *hashp; + u_int addr; + BUFHEAD *prev_bp; + int newpage; /* If prev_bp set, indicates a new overflow page. */ +{ + register BUFHEAD *bp; + register u_int is_disk_mask; + register int is_disk, segment_ndx; + SEGMENT segp; + + is_disk = 0; + is_disk_mask = 0; + if (prev_bp) { + bp = prev_bp->ovfl; + if (!bp || (bp->addr != addr)) + bp = NULL; + if (!newpage) + is_disk = BUF_DISK; + } else { + /* Grab buffer out of directory */ + segment_ndx = addr & (hashp->SGSIZE - 1); + + /* valid segment ensured by __call_hash() */ + segp = hashp->dir[addr >> hashp->SSHIFT]; +#ifdef DEBUG + assert(segp != NULL); +#endif + bp = PTROF(segp[segment_ndx]); + is_disk_mask = ISDISK(segp[segment_ndx]); + is_disk = is_disk_mask || !hashp->new_file; + } + + if (!bp) { + bp = newbuf(hashp, addr, prev_bp); + if (!bp || + __get_page(hashp, bp->page, addr, !prev_bp, is_disk, 0)) + return (NULL); + if (!prev_bp) + segp[segment_ndx] = + (BUFHEAD *)((u_int)bp | is_disk_mask); + } else { + BUF_REMOVE(bp); + MRU_INSERT(bp); + } + return (bp); +} + +/* + * We need a buffer for this page. Either allocate one, or evict a resident + * one (if we have as many buffers as we're allowed) and put this one in. + * + * If newbuf finds an error (returning NULL), it also sets errno. + */ +static BUFHEAD * +newbuf(hashp, addr, prev_bp) + HTAB *hashp; + u_int addr; + BUFHEAD *prev_bp; +{ + register BUFHEAD *bp; /* The buffer we're going to use */ + register BUFHEAD *xbp; /* Temp pointer */ + register BUFHEAD *next_xbp; + SEGMENT segp; + int segment_ndx; + u_short oaddr, *shortp; + + oaddr = 0; + bp = LRU; + /* + * If LRU buffer is pinned, the buffer pool is too small. We need to + * allocate more buffers. + */ + if (hashp->nbufs || (bp->flags & BUF_PIN)) { + /* Allocate a new one */ + if ((bp = (BUFHEAD *)malloc(sizeof(BUFHEAD))) == NULL) + return (NULL); + if ((bp->page = (char *)malloc(hashp->BSIZE)) == NULL) { + free(bp); + return (NULL); + } + if (hashp->nbufs) + hashp->nbufs--; + } else { + /* Kick someone out */ + BUF_REMOVE(bp); + /* + * If this is an overflow page with addr 0, it's already been + * flushed back in an overflow chain and initialized. + */ + if ((bp->addr != 0) || (bp->flags & BUF_BUCKET)) { + /* + * Set oaddr before __put_page so that you get it + * before bytes are swapped. + */ + shortp = (u_short *)bp->page; + if (shortp[0]) + oaddr = shortp[shortp[0] - 1]; + if ((bp->flags & BUF_MOD) && __put_page(hashp, bp->page, + bp->addr, (int)IS_BUCKET(bp->flags), 0)) + return (NULL); + /* + * Update the pointer to this page (i.e. invalidate it). + * + * If this is a new file (i.e. we created it at open + * time), make sure that we mark pages which have been + * written to disk so we retrieve them from disk later, + * rather than allocating new pages. + */ + if (IS_BUCKET(bp->flags)) { + segment_ndx = bp->addr & (hashp->SGSIZE - 1); + segp = hashp->dir[bp->addr >> hashp->SSHIFT]; +#ifdef DEBUG + assert(segp != NULL); +#endif + + if (hashp->new_file && + ((bp->flags & BUF_MOD) || + ISDISK(segp[segment_ndx]))) + segp[segment_ndx] = (BUFHEAD *)BUF_DISK; + else + segp[segment_ndx] = NULL; + } + /* + * Since overflow pages can only be access by means of + * their bucket, free overflow pages associated with + * this bucket. + */ + for (xbp = bp; xbp->ovfl;) { + next_xbp = xbp->ovfl; + xbp->ovfl = 0; + xbp = next_xbp; + + /* Check that ovfl pointer is up date. */ + if (IS_BUCKET(xbp->flags) || + (oaddr != xbp->addr)) + break; + + shortp = (u_short *)xbp->page; + if (shortp[0]) + /* set before __put_page */ + oaddr = shortp[shortp[0] - 1]; + if ((xbp->flags & BUF_MOD) && __put_page(hashp, + xbp->page, xbp->addr, 0, 0)) + return (NULL); + xbp->addr = 0; + xbp->flags = 0; + BUF_REMOVE(xbp); + LRU_INSERT(xbp); + } + } + } + + /* Now assign this buffer */ + bp->addr = addr; +#ifdef DEBUG1 + (void)fprintf(stderr, "NEWBUF1: %d->ovfl was %d is now %d\n", + bp->addr, (bp->ovfl ? bp->ovfl->addr : 0), 0); +#endif + bp->ovfl = NULL; + if (prev_bp) { + /* + * If prev_bp is set, this is an overflow page, hook it in to + * the buffer overflow links. + */ +#ifdef DEBUG1 + (void)fprintf(stderr, "NEWBUF2: %d->ovfl was %d is now %d\n", + prev_bp->addr, (prev_bp->ovfl ? bp->ovfl->addr : 0), + (bp ? bp->addr : 0)); +#endif + prev_bp->ovfl = bp; + bp->flags = 0; + } else + bp->flags = BUF_BUCKET; + MRU_INSERT(bp); + return (bp); +} + +extern void +__buf_init(hashp, nbytes) + HTAB *hashp; + int nbytes; +{ + BUFHEAD *bfp; + int npages; + + bfp = &(hashp->bufhead); + npages = (nbytes + hashp->BSIZE - 1) >> hashp->BSHIFT; + npages = MAX(npages, MIN_BUFFERS); + + hashp->nbufs = npages; + bfp->next = bfp; + bfp->prev = bfp; + /* + * This space is calloc'd so these are already null. + * + * bfp->ovfl = NULL; + * bfp->flags = 0; + * bfp->page = NULL; + * bfp->addr = 0; + */ +} + +extern int +__buf_free(hashp, do_free, to_disk) + HTAB *hashp; + int do_free, to_disk; +{ + BUFHEAD *bp; + + /* Need to make sure that buffer manager has been initialized */ + if (!LRU) + return (0); + for (bp = LRU; bp != &hashp->bufhead;) { + /* Check that the buffer is valid */ + if (bp->addr || IS_BUCKET(bp->flags)) { + if (to_disk && (bp->flags & BUF_MOD) && + __put_page(hashp, bp->page, + bp->addr, IS_BUCKET(bp->flags), 0)) + return (-1); + } + /* Check if we are freeing stuff */ + if (do_free) { + if (bp->page) + free(bp->page); + BUF_REMOVE(bp); + free(bp); + bp = LRU; + } else + bp = bp->prev; + } + return (0); +} + +extern void +__reclaim_buf(hashp, bp) + HTAB *hashp; + BUFHEAD *bp; +{ + bp->ovfl = 0; + bp->addr = 0; + bp->flags = 0; + BUF_REMOVE(bp); + LRU_INSERT(bp); +} diff --git a/lib/libc/db/hash/hash_func.c b/lib/libc/db/hash/hash_func.c new file mode 100644 index 0000000..a5ec434 --- /dev/null +++ b/lib/libc/db/hash/hash_func.c @@ -0,0 +1,212 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)hash_func.c 8.2 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <db.h> +#include "hash.h" +#include "page.h" +#include "extern.h" + +static u_int32_t hash1 __P((const void *, size_t)); +static u_int32_t hash2 __P((const void *, size_t)); +static u_int32_t hash3 __P((const void *, size_t)); +static u_int32_t hash4 __P((const void *, size_t)); + +/* Global default hash function */ +u_int32_t (*__default_hash) __P((const void *, size_t)) = hash4; + +/* + * HASH FUNCTIONS + * + * Assume that we've already split the bucket to which this key hashes, + * calculate that bucket, and check that in fact we did already split it. + * + * This came from ejb's hsearch. + */ + +#define PRIME1 37 +#define PRIME2 1048583 + +static u_int32_t +hash1(keyarg, len) + const void *keyarg; + register size_t len; +{ + register const u_char *key; + register u_int32_t h; + + /* Convert string to integer */ + for (key = keyarg, h = 0; len--;) + h = h * PRIME1 ^ (*key++ - ' '); + h %= PRIME2; + return (h); +} + +/* + * Phong's linear congruential hash + */ +#define dcharhash(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c)) + +static u_int32_t +hash2(keyarg, len) + const void *keyarg; + size_t len; +{ + register const u_char *e, *key; + register u_int32_t h; + register u_char c; + + key = keyarg; + e = key + len; + for (h = 0; key != e;) { + c = *key++; + if (!c && key > e) + break; + dcharhash(h, c); + } + return (h); +} + +/* + * This is INCREDIBLY ugly, but fast. We break the string up into 8 byte + * units. On the first time through the loop we get the "leftover bytes" + * (strlen % 8). On every other iteration, we perform 8 HASHC's so we handle + * all 8 bytes. Essentially, this saves us 7 cmp & branch instructions. If + * this routine is heavily used enough, it's worth the ugly coding. + * + * OZ's original sdbm hash + */ +static u_int32_t +hash3(keyarg, len) + const void *keyarg; + register size_t len; +{ + register const u_char *key; + register size_t loop; + register u_int32_t h; + +#define HASHC h = *key++ + 65599 * h + + h = 0; + key = keyarg; + if (len > 0) { + loop = (len + 8 - 1) >> 3; + + switch (len & (8 - 1)) { + case 0: + do { + HASHC; + /* FALLTHROUGH */ + case 7: + HASHC; + /* FALLTHROUGH */ + case 6: + HASHC; + /* FALLTHROUGH */ + case 5: + HASHC; + /* FALLTHROUGH */ + case 4: + HASHC; + /* FALLTHROUGH */ + case 3: + HASHC; + /* FALLTHROUGH */ + case 2: + HASHC; + /* FALLTHROUGH */ + case 1: + HASHC; + } while (--loop); + } + } + return (h); +} + +/* Hash function from Chris Torek. */ +static u_int32_t +hash4(keyarg, len) + const void *keyarg; + register size_t len; +{ + register const u_char *key; + register size_t loop; + register u_int32_t h; + +#define HASH4a h = (h << 5) - h + *key++; +#define HASH4b h = (h << 5) + h + *key++; +#define HASH4 HASH4b + + h = 0; + key = keyarg; + if (len > 0) { + loop = (len + 8 - 1) >> 3; + + switch (len & (8 - 1)) { + case 0: + do { + HASH4; + /* FALLTHROUGH */ + case 7: + HASH4; + /* FALLTHROUGH */ + case 6: + HASH4; + /* FALLTHROUGH */ + case 5: + HASH4; + /* FALLTHROUGH */ + case 4: + HASH4; + /* FALLTHROUGH */ + case 3: + HASH4; + /* FALLTHROUGH */ + case 2: + HASH4; + /* FALLTHROUGH */ + case 1: + HASH4; + } while (--loop); + } + } + return (h); +} diff --git a/lib/libc/db/hash/hash_log2.c b/lib/libc/db/hash/hash_log2.c new file mode 100644 index 0000000..b773707 --- /dev/null +++ b/lib/libc/db/hash/hash_log2.c @@ -0,0 +1,52 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)hash_log2.c 8.1 (Berkeley) 6/4/93"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +u_int +__log2(num) + u_int num; +{ + register u_int i, limit; + + limit = 1; + for (i = 0; limit < num; limit = limit << 1, i++); + return (i); +} diff --git a/lib/libc/db/hash/hash_page.c b/lib/libc/db/hash/hash_page.c new file mode 100644 index 0000000..2b2f572 --- /dev/null +++ b/lib/libc/db/hash/hash_page.c @@ -0,0 +1,944 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)hash_page.c 8.4 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +/* + * PACKAGE: hashing + * + * DESCRIPTION: + * Page manipulation for hashing package. + * + * ROUTINES: + * + * External + * __get_page + * __add_ovflpage + * Internal + * overflow_page + * open_temp + */ + +#include <sys/types.h> + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#ifdef DEBUG +#include <assert.h> +#endif + +#include <db.h> +#include "hash.h" +#include "page.h" +#include "extern.h" + +static u_long *fetch_bitmap __P((HTAB *, int)); +static u_long first_free __P((u_long)); +static int open_temp __P((HTAB *)); +static u_short overflow_page __P((HTAB *)); +static void putpair __P((char *, const DBT *, const DBT *)); +static void squeeze_key __P((u_short *, const DBT *, const DBT *)); +static int ugly_split + __P((HTAB *, u_int, BUFHEAD *, BUFHEAD *, int, int)); + +#define PAGE_INIT(P) { \ + ((u_short *)(P))[0] = 0; \ + ((u_short *)(P))[1] = hashp->BSIZE - 3 * sizeof(u_short); \ + ((u_short *)(P))[2] = hashp->BSIZE; \ +} + +/* + * This is called AFTER we have verified that there is room on the page for + * the pair (PAIRFITS has returned true) so we go right ahead and start moving + * stuff on. + */ +static void +putpair(p, key, val) + char *p; + const DBT *key, *val; +{ + register u_short *bp, n, off; + + bp = (u_short *)p; + + /* Enter the key first. */ + n = bp[0]; + + off = OFFSET(bp) - key->size; + memmove(p + off, key->data, key->size); + bp[++n] = off; + + /* Now the data. */ + off -= val->size; + memmove(p + off, val->data, val->size); + bp[++n] = off; + + /* Adjust page info. */ + bp[0] = n; + bp[n + 1] = off - ((n + 3) * sizeof(u_short)); + bp[n + 2] = off; +} + +/* + * Returns: + * 0 OK + * -1 error + */ +extern int +__delpair(hashp, bufp, ndx) + HTAB *hashp; + BUFHEAD *bufp; + register int ndx; +{ + register u_short *bp, newoff; + register int n; + u_short pairlen; + + bp = (u_short *)bufp->page; + n = bp[0]; + + if (bp[ndx + 1] < REAL_KEY) + return (__big_delete(hashp, bufp)); + if (ndx != 1) + newoff = bp[ndx - 1]; + else + newoff = hashp->BSIZE; + pairlen = newoff - bp[ndx + 1]; + + if (ndx != (n - 1)) { + /* Hard Case -- need to shuffle keys */ + register int i; + register char *src = bufp->page + (int)OFFSET(bp); + register char *dst = src + (int)pairlen; + memmove(dst, src, bp[ndx + 1] - OFFSET(bp)); + + /* Now adjust the pointers */ + for (i = ndx + 2; i <= n; i += 2) { + if (bp[i + 1] == OVFLPAGE) { + bp[i - 2] = bp[i]; + bp[i - 1] = bp[i + 1]; + } else { + bp[i - 2] = bp[i] + pairlen; + bp[i - 1] = bp[i + 1] + pairlen; + } + } + } + /* Finally adjust the page data */ + bp[n] = OFFSET(bp) + pairlen; + bp[n - 1] = bp[n + 1] + pairlen + 2 * sizeof(u_short); + bp[0] = n - 2; + hashp->NKEYS--; + + bufp->flags |= BUF_MOD; + return (0); +} +/* + * Returns: + * 0 ==> OK + * -1 ==> Error + */ +extern int +__split_page(hashp, obucket, nbucket) + HTAB *hashp; + u_int obucket, nbucket; +{ + register BUFHEAD *new_bufp, *old_bufp; + register u_short *ino; + register char *np; + DBT key, val; + int n, ndx, retval; + u_short copyto, diff, off, moved; + char *op; + + copyto = (u_short)hashp->BSIZE; + off = (u_short)hashp->BSIZE; + old_bufp = __get_buf(hashp, obucket, NULL, 0); + if (old_bufp == NULL) + return (-1); + new_bufp = __get_buf(hashp, nbucket, NULL, 0); + if (new_bufp == NULL) + return (-1); + + old_bufp->flags |= (BUF_MOD | BUF_PIN); + new_bufp->flags |= (BUF_MOD | BUF_PIN); + + ino = (u_short *)(op = old_bufp->page); + np = new_bufp->page; + + moved = 0; + + for (n = 1, ndx = 1; n < ino[0]; n += 2) { + if (ino[n + 1] < REAL_KEY) { + retval = ugly_split(hashp, obucket, old_bufp, new_bufp, + (int)copyto, (int)moved); + old_bufp->flags &= ~BUF_PIN; + new_bufp->flags &= ~BUF_PIN; + return (retval); + + } + key.data = (u_char *)op + ino[n]; + key.size = off - ino[n]; + + if (__call_hash(hashp, key.data, key.size) == obucket) { + /* Don't switch page */ + diff = copyto - off; + if (diff) { + copyto = ino[n + 1] + diff; + memmove(op + copyto, op + ino[n + 1], + off - ino[n + 1]); + ino[ndx] = copyto + ino[n] - ino[n + 1]; + ino[ndx + 1] = copyto; + } else + copyto = ino[n + 1]; + ndx += 2; + } else { + /* Switch page */ + val.data = (u_char *)op + ino[n + 1]; + val.size = ino[n] - ino[n + 1]; + putpair(np, &key, &val); + moved += 2; + } + + off = ino[n + 1]; + } + + /* Now clean up the page */ + ino[0] -= moved; + FREESPACE(ino) = copyto - sizeof(u_short) * (ino[0] + 3); + OFFSET(ino) = copyto; + +#ifdef DEBUG3 + (void)fprintf(stderr, "split %d/%d\n", + ((u_short *)np)[0] / 2, + ((u_short *)op)[0] / 2); +#endif + /* unpin both pages */ + old_bufp->flags &= ~BUF_PIN; + new_bufp->flags &= ~BUF_PIN; + return (0); +} + +/* + * Called when we encounter an overflow or big key/data page during split + * handling. This is special cased since we have to begin checking whether + * the key/data pairs fit on their respective pages and because we may need + * overflow pages for both the old and new pages. + * + * The first page might be a page with regular key/data pairs in which case + * we have a regular overflow condition and just need to go on to the next + * page or it might be a big key/data pair in which case we need to fix the + * big key/data pair. + * + * Returns: + * 0 ==> success + * -1 ==> failure + */ +static int +ugly_split(hashp, obucket, old_bufp, new_bufp, copyto, moved) + HTAB *hashp; + u_int obucket; /* Same as __split_page. */ + BUFHEAD *old_bufp, *new_bufp; + int copyto; /* First byte on page which contains key/data values. */ + int moved; /* Number of pairs moved to new page. */ +{ + register BUFHEAD *bufp; /* Buffer header for ino */ + register u_short *ino; /* Page keys come off of */ + register u_short *np; /* New page */ + register u_short *op; /* Page keys go on to if they aren't moving */ + + BUFHEAD *last_bfp; /* Last buf header OVFL needing to be freed */ + DBT key, val; + SPLIT_RETURN ret; + u_short n, off, ov_addr, scopyto; + char *cino; /* Character value of ino */ + + bufp = old_bufp; + ino = (u_short *)old_bufp->page; + np = (u_short *)new_bufp->page; + op = (u_short *)old_bufp->page; + last_bfp = NULL; + scopyto = (u_short)copyto; /* ANSI */ + + n = ino[0] - 1; + while (n < ino[0]) { + if (ino[2] < REAL_KEY && ino[2] != OVFLPAGE) { + if (__big_split(hashp, old_bufp, + new_bufp, bufp, bufp->addr, obucket, &ret)) + return (-1); + old_bufp = ret.oldp; + if (!old_bufp) + return (-1); + op = (u_short *)old_bufp->page; + new_bufp = ret.newp; + if (!new_bufp) + return (-1); + np = (u_short *)new_bufp->page; + bufp = ret.nextp; + if (!bufp) + return (0); + cino = (char *)bufp->page; + ino = (u_short *)cino; + last_bfp = ret.nextp; + } else if (ino[n + 1] == OVFLPAGE) { + ov_addr = ino[n]; + /* + * Fix up the old page -- the extra 2 are the fields + * which contained the overflow information. + */ + ino[0] -= (moved + 2); + FREESPACE(ino) = + scopyto - sizeof(u_short) * (ino[0] + 3); + OFFSET(ino) = scopyto; + + bufp = __get_buf(hashp, ov_addr, bufp, 0); + if (!bufp) + return (-1); + + ino = (u_short *)bufp->page; + n = 1; + scopyto = hashp->BSIZE; + moved = 0; + + if (last_bfp) + __free_ovflpage(hashp, last_bfp); + last_bfp = bufp; + } + /* Move regular sized pairs of there are any */ + off = hashp->BSIZE; + for (n = 1; (n < ino[0]) && (ino[n + 1] >= REAL_KEY); n += 2) { + cino = (char *)ino; + key.data = (u_char *)cino + ino[n]; + key.size = off - ino[n]; + val.data = (u_char *)cino + ino[n + 1]; + val.size = ino[n] - ino[n + 1]; + off = ino[n + 1]; + + if (__call_hash(hashp, key.data, key.size) == obucket) { + /* Keep on old page */ + if (PAIRFITS(op, (&key), (&val))) + putpair((char *)op, &key, &val); + else { + old_bufp = + __add_ovflpage(hashp, old_bufp); + if (!old_bufp) + return (-1); + op = (u_short *)old_bufp->page; + putpair((char *)op, &key, &val); + } + old_bufp->flags |= BUF_MOD; + } else { + /* Move to new page */ + if (PAIRFITS(np, (&key), (&val))) + putpair((char *)np, &key, &val); + else { + new_bufp = + __add_ovflpage(hashp, new_bufp); + if (!new_bufp) + return (-1); + np = (u_short *)new_bufp->page; + putpair((char *)np, &key, &val); + } + new_bufp->flags |= BUF_MOD; + } + } + } + if (last_bfp) + __free_ovflpage(hashp, last_bfp); + return (0); +} + +/* + * Add the given pair to the page + * + * Returns: + * 0 ==> OK + * 1 ==> failure + */ +extern int +__addel(hashp, bufp, key, val) + HTAB *hashp; + BUFHEAD *bufp; + const DBT *key, *val; +{ + register u_short *bp, *sop; + int do_expand; + + bp = (u_short *)bufp->page; + do_expand = 0; + while (bp[0] && (bp[2] < REAL_KEY || bp[bp[0]] < REAL_KEY)) + /* Exception case */ + if (bp[2] == FULL_KEY_DATA && bp[0] == 2) + /* This is the last page of a big key/data pair + and we need to add another page */ + break; + else if (bp[2] < REAL_KEY && bp[bp[0]] != OVFLPAGE) { + bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); + if (!bufp) + return (-1); + bp = (u_short *)bufp->page; + } else + /* Try to squeeze key on this page */ + if (FREESPACE(bp) > PAIRSIZE(key, val)) { + squeeze_key(bp, key, val); + return (0); + } else { + bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); + if (!bufp) + return (-1); + bp = (u_short *)bufp->page; + } + + if (PAIRFITS(bp, key, val)) + putpair(bufp->page, key, val); + else { + do_expand = 1; + bufp = __add_ovflpage(hashp, bufp); + if (!bufp) + return (-1); + sop = (u_short *)bufp->page; + + if (PAIRFITS(sop, key, val)) + putpair((char *)sop, key, val); + else + if (__big_insert(hashp, bufp, key, val)) + return (-1); + } + bufp->flags |= BUF_MOD; + /* + * If the average number of keys per bucket exceeds the fill factor, + * expand the table. + */ + hashp->NKEYS++; + if (do_expand || + (hashp->NKEYS / (hashp->MAX_BUCKET + 1) > hashp->FFACTOR)) + return (__expand_table(hashp)); + return (0); +} + +/* + * + * Returns: + * pointer on success + * NULL on error + */ +extern BUFHEAD * +__add_ovflpage(hashp, bufp) + HTAB *hashp; + BUFHEAD *bufp; +{ + register u_short *sp; + u_short ndx, ovfl_num; +#ifdef DEBUG1 + int tmp1, tmp2; +#endif + sp = (u_short *)bufp->page; + + /* Check if we are dynamically determining the fill factor */ + if (hashp->FFACTOR == DEF_FFACTOR) { + hashp->FFACTOR = sp[0] >> 1; + if (hashp->FFACTOR < MIN_FFACTOR) + hashp->FFACTOR = MIN_FFACTOR; + } + bufp->flags |= BUF_MOD; + ovfl_num = overflow_page(hashp); +#ifdef DEBUG1 + tmp1 = bufp->addr; + tmp2 = bufp->ovfl ? bufp->ovfl->addr : 0; +#endif + if (!ovfl_num || !(bufp->ovfl = __get_buf(hashp, ovfl_num, bufp, 1))) + return (NULL); + bufp->ovfl->flags |= BUF_MOD; +#ifdef DEBUG1 + (void)fprintf(stderr, "ADDOVFLPAGE: %d->ovfl was %d is now %d\n", + tmp1, tmp2, bufp->ovfl->addr); +#endif + ndx = sp[0]; + /* + * Since a pair is allocated on a page only if there's room to add + * an overflow page, we know that the OVFL information will fit on + * the page. + */ + sp[ndx + 4] = OFFSET(sp); + sp[ndx + 3] = FREESPACE(sp) - OVFLSIZE; + sp[ndx + 1] = ovfl_num; + sp[ndx + 2] = OVFLPAGE; + sp[0] = ndx + 2; +#ifdef HASH_STATISTICS + hash_overflows++; +#endif + return (bufp->ovfl); +} + +/* + * Returns: + * 0 indicates SUCCESS + * -1 indicates FAILURE + */ +extern int +__get_page(hashp, p, bucket, is_bucket, is_disk, is_bitmap) + HTAB *hashp; + char *p; + u_int bucket; + int is_bucket, is_disk, is_bitmap; +{ + register int fd, page, size; + int rsize; + u_short *bp; + + fd = hashp->fp; + size = hashp->BSIZE; + + if ((fd == -1) || !is_disk) { + PAGE_INIT(p); + return (0); + } + if (is_bucket) + page = BUCKET_TO_PAGE(bucket); + else + page = OADDR_TO_PAGE(bucket); + if ((lseek(fd, (off_t)page << hashp->BSHIFT, SEEK_SET) == -1) || + ((rsize = read(fd, p, size)) == -1)) + return (-1); + bp = (u_short *)p; + if (!rsize) + bp[0] = 0; /* We hit the EOF, so initialize a new page */ + else + if (rsize != size) { + errno = EFTYPE; + return (-1); + } + if (!is_bitmap && !bp[0]) { + PAGE_INIT(p); + } else + if (hashp->LORDER != BYTE_ORDER) { + register int i, max; + + if (is_bitmap) { + max = hashp->BSIZE >> 2; /* divide by 4 */ + for (i = 0; i < max; i++) + M_32_SWAP(((long *)p)[i]); + } else { + M_16_SWAP(bp[0]); + max = bp[0] + 2; + for (i = 1; i <= max; i++) + M_16_SWAP(bp[i]); + } + } + return (0); +} + +/* + * Write page p to disk + * + * Returns: + * 0 ==> OK + * -1 ==>failure + */ +extern int +__put_page(hashp, p, bucket, is_bucket, is_bitmap) + HTAB *hashp; + char *p; + u_int bucket; + int is_bucket, is_bitmap; +{ + register int fd, page, size; + int wsize; + + size = hashp->BSIZE; + if ((hashp->fp == -1) && open_temp(hashp)) + return (-1); + fd = hashp->fp; + + if (hashp->LORDER != BYTE_ORDER) { + register int i; + register int max; + + if (is_bitmap) { + max = hashp->BSIZE >> 2; /* divide by 4 */ + for (i = 0; i < max; i++) + M_32_SWAP(((long *)p)[i]); + } else { + max = ((u_short *)p)[0] + 2; + for (i = 0; i <= max; i++) + M_16_SWAP(((u_short *)p)[i]); + } + } + if (is_bucket) + page = BUCKET_TO_PAGE(bucket); + else + page = OADDR_TO_PAGE(bucket); + if ((lseek(fd, (off_t)page << hashp->BSHIFT, SEEK_SET) == -1) || + ((wsize = write(fd, p, size)) == -1)) + /* Errno is set */ + return (-1); + if (wsize != size) { + errno = EFTYPE; + return (-1); + } + return (0); +} + +#define BYTE_MASK ((1 << INT_BYTE_SHIFT) -1) +/* + * Initialize a new bitmap page. Bitmap pages are left in memory + * once they are read in. + */ +extern int +__init_bitmap(hashp, pnum, nbits, ndx) + HTAB *hashp; + int pnum, nbits, ndx; +{ + u_long *ip; + int clearbytes, clearints; + + if ((ip = (u_long *)malloc(hashp->BSIZE)) == NULL) + return (1); + hashp->nmaps++; + clearints = ((nbits - 1) >> INT_BYTE_SHIFT) + 1; + clearbytes = clearints << INT_TO_BYTE; + (void)memset((char *)ip, 0, clearbytes); + (void)memset(((char *)ip) + clearbytes, 0xFF, + hashp->BSIZE - clearbytes); + ip[clearints - 1] = ALL_SET << (nbits & BYTE_MASK); + SETBIT(ip, 0); + hashp->BITMAPS[ndx] = (u_short)pnum; + hashp->mapp[ndx] = ip; + return (0); +} + +static u_long +first_free(map) + u_long map; +{ + register u_long i, mask; + + mask = 0x1; + for (i = 0; i < BITS_PER_MAP; i++) { + if (!(mask & map)) + return (i); + mask = mask << 1; + } + return (i); +} + +static u_short +overflow_page(hashp) + HTAB *hashp; +{ + register u_long *freep; + register int max_free, offset, splitnum; + u_short addr; + int bit, first_page, free_bit, free_page, i, in_use_bits, j; +#ifdef DEBUG2 + int tmp1, tmp2; +#endif + splitnum = hashp->OVFL_POINT; + max_free = hashp->SPARES[splitnum]; + + free_page = (max_free - 1) >> (hashp->BSHIFT + BYTE_SHIFT); + free_bit = (max_free - 1) & ((hashp->BSIZE << BYTE_SHIFT) - 1); + + /* Look through all the free maps to find the first free block */ + first_page = hashp->LAST_FREED >>(hashp->BSHIFT + BYTE_SHIFT); + for ( i = first_page; i <= free_page; i++ ) { + if (!(freep = (u_long *)hashp->mapp[i]) && + !(freep = fetch_bitmap(hashp, i))) + return (NULL); + if (i == free_page) + in_use_bits = free_bit; + else + in_use_bits = (hashp->BSIZE << BYTE_SHIFT) - 1; + + if (i == first_page) { + bit = hashp->LAST_FREED & + ((hashp->BSIZE << BYTE_SHIFT) - 1); + j = bit / BITS_PER_MAP; + bit = bit & ~(BITS_PER_MAP - 1); + } else { + bit = 0; + j = 0; + } + for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP) + if (freep[j] != ALL_SET) + goto found; + } + + /* No Free Page Found */ + hashp->LAST_FREED = hashp->SPARES[splitnum]; + hashp->SPARES[splitnum]++; + offset = hashp->SPARES[splitnum] - + (splitnum ? hashp->SPARES[splitnum - 1] : 0); + +#define OVMSG "HASH: Out of overflow pages. Increase page size\n" + if (offset > SPLITMASK) { + if (++splitnum >= NCACHED) { + (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); + return (NULL); + } + hashp->OVFL_POINT = splitnum; + hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1]; + hashp->SPARES[splitnum-1]--; + offset = 1; + } + + /* Check if we need to allocate a new bitmap page */ + if (free_bit == (hashp->BSIZE << BYTE_SHIFT) - 1) { + free_page++; + if (free_page >= NCACHED) { + (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); + return (NULL); + } + /* + * This is tricky. The 1 indicates that you want the new page + * allocated with 1 clear bit. Actually, you are going to + * allocate 2 pages from this map. The first is going to be + * the map page, the second is the overflow page we were + * looking for. The init_bitmap routine automatically, sets + * the first bit of itself to indicate that the bitmap itself + * is in use. We would explicitly set the second bit, but + * don't have to if we tell init_bitmap not to leave it clear + * in the first place. + */ + if (__init_bitmap(hashp, (int)OADDR_OF(splitnum, offset), + 1, free_page)) + return (NULL); + hashp->SPARES[splitnum]++; +#ifdef DEBUG2 + free_bit = 2; +#endif + offset++; + if (offset > SPLITMASK) { + if (++splitnum >= NCACHED) { + (void)write(STDERR_FILENO, OVMSG, + sizeof(OVMSG) - 1); + return (NULL); + } + hashp->OVFL_POINT = splitnum; + hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1]; + hashp->SPARES[splitnum-1]--; + offset = 0; + } + } else { + /* + * Free_bit addresses the last used bit. Bump it to address + * the first available bit. + */ + free_bit++; + SETBIT(freep, free_bit); + } + + /* Calculate address of the new overflow page */ + addr = OADDR_OF(splitnum, offset); +#ifdef DEBUG2 + (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n", + addr, free_bit, free_page); +#endif + return (addr); + +found: + bit = bit + first_free(freep[j]); + SETBIT(freep, bit); +#ifdef DEBUG2 + tmp1 = bit; + tmp2 = i; +#endif + /* + * Bits are addressed starting with 0, but overflow pages are addressed + * beginning at 1. Bit is a bit addressnumber, so we need to increment + * it to convert it to a page number. + */ + bit = 1 + bit + (i * (hashp->BSIZE << BYTE_SHIFT)); + if (bit >= hashp->LAST_FREED) + hashp->LAST_FREED = bit - 1; + + /* Calculate the split number for this page */ + for (i = 0; (i < splitnum) && (bit > hashp->SPARES[i]); i++); + offset = (i ? bit - hashp->SPARES[i - 1] : bit); + if (offset >= SPLITMASK) + return (NULL); /* Out of overflow pages */ + addr = OADDR_OF(i, offset); +#ifdef DEBUG2 + (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n", + addr, tmp1, tmp2); +#endif + + /* Allocate and return the overflow page */ + return (addr); +} + +/* + * Mark this overflow page as free. + */ +extern void +__free_ovflpage(hashp, obufp) + HTAB *hashp; + BUFHEAD *obufp; +{ + register u_short addr; + u_long *freep; + int bit_address, free_page, free_bit; + u_short ndx; + + addr = obufp->addr; +#ifdef DEBUG1 + (void)fprintf(stderr, "Freeing %d\n", addr); +#endif + ndx = (((u_short)addr) >> SPLITSHIFT); + bit_address = + (ndx ? hashp->SPARES[ndx - 1] : 0) + (addr & SPLITMASK) - 1; + if (bit_address < hashp->LAST_FREED) + hashp->LAST_FREED = bit_address; + free_page = (bit_address >> (hashp->BSHIFT + BYTE_SHIFT)); + free_bit = bit_address & ((hashp->BSIZE << BYTE_SHIFT) - 1); + + if (!(freep = hashp->mapp[free_page])) + freep = fetch_bitmap(hashp, free_page); +#ifdef DEBUG + /* + * This had better never happen. It means we tried to read a bitmap + * that has already had overflow pages allocated off it, and we + * failed to read it from the file. + */ + if (!freep) + assert(0); +#endif + CLRBIT(freep, free_bit); +#ifdef DEBUG2 + (void)fprintf(stderr, "FREE_OVFLPAGE: ADDR: %d BIT: %d PAGE %d\n", + obufp->addr, free_bit, free_page); +#endif + __reclaim_buf(hashp, obufp); +} + +/* + * Returns: + * 0 success + * -1 failure + */ +static int +open_temp(hashp) + HTAB *hashp; +{ + sigset_t set, oset; + static char namestr[] = "_hashXXXXXX"; + + /* Block signals; make sure file goes away at process exit. */ + (void)sigfillset(&set); + (void)sigprocmask(SIG_BLOCK, &set, &oset); + if ((hashp->fp = mkstemp(namestr)) != -1) { + (void)unlink(namestr); + (void)fcntl(hashp->fp, F_SETFD, 1); + } + (void)sigprocmask(SIG_SETMASK, &oset, (sigset_t *)NULL); + return (hashp->fp != -1 ? 0 : -1); +} + +/* + * We have to know that the key will fit, but the last entry on the page is + * an overflow pair, so we need to shift things. + */ +static void +squeeze_key(sp, key, val) + u_short *sp; + const DBT *key, *val; +{ + register char *p; + u_short free_space, n, off, pageno; + + p = (char *)sp; + n = sp[0]; + free_space = FREESPACE(sp); + off = OFFSET(sp); + + pageno = sp[n - 1]; + off -= key->size; + sp[n - 1] = off; + memmove(p + off, key->data, key->size); + off -= val->size; + sp[n] = off; + memmove(p + off, val->data, val->size); + sp[0] = n + 2; + sp[n + 1] = pageno; + sp[n + 2] = OVFLPAGE; + FREESPACE(sp) = free_space - PAIRSIZE(key, val); + OFFSET(sp) = off; +} + +static u_long * +fetch_bitmap(hashp, ndx) + HTAB *hashp; + int ndx; +{ + if (ndx >= hashp->nmaps) + return (NULL); + if ((hashp->mapp[ndx] = (u_long *)malloc(hashp->BSIZE)) == NULL) + return (NULL); + if (__get_page(hashp, + (char *)hashp->mapp[ndx], hashp->BITMAPS[ndx], 0, 1, 1)) { + free(hashp->mapp[ndx]); + return (NULL); + } + return (hashp->mapp[ndx]); +} + +#ifdef DEBUG4 +int +print_chain(addr) + int addr; +{ + BUFHEAD *bufp; + short *bp, oaddr; + + (void)fprintf(stderr, "%d ", addr); + bufp = __get_buf(hashp, addr, NULL, 0); + bp = (short *)bufp->page; + while (bp[0] && ((bp[bp[0]] == OVFLPAGE) || + ((bp[0] > 2) && bp[2] < REAL_KEY))) { + oaddr = bp[bp[0] - 1]; + (void)fprintf(stderr, "%d ", (int)oaddr); + bufp = __get_buf(hashp, (int)oaddr, bufp, 0); + bp = (short *)bufp->page; + } + (void)fprintf(stderr, "\n"); +} +#endif diff --git a/lib/libc/db/hash/hsearch.c b/lib/libc/db/hash/hsearch.c new file mode 100644 index 0000000..e4914b1 --- /dev/null +++ b/lib/libc/db/hash/hsearch.c @@ -0,0 +1,107 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)hsearch.c 8.3 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <fcntl.h> +#include <string.h> + +#include <db.h> +#include "search.h" + +static DB *dbp = NULL; +static ENTRY retval; + +extern int +hcreate(nel) + u_int nel; +{ + HASHINFO info; + + info.nelem = nel; + info.bsize = 256; + info.ffactor = 8; + info.cachesize = NULL; + info.hash = NULL; + info.lorder = 0; + dbp = (DB *)__hash_open(NULL, O_CREAT | O_RDWR, 0600, &info, 0); + return ((int)dbp); +} + +extern ENTRY * +hsearch(item, action) + ENTRY item; + ACTION action; +{ + DBT key, val; + int status; + + if (!dbp) + return (NULL); + key.data = (u_char *)item.key; + key.size = strlen(item.key) + 1; + + if (action == ENTER) { + val.data = (u_char *)item.data; + val.size = strlen(item.data) + 1; + status = (dbp->put)(dbp, &key, &val, R_NOOVERWRITE); + if (status) + return (NULL); + } else { + /* FIND */ + status = (dbp->get)(dbp, &key, &val, 0); + if (status) + return (NULL); + else + item.data = (char *)val.data; + } + retval.key = item.key; + retval.data = item.data; + return (&retval); +} + +extern void +hdestroy() +{ + if (dbp) { + (void)(dbp->close)(dbp); + dbp = NULL; + } +} diff --git a/lib/libc/db/hash/ndbm.c b/lib/libc/db/hash/ndbm.c new file mode 100644 index 0000000..89b9916 --- /dev/null +++ b/lib/libc/db/hash/ndbm.c @@ -0,0 +1,202 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)ndbm.c 8.2 (Berkeley) 9/11/93"; +#endif /* LIBC_SCCS and not lint */ + +/* + * This package provides a dbm compatible interface to the new hashing + * package described in db(3). + */ + +#include <sys/param.h> + +#include <ndbm.h> +#include <stdio.h> +#include <string.h> + +#include "hash.h" + +/* + * Returns: + * *DBM on success + * NULL on failure + */ +extern DBM * +dbm_open(file, flags, mode) + const char *file; + int flags, mode; +{ + HASHINFO info; + char path[MAXPATHLEN]; + + info.bsize = 4096; + info.ffactor = 40; + info.nelem = 1; + info.cachesize = NULL; + info.hash = NULL; + info.lorder = 0; + (void)strcpy(path, file); + (void)strcat(path, DBM_SUFFIX); + return ((DBM *)__hash_open(path, flags, mode, &info, 0)); +} + +extern void +dbm_close(db) + DBM *db; +{ + (void)(db->close)(db); +} + +/* + * Returns: + * DATUM on success + * NULL on failure + */ +extern datum +dbm_fetch(db, key) + DBM *db; + datum key; +{ + datum retval; + int status; + + status = (db->get)(db, (DBT *)&key, (DBT *)&retval, 0); + if (status) { + retval.dptr = NULL; + retval.dsize = 0; + } + return (retval); +} + +/* + * Returns: + * DATUM on success + * NULL on failure + */ +extern datum +dbm_firstkey(db) + DBM *db; +{ + int status; + datum retdata, retkey; + + status = (db->seq)(db, (DBT *)&retkey, (DBT *)&retdata, R_FIRST); + if (status) + retkey.dptr = NULL; + return (retkey); +} + +/* + * Returns: + * DATUM on success + * NULL on failure + */ +extern datum +dbm_nextkey(db) + DBM *db; +{ + int status; + datum retdata, retkey; + + status = (db->seq)(db, (DBT *)&retkey, (DBT *)&retdata, R_NEXT); + if (status) + retkey.dptr = NULL; + return (retkey); +} +/* + * Returns: + * 0 on success + * <0 failure + */ +extern int +dbm_delete(db, key) + DBM *db; + datum key; +{ + int status; + + status = (db->del)(db, (DBT *)&key, 0); + if (status) + return (-1); + else + return (0); +} + +/* + * Returns: + * 0 on success + * <0 failure + * 1 if DBM_INSERT and entry exists + */ +extern int +dbm_store(db, key, content, flags) + DBM *db; + datum key, content; + int flags; +{ + return ((db->put)(db, (DBT *)&key, (DBT *)&content, + (flags == DBM_INSERT) ? R_NOOVERWRITE : 0)); +} + +extern int +dbm_error(db) + DBM *db; +{ + HTAB *hp; + + hp = (HTAB *)db->internal; + return (hp->errno); +} + +extern int +dbm_clearerr(db) + DBM *db; +{ + HTAB *hp; + + hp = (HTAB *)db->internal; + hp->errno = 0; + return (0); +} + +extern int +dbm_dirfno(db) + DBM *db; +{ + return(((HTAB *)db->internal)->fp); +} diff --git a/lib/libc/db/hash/page.h b/lib/libc/db/hash/page.h new file mode 100644 index 0000000..dae82ae --- /dev/null +++ b/lib/libc/db/hash/page.h @@ -0,0 +1,92 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)page.h 8.1 (Berkeley) 6/6/93 + */ + +/* + * Definitions for hashing page file format. + */ + +/* + * routines dealing with a data page + * + * page format: + * +------------------------------+ + * p | n | keyoff | datoff | keyoff | + * +------------+--------+--------+ + * | datoff | free | ptr | --> | + * +--------+---------------------+ + * | F R E E A R E A | + * +--------------+---------------+ + * | <---- - - - | data | + * +--------+-----+----+----------+ + * | key | data | key | + * +--------+----------+----------+ + * + * Pointer to the free space is always: p[p[0] + 2] + * Amount of free space on the page is: p[p[0] + 1] + */ + +/* + * How many bytes required for this pair? + * 2 shorts in the table at the top of the page + room for the + * key and room for the data + * + * We prohibit entering a pair on a page unless there is also room to append + * an overflow page. The reason for this it that you can get in a situation + * where a single key/data pair fits on a page, but you can't append an + * overflow page and later you'd have to split the key/data and handle like + * a big pair. + * You might as well do this up front. + */ + +#define PAIRSIZE(K,D) (2*sizeof(u_short) + (K)->size + (D)->size) +#define BIGOVERHEAD (4*sizeof(u_short)) +#define KEYSIZE(K) (4*sizeof(u_short) + (K)->size); +#define OVFLSIZE (2*sizeof(u_short)) +#define FREESPACE(P) ((P)[(P)[0]+1]) +#define OFFSET(P) ((P)[(P)[0]+2]) +#define PAIRFITS(P,K,D) \ + (((P)[2] >= REAL_KEY) && \ + (PAIRSIZE((K),(D)) + OVFLSIZE) <= FREESPACE((P))) +#define PAGE_META(N) (((N)+3) * sizeof(u_short)) + +typedef struct { + BUFHEAD *newp; + BUFHEAD *oldp; + BUFHEAD *nextp; + u_short next_addr; +} SPLIT_RETURN; diff --git a/lib/libc/db/hash/search.h b/lib/libc/db/hash/search.h new file mode 100644 index 0000000..4d3b914 --- /dev/null +++ b/lib/libc/db/hash/search.h @@ -0,0 +1,51 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)search.h 8.1 (Berkeley) 6/4/93 + */ + +/* Backward compatibility to hsearch interface. */ +typedef struct entry { + char *key; + char *data; +} ENTRY; + +typedef enum { + FIND, ENTER +} ACTION; + +int hcreate __P((unsigned int)); +void hdestroy __P((void)); +ENTRY *hsearch __P((ENTRY, ACTION)); diff --git a/lib/libc/db/man/Makefile.inc b/lib/libc/db/man/Makefile.inc new file mode 100644 index 0000000..d8d93f9 --- /dev/null +++ b/lib/libc/db/man/Makefile.inc @@ -0,0 +1,7 @@ +# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93 + +.PATH: ${.CURDIR}/db/man + +# mpool.3 +MAN3+= btree.0 dbopen.0 hash.0 recno.0 +MLINKS+= dbopen.3 db.3 diff --git a/lib/libc/db/man/btree.3 b/lib/libc/db/man/btree.3 new file mode 100644 index 0000000..a51e1e5 --- /dev/null +++ b/lib/libc/db/man/btree.3 @@ -0,0 +1,226 @@ +.\" Copyright (c) 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)btree.3 8.3 (Berkeley) 2/21/94 +.\" +.TH BTREE 3 "February 21, 1994" +.\".UC 7 +.SH NAME +btree \- btree database access method +.SH SYNOPSIS +.nf +.ft B +#include <sys/types.h> +#include <db.h> +.ft R +.fi +.SH DESCRIPTION +The routine +.IR dbopen +is the library interface to database files. +One of the supported file formats is btree files. +The general description of the database access methods is in +.IR dbopen (3), +this manual page describes only the btree specific information. +.PP +The btree data structure is a sorted, balanced tree structure storing +associated key/data pairs. +.PP +The btree access method specific data structure provided to +.I dbopen +is defined in the <db.h> include file as follows: +.PP +typedef struct { +.RS +u_long flags; +.br +u_int cachesize; +.br +int maxkeypage; +.br +int minkeypage; +.br +u_int psize; +.br +int (*compare)(const DBT *key1, const DBT *key2); +.br +size_t (*prefix)(const DBT *key1, const DBT *key2); +.br +int lorder; +.RE +} BTREEINFO; +.PP +The elements of this structure are as follows: +.TP +flags +The flag value is specified by +.IR or 'ing +any of the following values: +.RS +.TP +R_DUP +Permit duplicate keys in the tree, i.e. permit insertion if the key to be +inserted already exists in the tree. +The default behavior, as described in +.IR dbopen (3), +is to overwrite a matching key when inserting a new key or to fail if +the R_NOOVERWRITE flag is specified. +The R_DUP flag is overridden by the R_NOOVERWRITE flag, and if the +R_NOOVERWRITE flag is specified, attempts to insert duplicate keys into +the tree will fail. +.IP +If the database contains duplicate keys, the order of retrieval of +key/data pairs is undefined if the +.I get +routine is used, however, +.I seq +routine calls with the R_CURSOR flag set will always return the logical +``first'' of any group of duplicate keys. +.RE +.TP +cachesize +A suggested maximum size (in bytes) of the memory cache. +This value is +.B only +advisory, and the access method will allocate more memory rather than fail. +Since every search examines the root page of the tree, caching the most +recently used pages substantially improves access time. +In addition, physical writes are delayed as long as possible, so a moderate +cache can reduce the number of I/O operations significantly. +Obviously, using a cache increases (but only increases) the likelihood of +corruption or lost data if the system crashes while a tree is being modified. +If +.I cachesize +is 0 (no size is specified) a default cache is used. +.TP +maxkeypage +The maximum number of keys which will be stored on any single page. +Not currently implemented. +.\" The maximum number of keys which will be stored on any single page. +.\" Because of the way the btree data structure works, +.\" .I maxkeypage +.\" must always be greater than or equal to 2. +.\" If +.\" .I maxkeypage +.\" is 0 (no maximum number of keys is specified) the page fill factor is +.\" made as large as possible (which is almost invariably what is wanted). +.TP +minkeypage +The minimum number of keys which will be stored on any single page. +This value is used to determine which keys will be stored on overflow +pages, i.e. if a key or data item is longer than the pagesize divided +by the minkeypage value, it will be stored on overflow pages instead +of in the page itself. +If +.I minkeypage +is 0 (no minimum number of keys is specified) a value of 2 is used. +.TP +psize +Page size is the size (in bytes) of the pages used for nodes in the tree. +The minimum page size is 512 bytes and the maximum page size is 64K. +If +.I psize +is 0 (no page size is specified) a page size is chosen based on the +underlying file system I/O block size. +.TP +compare +Compare is the key comparison function. +It must return an integer less than, equal to, or greater than zero if the +first key argument is considered to be respectively less than, equal to, +or greater than the second key argument. +The same comparison function must be used on a given tree every time it +is opened. +If +.I compare +is NULL (no comparison function is specified), the keys are compared +lexically, with shorter keys considered less than longer keys. +.TP +prefix +Prefix is the prefix comparison function. +If specified, this routine must return the number of bytes of the second key +argument which are necessary to determine that it is greater than the first +key argument. +If the keys are equal, the key length should be returned. +Note, the usefulness of this routine is very data dependent, but, in some +data sets can produce significantly reduced tree sizes and search times. +If +.I prefix +is NULL (no prefix function is specified), +.B and +no comparison function is specified, a default lexical comparison routine +is used. +If +.I prefix +is NULL and a comparison routine is specified, no prefix comparison is +done. +.TP +lorder +The byte order for integers in the stored database metadata. +The number should represent the order as an integer; for example, +big endian order would be the number 4,321. +If +.I lorder +is 0 (no order is specified) the current host order is used. +.PP +If the file already exists (and the O_TRUNC flag is not specified), the +values specified for the parameters flags, lorder and psize are ignored +in favor of the values used when the tree was created. +.PP +Forward sequential scans of a tree are from the least key to the greatest. +.PP +Space freed up by deleting key/data pairs from the tree is never reclaimed, +although it is normally made available for reuse. +This means that the btree storage structure is grow-only. +The only solutions are to avoid excessive deletions, or to create a fresh +tree periodically from a scan of an existing one. +.PP +Searches, insertions, and deletions in a btree will all complete in +O lg base N where base is the average fill factor. +Often, inserting ordered data into btrees results in a low fill factor. +This implementation has been modified to make ordered insertion the best +case, resulting in a much better than normal page fill factor. +.SH "SEE ALSO" +.IR dbopen (3), +.IR hash (3), +.IR mpool (3), +.IR recno (3) +.sp +.IR "The Ubiquitous B-tree" , +Douglas Comer, ACM Comput. Surv. 11, 2 (June 1979), 121-138. +.sp +.IR "Prefix B-trees" , +Bayer and Unterauer, ACM Transactions on Database Systems, Vol. 2, 1 +(March 1977), 11-26. +.sp +.IR "The Art of Computer Programming Vol. 3: Sorting and Searching" , +D.E. Knuth, 1968, pp 471-480. +.SH BUGS +Only big and little endian byte order is supported. diff --git a/lib/libc/db/man/dbopen.3 b/lib/libc/db/man/dbopen.3 new file mode 100644 index 0000000..f06a4ef --- /dev/null +++ b/lib/libc/db/man/dbopen.3 @@ -0,0 +1,476 @@ +.\" Copyright (c) 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)dbopen.3 8.5 (Berkeley) 1/2/94 +.\" +.TH DBOPEN 3 "January 2, 1994" +.UC 7 +.SH NAME +dbopen \- database access methods +.SH SYNOPSIS +.nf +.ft B +#include <sys/types.h> +#include <limits.h> +#include <db.h> + +DB * +dbopen(const char *file, int flags, int mode, DBTYPE type, +.ti +5 +const void *openinfo); +.ft R +.fi +.SH DESCRIPTION +.IR Dbopen +is the library interface to database files. +The supported file formats are btree, hashed and UNIX file oriented. +The btree format is a representation of a sorted, balanced tree structure. +The hashed format is an extensible, dynamic hashing scheme. +The flat-file format is a byte stream file with fixed or variable length +records. +The formats and file format specific information are described in detail +in their respective manual pages +.IR btree (3), +.IR hash (3) +and +.IR recno (3). +.PP +Dbopen opens +.I file +for reading and/or writing. +Files never intended to be preserved on disk may be created by setting +the file parameter to NULL. +.PP +The +.I flags +and +.I mode arguments +are as specified to the +.IR open (2) +routine, however, only the O_CREAT, O_EXCL, O_EXLOCK, O_NONBLOCK, +O_RDONLY, O_RDWR, O_SHLOCK and O_TRUNC flags are meaningful. +(Note, opening a database file O_WRONLY is not possible.) +.\"Three additional options may be specified by +.\".IR or 'ing +.\"them into the +.\".I flags +.\"argument. +.\".TP +.\"DB_LOCK +.\"Do the necessary locking in the database to support concurrent access. +.\"If concurrent access isn't needed or the database is read-only this +.\"flag should not be set, as it tends to have an associated performance +.\"penalty. +.\".TP +.\"DB_SHMEM +.\"Place the underlying memory pool used by the database in shared +.\"memory. +.\"Necessary for concurrent access. +.\".TP +.\"DB_TXN +.\"Support transactions in the database. +.\"The DB_LOCK and DB_SHMEM flags must be set as well. +.PP +The +.I type +argument is of type DBTYPE (as defined in the <db.h> include file) and +may be set to DB_BTREE, DB_HASH or DB_RECNO. +.PP +The +.I openinfo +argument is a pointer to an access method specific structure described +in the access method's manual page. +If +.I openinfo +is NULL, each access method will use defaults appropriate for the system +and the access method. +.PP +.I Dbopen +returns a pointer to a DB structure on success and NULL on error. +The DB structure is defined in the <db.h> include file, and contains at +least the following fields: +.sp +.nf +typedef struct { +.RS +DBTYPE type; +int (*close)(const DB *db); +int (*del)(const DB *db, const DBT *key, u_int flags); +int (*fd)(const DB *db); +int (*get)(const DB *db, DBT *key, DBT *data, u_int flags); +int (*put)(const DB *db, DBT *key, const DBT *data, +.ti +5 +u_int flags); +int (*sync)(const DB *db, u_int flags); +int (*seq)(const DB *db, DBT *key, DBT *data, u_int flags); +.RE +} DB; +.fi +.PP +These elements describe a database type and a set of functions performing +various actions. +These functions take a pointer to a structure as returned by +.IR dbopen , +and sometimes one or more pointers to key/data structures and a flag value. +.TP +type +The type of the underlying access method (and file format). +.TP +close +A pointer to a routine to flush any cached information to disk, free any +allocated resources, and close the underlying file(s). +Since key/data pairs may be cached in memory, failing to sync the file +with a +.I close +or +.I sync +function may result in inconsistent or lost information. +.I Close +routines return -1 on error (setting +.IR errno ) +and 0 on success. +.TP +del +A pointer to a routine to remove key/data pairs from the database. +.IP +The parameter +.I flag +may be set to the following value: +.RS +.TP +R_CURSOR +Delete the record referenced by the cursor. +The cursor must have previously been initialized. +.RE +.IP +.I Delete +routines return -1 on error (setting +.IR errno ), +0 on success, and 1 if the specified +.I key +was not in the file. +.TP +fd +A pointer to a routine which returns a file descriptor representative +of the underlying database. +A file descriptor referencing the same file will be returned to all +processes which call +.I dbopen +with the same +.I file +name. +This file descriptor may be safely used as an argument to the +.IR fcntl (2) +and +.IR flock (2) +locking functions. +The file descriptor is not necessarily associated with any of the +underlying files used by the access method. +No file descriptor is available for in memory databases. +.I Fd +routines return -1 on error (setting +.IR errno ), +and the file descriptor on success. +.TP +get +A pointer to a routine which is the interface for keyed retrieval from +the database. +The address and length of the data associated with the specified +.I key +are returned in the structure referenced by +.IR data . +.I Get +routines return -1 on error (setting +.IR errno ), +0 on success, and 1 if the +.I key +was not in the file. +.TP +put +A pointer to a routine to store key/data pairs in the database. +.IP +The parameter +.I flag +may be set to one of the following values: +.RS +.TP +R_CURSOR +Replace the key/data pair referenced by the cursor. +The cursor must have previously been initialized. +.TP +R_IAFTER +Append the data immediately after the data referenced by +.IR key , +creating a new key/data pair. +The record number of the appended key/data pair is returned in the +.I key +structure. +(Applicable only to the DB_RECNO access method.) +.TP +R_IBEFORE +Insert the data immediately before the data referenced by +.IR key , +creating a new key/data pair. +The record number of the inserted key/data pair is returned in the +.I key +structure. +(Applicable only to the DB_RECNO access method.) +.TP +R_NOOVERWRITE +Enter the new key/data pair only if the key does not previously exist. +.TP +R_SETCURSOR +Store the key/data pair, setting or initializing the position of the +cursor to reference it. +(Applicable only to the DB_BTREE and DB_RECNO access methods.) +.RE +.IP +R_SETCURSOR is available only for the DB_BTREE and DB_RECNO access +methods because it implies that the keys have an inherent order +which does not change. +.IP +R_IAFTER and R_IBEFORE are available only for the DB_RECNO +access method because they each imply that the access method is able to +create new keys. +This is only true if the keys are ordered and independent, record numbers +for example. +.IP +The default behavior of the +.I put +routines is to enter the new key/data pair, replacing any previously +existing key. +.IP +.I Put +routines return -1 on error (setting +.IR errno ), +0 on success, and 1 if the R_NOOVERWRITE +.I flag +was set and the key already exists in the file. +.TP +seq +A pointer to a routine which is the interface for sequential +retrieval from the database. +The address and length of the key are returned in the structure +referenced by +.IR key , +and the address and length of the data are returned in the +structure referenced +by +.IR data . +.IP +Sequential key/data pair retrieval may begin at any time, and the +position of the ``cursor'' is not affected by calls to the +.IR del , +.IR get , +.IR put , +or +.I sync +routines. +Modifications to the database during a sequential scan will be reflected +in the scan, i.e. records inserted behind the cursor will not be returned +while records inserted in front of the cursor will be returned. +.IP +The flag value +.B must +be set to one of the following values: +.RS +.TP +R_CURSOR +The data associated with the specified key is returned. +This differs from the +.I get +routines in that it sets or initializes the cursor to the location of +the key as well. +(Note, for the DB_BTREE access method, the returned key is not necessarily an +exact match for the specified key. +The returned key is the smallest key greater than or equal to the specified +key, permitting partial key matches and range searches.) +.TP +R_FIRST +The first key/data pair of the database is returned, and the cursor +is set or initialized to reference it. +.TP +R_LAST +The last key/data pair of the database is returned, and the cursor +is set or initialized to reference it. +(Applicable only to the DB_BTREE and DB_RECNO access methods.) +.TP +R_NEXT +Retrieve the key/data pair immediately after the cursor. +If the cursor is not yet set, this is the same as the R_FIRST flag. +.TP +R_PREV +Retrieve the key/data pair immediately before the cursor. +If the cursor is not yet set, this is the same as the R_LAST flag. +(Applicable only to the DB_BTREE and DB_RECNO access methods.) +.RE +.IP +R_LAST and R_PREV are available only for the DB_BTREE and DB_RECNO +access methods because they each imply that the keys have an inherent +order which does not change. +.IP +.I Seq +routines return -1 on error (setting +.IR errno ), +0 on success and 1 if there are no key/data pairs less than or greater +than the specified or current key. +If the DB_RECNO access method is being used, and if the database file +is a character special file and no complete key/data pairs are currently +available, the +.I seq +routines return 2. +.TP +sync +A pointer to a routine to flush any cached information to disk. +If the database is in memory only, the +.I sync +routine has no effect and will always succeed. +.IP +The flag value may be set to the following value: +.RS +.TP +R_RECNOSYNC +If the DB_RECNO access method is being used, this flag causes +the sync routine to apply to the btree file which underlies the +recno file, not the recno file itself. +(See the +.I bfname +field of the +.IR recno (3) +manual page for more information.) +.RE +.IP +.I Sync +routines return -1 on error (setting +.IR errno ) +and 0 on success. +.SH "KEY/DATA PAIRS" +Access to all file types is based on key/data pairs. +Both keys and data are represented by the following data structure: +.PP +typedef struct { +.RS +void *data; +.br +size_t size; +.RE +} DBT; +.PP +The elements of the DBT structure are defined as follows: +.TP +data +A pointer to a byte string. +.TP +size +The length of the byte string. +.PP +Key and data byte strings may reference strings of essentially unlimited +length although any two of them must fit into available memory at the same +time. +It should be noted that the access methods provide no guarantees about +byte string alignment. +.SH ERRORS +The +.I dbopen +routine may fail and set +.I errno +for any of the errors specified for the library routines +.IR open (2) +and +.IR malloc (3) +or the following: +.TP +[EFTYPE] +A file is incorrectly formatted. +.TP +[EINVAL] +A parameter has been specified (hash function, pad byte etc.) that is +incompatible with the current file specification or which is not +meaningful for the function (for example, use of the cursor without +prior initialization) or there is a mismatch between the version +number of file and the software. +.PP +The +.I close +routines may fail and set +.I errno +for any of the errors specified for the library routines +.IR close (2), +.IR read (2), +.IR write (2), +.IR free (3), +or +.IR fsync (2). +.PP +The +.IR del , +.IR get , +.I put +and +.I seq +routines may fail and set +.I errno +for any of the errors specified for the library routines +.IR read (2), +.IR write (2), +.IR free (3) +or +.IR malloc (3). +.PP +The +.I fd +routines will fail and set +.I errno +to ENOENT for in memory databases. +.PP +The +.I sync +routines may fail and set +.I errno +for any of the errors specified for the library routine +.IR fsync (2). +.SH "SEE ALSO" +.IR btree (3), +.IR hash (3), +.IR mpool (3), +.IR recno (3) +.sp +.IR "LIBTP: Portable, Modular Transactions for UNIX" , +Margo Seltzer, Michael Olson, USENIX proceedings, Winter 1992. +.SH BUGS +The typedef DBT is a mnemonic for ``data base thang'', and was used +because noone could think of a reasonable name that wasn't already used. +.PP +The file descriptor interface is a kluge and will be deleted in a +future version of the interface. +.PP +None of the access methods provide any form of concurrent access, +locking, or transactions. diff --git a/lib/libc/db/man/hash.3 b/lib/libc/db/man/hash.3 new file mode 100644 index 0000000..3ae00a2 --- /dev/null +++ b/lib/libc/db/man/hash.3 @@ -0,0 +1,152 @@ +.\" Copyright (c) 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)hash.3 8.5 (Berkeley) 2/21/94 +.\" +.TH HASH 3 "February 21, 1994" +.UC 7 +.SH NAME +hash \- hash database access method +.SH SYNOPSIS +.nf +.ft B +#include <sys/types.h> +#include <db.h> +.ft R +.fi +.SH DESCRIPTION +The routine +.IR dbopen +is the library interface to database files. +One of the supported file formats is hash files. +The general description of the database access methods is in +.IR dbopen (3), +this manual page describes only the hash specific information. +.PP +The hash data structure is an extensible, dynamic hashing scheme. +.PP +The access method specific data structure provided to +.I dbopen +is defined in the <db.h> include file as follows: +.sp +typedef struct { +.RS +u_int bsize; +.br +u_int ffactor; +.br +u_int nelem; +.br +u_int cachesize; +.br +u_int32_t (*hash)(const void *, size_t); +.br +int lorder; +.RE +} HASHINFO; +.PP +The elements of this structure are as follows: +.TP +bsize +.I Bsize +defines the hash table bucket size, and is, by default, 256 bytes. +It may be preferable to increase the page size for disk-resident tables +and tables with large data items. +.TP +ffactor +.I Ffactor +indicates a desired density within the hash table. +It is an approximation of the number of keys allowed to accumulate in any +one bucket, determining when the hash table grows or shrinks. +The default value is 8. +.TP +nelem +.I Nelem +is an estimate of the final size of the hash table. +If not set or set too low, hash tables will expand gracefully as keys +are entered, although a slight performance degradation may be noticed. +The default value is 1. +.TP +cachesize +A suggested maximum size, in bytes, of the memory cache. +This value is +.B only +advisory, and the access method will allocate more memory rather +than fail. +.TP +hash +.I Hash +is a user defined hash function. +Since no hash function performs equally well on all possible data, the +user may find that the built-in hash function does poorly on a particular +data set. +User specified hash functions must take two arguments (a pointer to a byte +string and a length) and return a 32-bit quantity to be used as the hash +value. +.TP +lorder +The byte order for integers in the stored database metadata. +The number should represent the order as an integer; for example, +big endian order would be the number 4,321. +If +.I lorder +is 0 (no order is specified) the current host order is used. +If the file already exists, the specified value is ignored and the +value specified when the tree was created is used. +.PP +If the file already exists (and the O_TRUNC flag is not specified), the +values specified for the parameters bsize, ffactor, lorder and nelem are +ignored and the values specified when the tree was created are used. +.PP +If a hash function is specified, +.I hash_open +will attempt to determine if the hash function specified is the same as +the one with which the database was created, and will fail if it is not. +.PP +Backward compatible interfaces to the routines described in +.IR dbm (3), +and +.IR ndbm (3) +are provided, however these interfaces are not compatible with +previous file formats. +.SH "SEE ALSO" +.IR btree (3), +.IR dbopen (3), +.IR mpool (3), +.IR recno (3) +.sp +.IR "Dynamic Hash Tables" , +Per-Ake Larson, Communications of the ACM, April 1988. +.sp +.IR "A New Hash Package for UNIX" , +Margo Seltzer, USENIX Proceedings, Winter 1991. +.SH BUGS +Only big and little endian byte order is supported. diff --git a/lib/libc/db/man/mpool.3 b/lib/libc/db/man/mpool.3 new file mode 100644 index 0000000..c17606e --- /dev/null +++ b/lib/libc/db/man/mpool.3 @@ -0,0 +1,219 @@ +.\" Copyright (c) 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)mpool.3 8.1 (Berkeley) 6/4/93 +.\" +.TH MPOOL 3 "June 4, 1993" +.UC 7 +.SH NAME +mpool \- shared memory buffer pool +.SH SYNOPSIS +.nf +.ft B +#include <db.h> +#include <mpool.h> + +MPOOL * +mpool_open (DBT *key, int fd, pgno_t pagesize, pgno_t maxcache); + +void +mpool_filter (MPOOL *mp, void (*pgin)(void *, pgno_t, void *), +.ti +5 +void (*pgout)(void *, pgno_t, void *), void *pgcookie); + +void * +mpool_new (MPOOL *mp, pgno_t *pgnoaddr); + +void * +mpool_get (MPOOL *mp, pgno_t pgno, u_int flags); + +int +mpool_put (MPOOL *mp, void *pgaddr, u_int flags); + +int +mpool_sync (MPOOL *mp); + +int +mpool_close (MPOOL *mp); +.ft R +.fi +.SH DESCRIPTION +.IR Mpool +is the library interface intended to provide page oriented buffer management +of files. +The buffers may be shared between processes. +.PP +The function +.I mpool_open +initializes a memory pool. +The +.I key +argument is the byte string used to negotiate between multiple +processes wishing to share buffers. +If the file buffers are mapped in shared memory, all processes using +the same key will share the buffers. +If +.I key +is NULL, the buffers are mapped into private memory. +The +.I fd +argument is a file descriptor for the underlying file, which must be seekable. +If +.I key +is non-NULL and matches a file already being mapped, the +.I fd +argument is ignored. +.PP +The +.I pagesize +argument is the size, in bytes, of the pages into which the file is broken up. +The +.I maxcache +argument is the maximum number of pages from the underlying file to cache +at any one time. +This value is not relative to the number of processes which share a file's +buffers, but will be the largest value specified by any of the processes +sharing the file. +.PP +The +.I mpool_filter +function is intended to make transparent input and output processing of the +pages possible. +If the +.I pgin +function is specified, it is called each time a buffer is read into the memory +pool from the backing file. +If the +.I pgout +function is specified, it is called each time a buffer is written into the +backing file. +Both functions are are called with the +.I pgcookie +pointer, the page number and a pointer to the page to being read or written. +.PP +The function +.I mpool_new +takes an MPOOL pointer and an address as arguments. +If a new page can be allocated, a pointer to the page is returned and +the page number is stored into the +.I pgnoaddr +address. +Otherwise, NULL is returned and errno is set. +.PP +The function +.I mpool_get +takes a MPOOL pointer and a page number as arguments. +If the page exists, a pointer to the page is returned. +Otherwise, NULL is returned and errno is set. +The flags parameter is not currently used. +.PP +The function +.I mpool_put +unpins the page referenced by +.IR pgaddr . +.I Pgaddr +must be an address previously returned by +.I mpool_get +or +.IR mpool_new . +The flag value is specified by +.IR or 'ing +any of the following values: +.TP +MPOOL_DIRTY +The page has been modified and needs to be written to the backing file. +.PP +.I Mpool_put +returns 0 on success and -1 if an error occurs. +.PP +The function +.I mpool_sync +writes all modified pages associated with the MPOOL pointer to the +backing file. +.I Mpool_sync +returns 0 on success and -1 if an error occurs. +.PP +The +.I mpool_close +function free's up any allocated memory associated with the memory pool +cookie. +Modified pages are +.B not +written to the backing file. +.I Mpool_close +returns 0 on success and -1 if an error occurs. +.SH ERRORS +The +.I mpool_open +function may fail and set +.I errno +for any of the errors specified for the library routine +.IR malloc (3). +.PP +The +.I mpool_get +function may fail and set +.I errno +for the following: +.TP 15 +[EINVAL] +The requested record doesn't exist. +.PP +The +.I mpool_new +and +.I mpool_get +functions may fail and set +.I errno +for any of the errors specified for the library routines +.IR read (2) , +.IR write (2) , +and +.IR malloc (3). +.PP +The +.I mpool_sync +function may fail and set +.I errno +for any of the errors specified for the library routine +.IR write (2). +.PP +The +.I mpool_close +function may fail and set +.I errno +for any of the errors specified for the library routine +.IR free (3). +.SH "SEE ALSO" +.IR dbopen (3), +.IR btree (3), +.IR hash (3), +.IR recno (3) diff --git a/lib/libc/db/man/recno.3 b/lib/libc/db/man/recno.3 new file mode 100644 index 0000000..a40e6c9 --- /dev/null +++ b/lib/libc/db/man/recno.3 @@ -0,0 +1,196 @@ +.\" Copyright (c) 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)recno.3 8.3 (Berkeley) 2/21/94 +.\" +.TH RECNO 3 "February 21, 1994" +.UC 7 +.SH NAME +recno \- record number database access method +.SH SYNOPSIS +.nf +.ft B +#include <sys/types.h> +#include <db.h> +.ft R +.fi +.SH DESCRIPTION +The routine +.IR dbopen +is the library interface to database files. +One of the supported file formats is record number files. +The general description of the database access methods is in +.IR dbopen (3), +this manual page describes only the recno specific information. +.PP +The record number data structure is either variable or fixed-length +records stored in a flat-file format, accessed by the logical record +number. +The existence of record number five implies the existence of records +one through four, and the deletion of record number one causes +record number five to be renumbered to record number four, as well +as the cursor, if positioned after record number one, to shift down +one record. +.PP +The recno access method specific data structure provided to +.I dbopen +is defined in the <db.h> include file as follows: +.PP +typedef struct { +.RS +u_long flags; +.br +u_int cachesize; +.br +u_int psize; +.br +int lorder; +.br +size_t reclen; +.br +u_char bval; +.br +char *bfname; +.RE +} RECNOINFO; +.PP +The elements of this structure are defined as follows: +.TP +flags +The flag value is specified by +.IR or 'ing +any of the following values: +.RS +.TP +R_FIXEDLEN +The records are fixed-length, not byte delimited. +The structure element +.I reclen +specifies the length of the record, and the structure element +.I bval +is used as the pad character. +.TP +R_NOKEY +In the interface specified by +.IR dbopen , +the sequential record retrieval fills in both the caller's key and +data structures. +If the R_NOKEY flag is specified, the +.I cursor +routines are not required to fill in the key structure. +This permits applications to retrieve records at the end of files without +reading all of the intervening records. +.TP +R_SNAPSHOT +This flag requires that a snapshot of the file be taken when +.I dbopen +is called, instead of permitting any unmodified records to be read from +the original file. +.RE +.TP +cachesize +A suggested maximum size, in bytes, of the memory cache. +This value is +.B only +advisory, and the access method will allocate more memory rather than fail. +If +.I cachesize +is 0 (no size is specified) a default cache is used. +.TP +psize +The recno access method stores the in-memory copies of its records +in a btree. +This value is the size (in bytes) of the pages used for nodes in that tree. +If +.I psize +is 0 (no page size is specified) a page size is chosen based on the +underlying file system I/O block size. +See +.IR btree (3) +for more information. +.TP +lorder +The byte order for integers in the stored database metadata. +The number should represent the order as an integer; for example, +big endian order would be the number 4,321. +If +.I lorder +is 0 (no order is specified) the current host order is used. +.TP +reclen +The length of a fixed-length record. +.TP +bval +The delimiting byte to be used to mark the end of a record for +variable-length records, and the pad character for fixed-length +records. +If no value is specified, newlines (``\en'') are used to mark the end +of variable-length records and fixed-length records are padded with +spaces. +.TP +bfname +The recno access method stores the in-memory copies of its records +in a btree. +If bfname is non-NULL, it specifies the name of the btree file, +as if specified as the file name for a dbopen of a btree file. +.PP +The data part of the key/data pair used by the recno access method +is the same as other access methods. +The key is different. +The +.I data +field of the key should be a pointer to a memory location of type +.IR recno_t , +as defined in the <db.h> include file. +This type is normally the largest unsigned integral type available to +the implementation. +The +.I size +field of the key should be the size of that type. +.PP +In the interface specified by +.IR dbopen , +using the +.I put +interface to create a new record will cause the creation of multiple, +empty records if the record number is more than one greater than the +largest record currently in the database. +.SH "SEE ALSO" +.IR dbopen (3), +.IR hash (3), +.IR mpool (3), +.IR recno (3) +.sp +.IR "Document Processing in a Relational Database System" , +Michael Stonebraker, Heidi Stettner, Joseph Kalash, Antonin Guttman, +Nadene Lynn, Memorandum No. UCB/ERL M82/32, May 1982. +.SH BUGS +Only big and little endian byte order is supported. diff --git a/lib/libc/db/mpool/Makefile.inc b/lib/libc/db/mpool/Makefile.inc new file mode 100644 index 0000000..93210c8 --- /dev/null +++ b/lib/libc/db/mpool/Makefile.inc @@ -0,0 +1,5 @@ +# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93 + +.PATH: ${.CURDIR}/db/mpool + +SRCS+= mpool.c diff --git a/lib/libc/db/mpool/README b/lib/libc/db/mpool/README new file mode 100644 index 0000000..0f01fbc --- /dev/null +++ b/lib/libc/db/mpool/README @@ -0,0 +1,7 @@ +# @(#)README 8.1 (Berkeley) 6/4/93 + +These are the current memory pool routines. +They aren't ready for prime time, yet, and +the interface is expected to change. + +--keith diff --git a/lib/libc/db/mpool/mpool.c b/lib/libc/db/mpool/mpool.c new file mode 100644 index 0000000..72627bd --- /dev/null +++ b/lib/libc/db/mpool/mpool.c @@ -0,0 +1,534 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)mpool.c 8.2 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/param.h> +#include <sys/stat.h> + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <db.h> +#define __MPOOLINTERFACE_PRIVATE +#include "mpool.h" + +static BKT *mpool_bkt __P((MPOOL *)); +static BKT *mpool_look __P((MPOOL *, pgno_t)); +static int mpool_write __P((MPOOL *, BKT *)); +#ifdef DEBUG +static void __mpoolerr __P((const char *fmt, ...)); +#endif + +/* + * MPOOL_OPEN -- initialize a memory pool. + * + * Parameters: + * key: Shared buffer key. + * fd: File descriptor. + * pagesize: File page size. + * maxcache: Max number of cached pages. + * + * Returns: + * MPOOL pointer, NULL on error. + */ +MPOOL * +mpool_open(key, fd, pagesize, maxcache) + DBT *key; + int fd; + pgno_t pagesize, maxcache; +{ + struct stat sb; + MPOOL *mp; + int entry; + + if (fstat(fd, &sb)) + return (NULL); + /* XXX + * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so + * that stat(2) returns true for ISSOCK on pipes. Until then, this is + * fairly close. + */ + if (!S_ISREG(sb.st_mode)) { + errno = ESPIPE; + return (NULL); + } + + if ((mp = (MPOOL *)malloc(sizeof(MPOOL))) == NULL) + return (NULL); + mp->free.cnext = mp->free.cprev = (BKT *)&mp->free; + mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru; + for (entry = 0; entry < HASHSIZE; ++entry) + mp->hashtable[entry].hnext = mp->hashtable[entry].hprev = + mp->hashtable[entry].cnext = mp->hashtable[entry].cprev = + (BKT *)&mp->hashtable[entry]; + mp->curcache = 0; + mp->maxcache = maxcache; + mp->pagesize = pagesize; + mp->npages = sb.st_size / pagesize; + mp->fd = fd; + mp->pgcookie = NULL; + mp->pgin = mp->pgout = NULL; + +#ifdef STATISTICS + mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush = + mp->pageget = mp->pagenew = mp->pageput = mp->pageread = + mp->pagewrite = 0; +#endif + return (mp); +} + +/* + * MPOOL_FILTER -- initialize input/output filters. + * + * Parameters: + * pgin: Page in conversion routine. + * pgout: Page out conversion routine. + * pgcookie: Cookie for page in/out routines. + */ +void +mpool_filter(mp, pgin, pgout, pgcookie) + MPOOL *mp; + void (*pgin) __P((void *, pgno_t, void *)); + void (*pgout) __P((void *, pgno_t, void *)); + void *pgcookie; +{ + mp->pgin = pgin; + mp->pgout = pgout; + mp->pgcookie = pgcookie; +} + +/* + * MPOOL_NEW -- get a new page + * + * Parameters: + * mp: mpool cookie + * pgnoadddr: place to store new page number + * Returns: + * RET_ERROR, RET_SUCCESS + */ +void * +mpool_new(mp, pgnoaddr) + MPOOL *mp; + pgno_t *pgnoaddr; +{ + BKT *b; + BKTHDR *hp; + +#ifdef STATISTICS + ++mp->pagenew; +#endif + /* + * Get a BKT from the cache. Assign a new page number, attach it to + * the hash and lru chains and return. + */ + if ((b = mpool_bkt(mp)) == NULL) + return (NULL); + *pgnoaddr = b->pgno = mp->npages++; + b->flags = MPOOL_PINNED; + inshash(b, b->pgno); + inschain(b, &mp->lru); + return (b->page); +} + +/* + * MPOOL_GET -- get a page from the pool + * + * Parameters: + * mp: mpool cookie + * pgno: page number + * flags: not used + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +void * +mpool_get(mp, pgno, flags) + MPOOL *mp; + pgno_t pgno; + u_int flags; /* XXX not used? */ +{ + BKT *b; + BKTHDR *hp; + off_t off; + int nr; + + /* + * If asking for a specific page that is already in the cache, find + * it and return it. + */ + if (b = mpool_look(mp, pgno)) { +#ifdef STATISTICS + ++mp->pageget; +#endif +#ifdef DEBUG + if (b->flags & MPOOL_PINNED) + __mpoolerr("mpool_get: page %d already pinned", + b->pgno); +#endif + rmchain(b); + inschain(b, &mp->lru); + b->flags |= MPOOL_PINNED; + return (b->page); + } + + /* Not allowed to retrieve a non-existent page. */ + if (pgno >= mp->npages) { + errno = EINVAL; + return (NULL); + } + + /* Get a page from the cache. */ + if ((b = mpool_bkt(mp)) == NULL) + return (NULL); + b->pgno = pgno; + b->flags = MPOOL_PINNED; + +#ifdef STATISTICS + ++mp->pageread; +#endif + /* Read in the contents. */ + off = mp->pagesize * pgno; + if (lseek(mp->fd, off, SEEK_SET) != off) + return (NULL); + if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) { + if (nr >= 0) + errno = EFTYPE; + return (NULL); + } + if (mp->pgin) + (mp->pgin)(mp->pgcookie, b->pgno, b->page); + + inshash(b, b->pgno); + inschain(b, &mp->lru); +#ifdef STATISTICS + ++mp->pageget; +#endif + return (b->page); +} + +/* + * MPOOL_PUT -- return a page to the pool + * + * Parameters: + * mp: mpool cookie + * page: page pointer + * pgno: page number + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +mpool_put(mp, page, flags) + MPOOL *mp; + void *page; + u_int flags; +{ + BKT *baddr; +#ifdef DEBUG + BKT *b; +#endif + +#ifdef STATISTICS + ++mp->pageput; +#endif + baddr = (BKT *)((char *)page - sizeof(BKT)); +#ifdef DEBUG + if (!(baddr->flags & MPOOL_PINNED)) + __mpoolerr("mpool_put: page %d not pinned", b->pgno); + for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { + if (b == (BKT *)&mp->lru) + __mpoolerr("mpool_put: %0x: bad address", baddr); + if (b == baddr) + break; + } +#endif + baddr->flags &= ~MPOOL_PINNED; + baddr->flags |= flags & MPOOL_DIRTY; + return (RET_SUCCESS); +} + +/* + * MPOOL_CLOSE -- close the buffer pool + * + * Parameters: + * mp: mpool cookie + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +mpool_close(mp) + MPOOL *mp; +{ + BKT *b, *next; + + /* Free up any space allocated to the lru pages. */ + for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) { + next = b->cprev; + free(b); + } + free(mp); + return (RET_SUCCESS); +} + +/* + * MPOOL_SYNC -- sync the file to disk. + * + * Parameters: + * mp: mpool cookie + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +mpool_sync(mp) + MPOOL *mp; +{ + BKT *b; + + for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) + if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR) + return (RET_ERROR); + return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); +} + +/* + * MPOOL_BKT -- get/create a BKT from the cache + * + * Parameters: + * mp: mpool cookie + * + * Returns: + * NULL on failure and a pointer to the BKT on success + */ +static BKT * +mpool_bkt(mp) + MPOOL *mp; +{ + BKT *b; + + if (mp->curcache < mp->maxcache) + goto new; + + /* + * If the cache is maxxed out, search the lru list for a buffer we + * can flush. If we find one, write it if necessary and take it off + * any lists. If we don't find anything we grow the cache anyway. + * The cache never shrinks. + */ + for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) + if (!(b->flags & MPOOL_PINNED)) { + if (b->flags & MPOOL_DIRTY && + mpool_write(mp, b) == RET_ERROR) + return (NULL); + rmhash(b); + rmchain(b); +#ifdef STATISTICS + ++mp->pageflush; +#endif +#ifdef DEBUG + { + void *spage; + spage = b->page; + memset(b, 0xff, sizeof(BKT) + mp->pagesize); + b->page = spage; + } +#endif + return (b); + } + +new: if ((b = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL) + return (NULL); +#ifdef STATISTICS + ++mp->pagealloc; +#endif +#ifdef DEBUG + memset(b, 0xff, sizeof(BKT) + mp->pagesize); +#endif + b->page = (char *)b + sizeof(BKT); + ++mp->curcache; + return (b); +} + +/* + * MPOOL_WRITE -- sync a page to disk + * + * Parameters: + * mp: mpool cookie + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +static int +mpool_write(mp, b) + MPOOL *mp; + BKT *b; +{ + off_t off; + + if (mp->pgout) + (mp->pgout)(mp->pgcookie, b->pgno, b->page); + +#ifdef STATISTICS + ++mp->pagewrite; +#endif + off = mp->pagesize * b->pgno; + if (lseek(mp->fd, off, SEEK_SET) != off) + return (RET_ERROR); + if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize) + return (RET_ERROR); + b->flags &= ~MPOOL_DIRTY; + return (RET_SUCCESS); +} + +/* + * MPOOL_LOOK -- lookup a page + * + * Parameters: + * mp: mpool cookie + * pgno: page number + * + * Returns: + * NULL on failure and a pointer to the BKT on success + */ +static BKT * +mpool_look(mp, pgno) + MPOOL *mp; + pgno_t pgno; +{ + register BKT *b; + register BKTHDR *tb; + + /* XXX + * If find the buffer, put it first on the hash chain so can + * find it again quickly. + */ + tb = &mp->hashtable[HASHKEY(pgno)]; + for (b = tb->hnext; b != (BKT *)tb; b = b->hnext) + if (b->pgno == pgno) { +#ifdef STATISTICS + ++mp->cachehit; +#endif + return (b); + } +#ifdef STATISTICS + ++mp->cachemiss; +#endif + return (NULL); +} + +#ifdef STATISTICS +/* + * MPOOL_STAT -- cache statistics + * + * Parameters: + * mp: mpool cookie + */ +void +mpool_stat(mp) + MPOOL *mp; +{ + BKT *b; + int cnt; + char *sep; + + (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); + (void)fprintf(stderr, + "page size %lu, cacheing %lu pages of %lu page max cache\n", + mp->pagesize, mp->curcache, mp->maxcache); + (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", + mp->pageput, mp->pageget, mp->pagenew); + (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", + mp->pagealloc, mp->pageflush); + if (mp->cachehit + mp->cachemiss) + (void)fprintf(stderr, + "%.0f%% cache hit rate (%lu hits, %lu misses)\n", + ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) + * 100, mp->cachehit, mp->cachemiss); + (void)fprintf(stderr, "%lu page reads, %lu page writes\n", + mp->pageread, mp->pagewrite); + + sep = ""; + cnt = 0; + for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { + (void)fprintf(stderr, "%s%d", sep, b->pgno); + if (b->flags & MPOOL_DIRTY) + (void)fprintf(stderr, "d"); + if (b->flags & MPOOL_PINNED) + (void)fprintf(stderr, "P"); + if (++cnt == 10) { + sep = "\n"; + cnt = 0; + } else + sep = ", "; + + } + (void)fprintf(stderr, "\n"); +} +#endif + +#ifdef DEBUG +#if __STDC__ +#include <stdarg.h> +#else +#include <varargs.h> +#endif + +static void +#if __STDC__ +__mpoolerr(const char *fmt, ...) +#else +__mpoolerr(fmt, va_alist) + char *fmt; + va_dcl +#endif +{ + va_list ap; +#if __STDC__ + va_start(ap, fmt); +#else + va_start(ap); +#endif + (void)vfprintf(stderr, fmt, ap); + va_end(ap); + (void)fprintf(stderr, "\n"); + abort(); + /* NOTREACHED */ +} +#endif diff --git a/lib/libc/db/recno/Makefile.inc b/lib/libc/db/recno/Makefile.inc new file mode 100644 index 0000000..e49e225 --- /dev/null +++ b/lib/libc/db/recno/Makefile.inc @@ -0,0 +1,6 @@ +# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93 + +.PATH: ${.CURDIR}/db/recno + +SRCS+= rec_close.c rec_delete.c rec_get.c rec_open.c rec_put.c rec_search.c \ + rec_seq.c rec_utils.c diff --git a/lib/libc/db/recno/extern.h b/lib/libc/db/recno/extern.h new file mode 100644 index 0000000..2188131 --- /dev/null +++ b/lib/libc/db/recno/extern.h @@ -0,0 +1,54 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)extern.h 8.2 (Berkeley) 2/21/94 + */ + +#include "../btree/extern.h" + +int __rec_close __P((DB *)); +int __rec_delete __P((const DB *, const DBT *, u_int)); +int __rec_dleaf __P((BTREE *, PAGE *, indx_t)); +int __rec_fd __P((const DB *)); +int __rec_fmap __P((BTREE *, recno_t)); +int __rec_fout __P((BTREE *)); +int __rec_fpipe __P((BTREE *, recno_t)); +int __rec_get __P((const DB *, const DBT *, DBT *, u_int)); +int __rec_iput __P((BTREE *, recno_t, const DBT *, u_int)); +int __rec_put __P((const DB *dbp, DBT *, const DBT *, u_int)); +int __rec_ret __P((BTREE *, EPG *, recno_t, DBT *, DBT *)); +EPG *__rec_search __P((BTREE *, recno_t, enum SRCHOP)); +int __rec_seq __P((const DB *, DBT *, DBT *, u_int)); +int __rec_sync __P((const DB *, u_int)); +int __rec_vmap __P((BTREE *, recno_t)); +int __rec_vout __P((BTREE *)); +int __rec_vpipe __P((BTREE *, recno_t)); diff --git a/lib/libc/db/recno/rec_close.c b/lib/libc/db/recno/rec_close.c new file mode 100644 index 0000000..a96d4f1 --- /dev/null +++ b/lib/libc/db/recno/rec_close.c @@ -0,0 +1,163 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)rec_close.c 8.3 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/mman.h> + +#include <errno.h> +#include <limits.h> +#include <stdio.h> +#include <unistd.h> + +#include <db.h> +#include "recno.h" + +/* + * __REC_CLOSE -- Close a recno tree. + * + * Parameters: + * dbp: pointer to access method + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__rec_close(dbp) + DB *dbp; +{ + BTREE *t; + int status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + if (__rec_sync(dbp, 0) == RET_ERROR) + return (RET_ERROR); + + /* Committed to closing. */ + status = RET_SUCCESS; + if (ISSET(t, R_MEMMAPPED) && munmap(t->bt_smap, t->bt_msize)) + status = RET_ERROR; + + if (!ISSET(t, R_INMEM)) + if (ISSET(t, R_CLOSEFP)) { + if (fclose(t->bt_rfp)) + status = RET_ERROR; + } else + if (close(t->bt_rfd)) + status = RET_ERROR; + + if (__bt_close(dbp) == RET_ERROR) + status = RET_ERROR; + + return (status); +} + +/* + * __REC_SYNC -- sync the recno tree to disk. + * + * Parameters: + * dbp: pointer to access method + * + * Returns: + * RET_SUCCESS, RET_ERROR. + */ +int +__rec_sync(dbp, flags) + const DB *dbp; + u_int flags; +{ + struct iovec iov[2]; + BTREE *t; + DBT data, key; + off_t off; + recno_t scursor, trec; + int status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + if (flags == R_RECNOSYNC) + return (__bt_sync(dbp, 0)); + + if (ISSET(t, R_RDONLY | R_INMEM) || !ISSET(t, R_MODIFIED)) + return (RET_SUCCESS); + + /* Read any remaining records into the tree. */ + if (!ISSET(t, R_EOF) && t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR) + return (RET_ERROR); + + /* Rewind the file descriptor. */ + if (lseek(t->bt_rfd, (off_t)0, SEEK_SET) != 0) + return (RET_ERROR); + + iov[1].iov_base = "\n"; + iov[1].iov_len = 1; + scursor = t->bt_rcursor; + + key.size = sizeof(recno_t); + key.data = &trec; + + status = (dbp->seq)(dbp, &key, &data, R_FIRST); + while (status == RET_SUCCESS) { + iov[0].iov_base = data.data; + iov[0].iov_len = data.size; + if (writev(t->bt_rfd, iov, 2) != data.size + 1) + return (RET_ERROR); + status = (dbp->seq)(dbp, &key, &data, R_NEXT); + } + t->bt_rcursor = scursor; + if (status == RET_ERROR) + return (RET_ERROR); + if ((off = lseek(t->bt_rfd, (off_t)0, SEEK_CUR)) == -1) + return (RET_ERROR); + if (ftruncate(t->bt_rfd, off)) + return (RET_ERROR); + CLR(t, R_MODIFIED); + return (RET_SUCCESS); +} diff --git a/lib/libc/db/recno/rec_delete.c b/lib/libc/db/recno/rec_delete.c new file mode 100644 index 0000000..35f56b9 --- /dev/null +++ b/lib/libc/db/recno/rec_delete.c @@ -0,0 +1,197 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)rec_delete.c 8.4 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <stdio.h> +#include <string.h> + +#include <db.h> +#include "recno.h" + +static int rec_rdelete __P((BTREE *, recno_t)); + +/* + * __REC_DELETE -- Delete the item(s) referenced by a key. + * + * Parameters: + * dbp: pointer to access method + * key: key to delete + * flags: R_CURSOR if deleting what the cursor references + * + * Returns: + * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. + */ +int +__rec_delete(dbp, key, flags) + const DB *dbp; + const DBT *key; + u_int flags; +{ + BTREE *t; + recno_t nrec; + int status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + switch(flags) { + case 0: + if ((nrec = *(recno_t *)key->data) == 0) + goto einval; + if (nrec > t->bt_nrecs) + return (RET_SPECIAL); + --nrec; + status = rec_rdelete(t, nrec); + break; + case R_CURSOR: + if (!ISSET(t, B_SEQINIT)) + goto einval; + if (t->bt_nrecs == 0) + return (RET_SPECIAL); + status = rec_rdelete(t, t->bt_rcursor - 1); + if (status == RET_SUCCESS) + --t->bt_rcursor; + break; + default: +einval: errno = EINVAL; + return (RET_ERROR); + } + + if (status == RET_SUCCESS) + SET(t, B_MODIFIED | R_MODIFIED); + return (status); +} + +/* + * REC_RDELETE -- Delete the data matching the specified key. + * + * Parameters: + * tree: tree + * nrec: record to delete + * + * Returns: + * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. + */ +static int +rec_rdelete(t, nrec) + BTREE *t; + recno_t nrec; +{ + EPG *e; + PAGE *h; + int status; + + /* Find the record; __rec_search pins the page. */ + if ((e = __rec_search(t, nrec, SDELETE)) == NULL) + return (RET_ERROR); + + /* Delete the record. */ + h = e->page; + status = __rec_dleaf(t, h, e->index); + if (status != RET_SUCCESS) { + mpool_put(t->bt_mp, h, 0); + return (status); + } + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + return (RET_SUCCESS); +} + +/* + * __REC_DLEAF -- Delete a single record from a recno leaf page. + * + * Parameters: + * t: tree + * index: index on current page to delete + * + * Returns: + * RET_SUCCESS, RET_ERROR. + */ +int +__rec_dleaf(t, h, index) + BTREE *t; + PAGE *h; + indx_t index; +{ + register RLEAF *rl; + register indx_t *ip, cnt, offset; + register size_t nbytes; + char *from; + void *to; + + /* + * Delete a record from a recno leaf page. Internal records are never + * deleted from internal pages, regardless of the records that caused + * them to be added being deleted. Pages made empty by deletion are + * not reclaimed. They are, however, made available for reuse. + * + * Pack the remaining entries at the end of the page, shift the indices + * down, overwriting the deleted record and its index. If the record + * uses overflow pages, make them available for reuse. + */ + to = rl = GETRLEAF(h, index); + if (rl->flags & P_BIGDATA && __ovfl_delete(t, rl->bytes) == RET_ERROR) + return (RET_ERROR); + nbytes = NRLEAF(rl); + + /* + * Compress the key/data pairs. Compress and adjust the [BR]LEAF + * offsets. Reset the headers. + */ + from = (char *)h + h->upper; + memmove(from + nbytes, from, (char *)to - from); + h->upper += nbytes; + + offset = h->linp[index]; + for (cnt = &h->linp[index] - (ip = &h->linp[0]); cnt--; ++ip) + if (ip[0] < offset) + ip[0] += nbytes; + for (cnt = &h->linp[NEXTINDEX(h)] - ip; --cnt; ++ip) + ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1]; + h->lower -= sizeof(indx_t); + --t->bt_nrecs; + return (RET_SUCCESS); +} diff --git a/lib/libc/db/recno/rec_get.c b/lib/libc/db/recno/rec_get.c new file mode 100644 index 0000000..3555575 --- /dev/null +++ b/lib/libc/db/recno/rec_get.c @@ -0,0 +1,298 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)rec_get.c 8.4 (Berkeley) 3/1/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <db.h> +#include "recno.h" + +/* + * __REC_GET -- Get a record from the btree. + * + * Parameters: + * dbp: pointer to access method + * key: key to find + * data: data to return + * flag: currently unused + * + * Returns: + * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found. + */ +int +__rec_get(dbp, key, data, flags) + const DB *dbp; + const DBT *key; + DBT *data; + u_int flags; +{ + BTREE *t; + EPG *e; + recno_t nrec; + int status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + /* Get currently doesn't take any flags, and keys of 0 are illegal. */ + if (flags || (nrec = *(recno_t *)key->data) == 0) { + errno = EINVAL; + return (RET_ERROR); + } + + /* + * If we haven't seen this record yet, try to find it in the + * original file. + */ + if (nrec > t->bt_nrecs) { + if (ISSET(t, R_EOF | R_INMEM)) + return (RET_SPECIAL); + if ((status = t->bt_irec(t, nrec)) != RET_SUCCESS) + return (status); + } + + --nrec; + if ((e = __rec_search(t, nrec, SEARCH)) == NULL) + return (RET_ERROR); + + status = __rec_ret(t, e, 0, NULL, data); + if (ISSET(t, B_DB_LOCK)) + mpool_put(t->bt_mp, e->page, 0); + else + t->bt_pinned = e->page; + return (status); +} + +/* + * __REC_FPIPE -- Get fixed length records from a pipe. + * + * Parameters: + * t: tree + * cnt: records to read + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__rec_fpipe(t, top) + BTREE *t; + recno_t top; +{ + DBT data; + recno_t nrec; + size_t len; + int ch; + char *p; + + if (t->bt_dbufsz < t->bt_reclen) { + if ((t->bt_dbuf = + (char *)realloc(t->bt_dbuf, t->bt_reclen)) == NULL) + return (RET_ERROR); + t->bt_dbufsz = t->bt_reclen; + } + data.data = t->bt_dbuf; + data.size = t->bt_reclen; + + for (nrec = t->bt_nrecs; nrec < top; ++nrec) { + len = t->bt_reclen; + for (p = t->bt_dbuf;; *p++ = ch) + if ((ch = getc(t->bt_rfp)) == EOF || !len--) { + if (__rec_iput(t, nrec, &data, 0) + != RET_SUCCESS) + return (RET_ERROR); + break; + } + if (ch == EOF) + break; + } + if (nrec < top) { + SET(t, R_EOF); + return (RET_SPECIAL); + } + return (RET_SUCCESS); +} + +/* + * __REC_VPIPE -- Get variable length records from a pipe. + * + * Parameters: + * t: tree + * cnt: records to read + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__rec_vpipe(t, top) + BTREE *t; + recno_t top; +{ + DBT data; + recno_t nrec; + indx_t len; + size_t sz; + int bval, ch; + char *p; + + bval = t->bt_bval; + for (nrec = t->bt_nrecs; nrec < top; ++nrec) { + for (p = t->bt_dbuf, sz = t->bt_dbufsz;; *p++ = ch, --sz) { + if ((ch = getc(t->bt_rfp)) == EOF || ch == bval) { + data.data = t->bt_dbuf; + data.size = p - t->bt_dbuf; + if (ch == EOF && data.size == 0) + break; + if (__rec_iput(t, nrec, &data, 0) + != RET_SUCCESS) + return (RET_ERROR); + break; + } + if (sz == 0) { + len = p - t->bt_dbuf; + t->bt_dbufsz += (sz = 256); + if ((t->bt_dbuf = (char *)realloc(t->bt_dbuf, + t->bt_dbufsz)) == NULL) + return (RET_ERROR); + p = t->bt_dbuf + len; + } + } + if (ch == EOF) + break; + } + if (nrec < top) { + SET(t, R_EOF); + return (RET_SPECIAL); + } + return (RET_SUCCESS); +} + +/* + * __REC_FMAP -- Get fixed length records from a file. + * + * Parameters: + * t: tree + * cnt: records to read + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__rec_fmap(t, top) + BTREE *t; + recno_t top; +{ + DBT data; + recno_t nrec; + caddr_t sp, ep; + size_t len; + char *p; + + if (t->bt_dbufsz < t->bt_reclen) { + if ((t->bt_dbuf = + (char *)realloc(t->bt_dbuf, t->bt_reclen)) == NULL) + return (RET_ERROR); + t->bt_dbufsz = t->bt_reclen; + } + data.data = t->bt_dbuf; + data.size = t->bt_reclen; + + sp = t->bt_cmap; + ep = t->bt_emap; + for (nrec = t->bt_nrecs; nrec < top; ++nrec) { + if (sp >= ep) { + SET(t, R_EOF); + return (RET_SPECIAL); + } + len = t->bt_reclen; + for (p = t->bt_dbuf; sp < ep && len--; *p++ = *sp++); + memset(p, t->bt_bval, len); + if (__rec_iput(t, nrec, &data, 0) != RET_SUCCESS) + return (RET_ERROR); + } + t->bt_cmap = sp; + return (RET_SUCCESS); +} + +/* + * __REC_VMAP -- Get variable length records from a file. + * + * Parameters: + * t: tree + * cnt: records to read + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__rec_vmap(t, top) + BTREE *t; + recno_t top; +{ + DBT data; + caddr_t sp, ep; + recno_t nrec; + int bval; + + sp = t->bt_cmap; + ep = t->bt_emap; + bval = t->bt_bval; + + for (nrec = t->bt_nrecs; nrec < top; ++nrec) { + if (sp >= ep) { + SET(t, R_EOF); + return (RET_SPECIAL); + } + for (data.data = sp; sp < ep && *sp != bval; ++sp); + data.size = sp - (caddr_t)data.data; + if (__rec_iput(t, nrec, &data, 0) != RET_SUCCESS) + return (RET_ERROR); + ++sp; + } + t->bt_cmap = sp; + return (RET_SUCCESS); +} diff --git a/lib/libc/db/recno/rec_open.c b/lib/libc/db/recno/rec_open.c new file mode 100644 index 0000000..2a0f354 --- /dev/null +++ b/lib/libc/db/recno/rec_open.c @@ -0,0 +1,230 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)rec_open.c 8.6 (Berkeley) 2/22/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <unistd.h> + +#include <db.h> +#include "recno.h" + +DB * +__rec_open(fname, flags, mode, openinfo, dflags) + const char *fname; + int flags, mode, dflags; + const RECNOINFO *openinfo; +{ + BTREE *t; + BTREEINFO btopeninfo; + DB *dbp; + PAGE *h; + struct stat sb; + int rfd, sverrno; + + /* Open the user's file -- if this fails, we're done. */ + if (fname != NULL && (rfd = open(fname, flags, mode)) < 0) + return (NULL); + + /* Create a btree in memory (backed by disk). */ + dbp = NULL; + if (openinfo) { + if (openinfo->flags & ~(R_FIXEDLEN | R_NOKEY | R_SNAPSHOT)) + goto einval; + btopeninfo.flags = 0; + btopeninfo.cachesize = openinfo->cachesize; + btopeninfo.maxkeypage = 0; + btopeninfo.minkeypage = 0; + btopeninfo.psize = openinfo->psize; + btopeninfo.compare = NULL; + btopeninfo.prefix = NULL; + btopeninfo.lorder = openinfo->lorder; + dbp = __bt_open(openinfo->bfname, + O_RDWR, S_IRUSR | S_IWUSR, &btopeninfo, dflags); + } else + dbp = __bt_open(NULL, O_RDWR, S_IRUSR | S_IWUSR, NULL, dflags); + if (dbp == NULL) + goto err; + + /* + * Some fields in the tree structure are recno specific. Fill them + * in and make the btree structure look like a recno structure. We + * don't change the bt_ovflsize value, it's close enough and slightly + * bigger. + */ + t = dbp->internal; + if (openinfo) { + if (openinfo->flags & R_FIXEDLEN) { + SET(t, R_FIXLEN); + t->bt_reclen = openinfo->reclen; + if (t->bt_reclen == 0) + goto einval; + } + t->bt_bval = openinfo->bval; + } else + t->bt_bval = '\n'; + + SET(t, R_RECNO); + if (fname == NULL) + SET(t, R_EOF | R_INMEM); + else + t->bt_rfd = rfd; + t->bt_rcursor = 0; + + if (fname != NULL) { + /* + * In 4.4BSD, stat(2) returns true for ISSOCK on pipes. + * Unfortunately, that's not portable, so we use lseek + * and check the errno values. + */ + errno = 0; + if (lseek(rfd, (off_t)0, SEEK_CUR) == -1 && errno == ESPIPE) { + switch (flags & O_ACCMODE) { + case O_RDONLY: + SET(t, R_RDONLY); + break; + default: + goto einval; + } +slow: if ((t->bt_rfp = fdopen(rfd, "r")) == NULL) + goto err; + SET(t, R_CLOSEFP); + t->bt_irec = + ISSET(t, R_FIXLEN) ? __rec_fpipe : __rec_vpipe; + } else { + switch (flags & O_ACCMODE) { + case O_RDONLY: + SET(t, R_RDONLY); + break; + case O_RDWR: + break; + default: + goto einval; + } + + if (fstat(rfd, &sb)) + goto err; + /* + * Kluge -- we'd like to test to see if the file is too + * big to mmap. Since, we don't know what size or type + * off_t's or size_t's are, what the largest unsigned + * integral type is, or what random insanity the local + * C compiler will perpetrate, doing the comparison in + * a portable way is flatly impossible. Hope that mmap + * fails if the file is too large. + */ + if (sb.st_size == 0) + SET(t, R_EOF); + else { + t->bt_msize = sb.st_size; + if ((t->bt_smap = mmap(NULL, t->bt_msize, + PROT_READ, MAP_PRIVATE, rfd, + (off_t)0)) == (caddr_t)-1) + goto slow; + t->bt_cmap = t->bt_smap; + t->bt_emap = t->bt_smap + sb.st_size; + t->bt_irec = ISSET(t, R_FIXLEN) ? + __rec_fmap : __rec_vmap; + SET(t, R_MEMMAPPED); + } + } + } + + /* Use the recno routines. */ + dbp->close = __rec_close; + dbp->del = __rec_delete; + dbp->fd = __rec_fd; + dbp->get = __rec_get; + dbp->put = __rec_put; + dbp->seq = __rec_seq; + dbp->sync = __rec_sync; + + /* If the root page was created, reset the flags. */ + if ((h = mpool_get(t->bt_mp, P_ROOT, 0)) == NULL) + goto err; + if ((h->flags & P_TYPE) == P_BLEAF) { + h->flags = h->flags & ~P_TYPE | P_RLEAF; + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + } else + mpool_put(t->bt_mp, h, 0); + + if (openinfo && openinfo->flags & R_SNAPSHOT && + !ISSET(t, R_EOF | R_INMEM) && + t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR) + goto err; + return (dbp); + +einval: errno = EINVAL; +err: sverrno = errno; + if (dbp != NULL) + (void)__bt_close(dbp); + if (fname != NULL) + (void)close(rfd); + errno = sverrno; + return (NULL); +} + +int +__rec_fd(dbp) + const DB *dbp; +{ + BTREE *t; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + /* In-memory database can't have a file descriptor. */ + if (ISSET(t, R_INMEM)) { + errno = ENOENT; + return (-1); + } + return (t->bt_rfd); +} diff --git a/lib/libc/db/recno/rec_put.c b/lib/libc/db/recno/rec_put.c new file mode 100644 index 0000000..8c3bae5 --- /dev/null +++ b/lib/libc/db/recno/rec_put.c @@ -0,0 +1,253 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)rec_put.c 8.3 (Berkeley) 3/1/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <db.h> +#include "recno.h" + +/* + * __REC_PUT -- Add a recno item to the tree. + * + * Parameters: + * dbp: pointer to access method + * key: key + * data: data + * flag: R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE + * + * Returns: + * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is + * already in the tree and R_NOOVERWRITE specified. + */ +int +__rec_put(dbp, key, data, flags) + const DB *dbp; + DBT *key; + const DBT *data; + u_int flags; +{ + BTREE *t; + DBT tdata; + recno_t nrec; + int status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + switch (flags) { + case R_CURSOR: + if (!ISSET(t, B_SEQINIT)) + goto einval; + nrec = t->bt_rcursor; + break; + case R_SETCURSOR: + if ((nrec = *(recno_t *)key->data) == 0) + goto einval; + break; + case R_IAFTER: + if ((nrec = *(recno_t *)key->data) == 0) { + nrec = 1; + flags = R_IBEFORE; + } + break; + case 0: + case R_IBEFORE: + if ((nrec = *(recno_t *)key->data) == 0) + goto einval; + break; + case R_NOOVERWRITE: + if ((nrec = *(recno_t *)key->data) == 0) + goto einval; + if (nrec <= t->bt_nrecs) + return (RET_SPECIAL); + break; + default: +einval: errno = EINVAL; + return (RET_ERROR); + } + + /* + * Make sure that records up to and including the put record are + * already in the database. If skipping records, create empty ones. + */ + if (nrec > t->bt_nrecs) { + if (!ISSET(t, R_EOF | R_INMEM) && + t->bt_irec(t, nrec) == RET_ERROR) + return (RET_ERROR); + if (nrec > t->bt_nrecs + 1) { + if (ISSET(t, R_FIXLEN)) { + if ((tdata.data = + (void *)malloc(t->bt_reclen)) == NULL) + return (RET_ERROR); + tdata.size = t->bt_reclen; + memset(tdata.data, t->bt_bval, tdata.size); + } else { + tdata.data = NULL; + tdata.size = 0; + } + while (nrec > t->bt_nrecs + 1) + if (__rec_iput(t, + t->bt_nrecs, &tdata, 0) != RET_SUCCESS) + return (RET_ERROR); + if (ISSET(t, R_FIXLEN)) + free(tdata.data); + } + } + + if ((status = __rec_iput(t, nrec - 1, data, flags)) != RET_SUCCESS) + return (status); + + if (flags == R_SETCURSOR) + t->bt_rcursor = nrec; + + SET(t, R_MODIFIED); + return (__rec_ret(t, NULL, nrec, key, NULL)); +} + +/* + * __REC_IPUT -- Add a recno item to the tree. + * + * Parameters: + * t: tree + * nrec: record number + * data: data + * + * Returns: + * RET_ERROR, RET_SUCCESS + */ +int +__rec_iput(t, nrec, data, flags) + BTREE *t; + recno_t nrec; + const DBT *data; + u_int flags; +{ + DBT tdata; + EPG *e; + PAGE *h; + indx_t index, nxtindex; + pgno_t pg; + size_t nbytes; + int dflags, status; + char *dest, db[NOVFLSIZE]; + + /* + * If the data won't fit on a page, store it on indirect pages. + * + * XXX + * If the insert fails later on, these pages aren't recovered. + */ + if (data->size > t->bt_ovflsize) { + if (__ovfl_put(t, data, &pg) == RET_ERROR) + return (RET_ERROR); + tdata.data = db; + tdata.size = NOVFLSIZE; + *(pgno_t *)db = pg; + *(size_t *)(db + sizeof(pgno_t)) = data->size; + dflags = P_BIGDATA; + data = &tdata; + } else + dflags = 0; + + /* __rec_search pins the returned page. */ + if ((e = __rec_search(t, nrec, + nrec > t->bt_nrecs || flags == R_IAFTER || flags == R_IBEFORE ? + SINSERT : SEARCH)) == NULL) + return (RET_ERROR); + + h = e->page; + index = e->index; + + /* + * Add the specified key/data pair to the tree. The R_IAFTER and + * R_IBEFORE flags insert the key after/before the specified key. + * + * Pages are split as required. + */ + switch (flags) { + case R_IAFTER: + ++index; + break; + case R_IBEFORE: + break; + default: + if (nrec < t->bt_nrecs && + __rec_dleaf(t, h, index) == RET_ERROR) { + mpool_put(t->bt_mp, h, 0); + return (RET_ERROR); + } + break; + } + + /* + * If not enough room, split the page. The split code will insert + * the key and data and unpin the current page. If inserting into + * the offset array, shift the pointers up. + */ + nbytes = NRLEAFDBT(data->size); + if (h->upper - h->lower < nbytes + sizeof(indx_t)) { + status = __bt_split(t, h, NULL, data, dflags, nbytes, index); + if (status == RET_SUCCESS) + ++t->bt_nrecs; + return (status); + } + + if (index < (nxtindex = NEXTINDEX(h))) + memmove(h->linp + index + 1, h->linp + index, + (nxtindex - index) * sizeof(indx_t)); + h->lower += sizeof(indx_t); + + h->linp[index] = h->upper -= nbytes; + dest = (char *)h + h->upper; + WR_RLEAF(dest, data, dflags); + + ++t->bt_nrecs; + SET(t, B_MODIFIED); + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + + return (RET_SUCCESS); +} diff --git a/lib/libc/db/recno/rec_search.c b/lib/libc/db/recno/rec_search.c new file mode 100644 index 0000000..5d5df3c --- /dev/null +++ b/lib/libc/db/recno/rec_search.c @@ -0,0 +1,127 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)rec_search.c 8.3 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <stdio.h> + +#include <db.h> +#include "recno.h" + +/* + * __REC_SEARCH -- Search a btree for a key. + * + * Parameters: + * t: tree to search + * recno: key to find + * op: search operation + * + * Returns: + * EPG for matching record, if any, or the EPG for the location of the + * key, if it were inserted into the tree. + * + * Returns: + * The EPG for matching record, if any, or the EPG for the location + * of the key, if it were inserted into the tree, is entered into + * the bt_cur field of the tree. A pointer to the field is returned. + */ +EPG * +__rec_search(t, recno, op) + BTREE *t; + recno_t recno; + enum SRCHOP op; +{ + register indx_t index; + register PAGE *h; + EPGNO *parent; + RINTERNAL *r; + pgno_t pg; + indx_t top; + recno_t total; + int sverrno; + + BT_CLR(t); + for (pg = P_ROOT, total = 0;;) { + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) + goto err; + if (h->flags & P_RLEAF) { + t->bt_cur.page = h; + t->bt_cur.index = recno - total; + return (&t->bt_cur); + } + for (index = 0, top = NEXTINDEX(h);;) { + r = GETRINTERNAL(h, index); + if (++index == top || total + r->nrecs > recno) + break; + total += r->nrecs; + } + + if (__bt_push(t, pg, index - 1) == RET_ERROR) + return (NULL); + + pg = r->pgno; + switch (op) { + case SDELETE: + --GETRINTERNAL(h, (index - 1))->nrecs; + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + break; + case SINSERT: + ++GETRINTERNAL(h, (index - 1))->nrecs; + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + break; + case SEARCH: + mpool_put(t->bt_mp, h, 0); + break; + } + + } + /* Try and recover the tree. */ +err: sverrno = errno; + if (op != SEARCH) + while ((parent = BT_POP(t)) != NULL) { + if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL) + break; + if (op == SINSERT) + --GETRINTERNAL(h, parent->index)->nrecs; + else + ++GETRINTERNAL(h, parent->index)->nrecs; + mpool_put(t->bt_mp, h, MPOOL_DIRTY); + } + errno = sverrno; + return (NULL); +} diff --git a/lib/libc/db/recno/rec_seq.c b/lib/libc/db/recno/rec_seq.c new file mode 100644 index 0000000..bc66d1c --- /dev/null +++ b/lib/libc/db/recno/rec_seq.c @@ -0,0 +1,131 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char sccsid[] = "@(#)rec_seq.c 8.2 (Berkeley) 9/7/93"; +#endif /* not lint */ + +#include <sys/types.h> + +#include <errno.h> +#include <limits.h> +#include <stdio.h> +#include <string.h> + +#include <db.h> +#include "recno.h" + +/* + * __REC_SEQ -- Recno sequential scan interface. + * + * Parameters: + * dbp: pointer to access method + * key: key for positioning and return value + * data: data return value + * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV. + * + * Returns: + * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key. + */ +int +__rec_seq(dbp, key, data, flags) + const DB *dbp; + DBT *key, *data; + u_int flags; +{ + BTREE *t; + EPG *e; + recno_t nrec; + int status; + + t = dbp->internal; + + /* Toss any page pinned across calls. */ + if (t->bt_pinned != NULL) { + mpool_put(t->bt_mp, t->bt_pinned, 0); + t->bt_pinned = NULL; + } + + switch(flags) { + case R_CURSOR: + if ((nrec = *(recno_t *)key->data) == 0) + goto einval; + break; + case R_NEXT: + if (ISSET(t, B_SEQINIT)) { + nrec = t->bt_rcursor + 1; + break; + } + /* FALLTHROUGH */ + case R_FIRST: + nrec = 1; + break; + case R_PREV: + if (ISSET(t, B_SEQINIT)) { + if ((nrec = t->bt_rcursor - 1) == 0) + return (RET_SPECIAL); + break; + } + /* FALLTHROUGH */ + case R_LAST: + if (!ISSET(t, R_EOF | R_INMEM) && + t->bt_irec(t, MAX_REC_NUMBER) == RET_ERROR) + return (RET_ERROR); + nrec = t->bt_nrecs; + break; + default: +einval: errno = EINVAL; + return (RET_ERROR); + } + + if (t->bt_nrecs == 0 || nrec > t->bt_nrecs) { + if (!ISSET(t, R_EOF | R_INMEM) && + (status = t->bt_irec(t, nrec)) != RET_SUCCESS) + return (status); + if (t->bt_nrecs == 0 || nrec > t->bt_nrecs) + return (RET_SPECIAL); + } + + if ((e = __rec_search(t, nrec - 1, SEARCH)) == NULL) + return (RET_ERROR); + + SET(t, B_SEQINIT); + t->bt_rcursor = nrec; + + status = __rec_ret(t, e, nrec, key, data); + if (ISSET(t, B_DB_LOCK)) + mpool_put(t->bt_mp, e->page, 0); + else + t->bt_pinned = e->page; + return (status); +} diff --git a/lib/libc/db/recno/rec_utils.c b/lib/libc/db/recno/rec_utils.c new file mode 100644 index 0000000..f7fb145 --- /dev/null +++ b/lib/libc/db/recno/rec_utils.c @@ -0,0 +1,114 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)rec_utils.c 8.3 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/param.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <db.h> +#include "recno.h" + +/* + * __REC_RET -- Build return data as a result of search or scan. + * + * Parameters: + * t: tree + * d: LEAF to be returned to the user. + * data: user's data structure + * + * Returns: + * RET_SUCCESS, RET_ERROR. + */ +int +__rec_ret(t, e, nrec, key, data) + BTREE *t; + EPG *e; + recno_t nrec; + DBT *key, *data; +{ + register RLEAF *rl; + register void *p; + + if (data == NULL) + goto retkey; + + rl = GETRLEAF(e->page, e->index); + + /* + * We always copy big data to make it contigous. Otherwise, we + * leave the page pinned and don't copy unless the user specified + * concurrent access. + */ + if (rl->flags & P_BIGDATA) { + if (__ovfl_get(t, rl->bytes, + &data->size, &t->bt_dbuf, &t->bt_dbufsz)) + return (RET_ERROR); + data->data = t->bt_dbuf; + } else if (ISSET(t, B_DB_LOCK)) { + /* Use +1 in case the first record retrieved is 0 length. */ + if (rl->dsize + 1 > t->bt_dbufsz) { + if ((p = + (void *)realloc(t->bt_dbuf, rl->dsize + 1)) == NULL) + return (RET_ERROR); + t->bt_dbuf = p; + t->bt_dbufsz = rl->dsize + 1; + } + memmove(t->bt_dbuf, rl->bytes, rl->dsize); + data->size = rl->dsize; + data->data = t->bt_dbuf; + } else { + data->size = rl->dsize; + data->data = rl->bytes; + } + +retkey: if (key == NULL) + return (RET_SUCCESS); + + /* We have to copy the key, it's not on the page. */ + if (sizeof(recno_t) > t->bt_kbufsz) { + if ((p = (void *)realloc(t->bt_kbuf, sizeof(recno_t))) == NULL) + return (RET_ERROR); + t->bt_kbuf = p; + t->bt_kbufsz = sizeof(recno_t); + } + memmove(t->bt_kbuf, &nrec, sizeof(recno_t)); + key->size = sizeof(recno_t); + key->data = t->bt_kbuf; + return (RET_SUCCESS); +} diff --git a/lib/libc/db/recno/recno.h b/lib/libc/db/recno/recno.h new file mode 100644 index 0000000..bec772c --- /dev/null +++ b/lib/libc/db/recno/recno.h @@ -0,0 +1,39 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)recno.h 8.1 (Berkeley) 6/4/93 + */ + +enum SRCHOP { SDELETE, SINSERT, SEARCH}; /* Rec_search operation. */ + +#include "../btree/btree.h" +#include "extern.h" diff --git a/lib/libc/db/test/Makefile b/lib/libc/db/test/Makefile new file mode 100644 index 0000000..c816432 --- /dev/null +++ b/lib/libc/db/test/Makefile @@ -0,0 +1,17 @@ +# @(#)Makefile 8.9 (Berkeley) 2/21/94 + +PROG= dbtest +OBJS= dbtest.o strerror.o + +# Add -DSTATISTICS to CFLAGS to get btree statistical use info. +# Note, the db library has to be compiled for statistics as well. +CFLAGS= -D__DBINTERFACE_PRIVATE -DDEBUG -O ${INC} + +dbtest: ${OBJS} ${LIB} + ${CC} -o $@ ${OBJS} ${LIB} + +strerror.o: ../PORT/clib/strerror.c + ${CC} -c ../PORT/clib/strerror.c + +clean: + rm -f gmon.out ${OBJS} ${PROG} t1 t2 t3 diff --git a/lib/libc/db/test/README b/lib/libc/db/test/README new file mode 100644 index 0000000..8631c77 --- /dev/null +++ b/lib/libc/db/test/README @@ -0,0 +1,55 @@ +# @(#)README 8.2 (Berkeley) 2/21/94 + +To build this portably, try something like: + + make INC="-I../PORT/MACH/ -I../PORT/MACH/include" LIB=../PORT/MACH/libdb.a + +where MACH is the machine, i.e. "sunos.4.1.1". + +To run the tests, enter "sh run.test". If your system dictionary isn't +in /usr/share/dict/words, edit run.test to reflect the correct place. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +The script file consists of lines with a initial character which is +the "command" for that line. Legal characters are as follows: + +c: compare a record + + must be followed by [kK][dD]; the data value in the database + associated with the specified key is compared to the specified + data value. +e: echo a string + + writes out the rest of the line into the output file; if the + last character is not a carriage-return, a newline is appended. +g: do a get command + + must be followed by [kK] + + writes out the retrieved data DBT. +p: do a put command + + must be followed by [kK][dD] +r: do a del command + + must be followed by [kK] +s: do a seq command + + writes out the retrieved data DBT. +f: set the flags for the next command + + no value zero's the flags +D [file]: data file + + set the current data value to the contents of the file +d [data]: + + set the current key value to the contents of the line. +K [file]: key file + + set the current key value to the contents of the file +k [data]: + + set the current key value to the contents of the line. +o [r]: dump [reverse] + + dump the database out, if 'r' is set, in reverse order. + +Options to dbtest are as follows: + + -f: Use the file argument as the database file. + -i: Use the rest of the argument to set elements in the info + structure. If the type is btree, then "-i cachesize=10240" + will set BTREEINFO.cachesize to 10240. + -o: The rest of the argument is the output file instead of + using stdout. + +Dbtest requires two arguments, the type of access "hash", "recno" or +"btree", and the script name. diff --git a/lib/libc/db/test/btree.tests/main.c b/lib/libc/db/test/btree.tests/main.c new file mode 100644 index 0000000..f26e193 --- /dev/null +++ b/lib/libc/db/test/btree.tests/main.c @@ -0,0 +1,765 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Olson. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)main.c 8.1 (Berkeley) 6/4/93"; +#endif /* LIBC_SCCS and not lint */ + +#include <sys/param.h> +#include <fcntl.h> +#include <db.h> +#include <errno.h> +#include <stdio.h> +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#include "btree.h" + +typedef struct cmd_table { + char *cmd; + int nargs; + int rconv; + void (*func) __P((DB *, char **)); + char *usage, *descrip; +} cmd_table; + +int stopstop; +DB *globaldb; + +void append __P((DB *, char **)); +void bstat __P((DB *, char **)); +void cursor __P((DB *, char **)); +void delcur __P((DB *, char **)); +void delete __P((DB *, char **)); +void dump __P((DB *, char **)); +void first __P((DB *, char **)); +void get __P((DB *, char **)); +void help __P((DB *, char **)); +void iafter __P((DB *, char **)); +void ibefore __P((DB *, char **)); +void icursor __P((DB *, char **)); +void insert __P((DB *, char **)); +void keydata __P((DBT *, DBT *)); +void last __P((DB *, char **)); +void list __P((DB *, char **)); +void load __P((DB *, char **)); +void mstat __P((DB *, char **)); +void next __P((DB *, char **)); +int parse __P((char *, char **, int)); +void previous __P((DB *, char **)); +void show __P((DB *, char **)); +void usage __P((void)); +void user __P((DB *)); + +cmd_table commands[] = { + "?", 0, 0, help, "help", NULL, + "a", 2, 1, append, "append key def", "append key with data def", + "b", 0, 0, bstat, "bstat", "stat btree", + "c", 1, 1, cursor, "cursor word", "move cursor to word", + "delc", 0, 0, delcur, "delcur", "delete key the cursor references", + "dele", 1, 1, delete, "delete word", "delete word", + "d", 0, 0, dump, "dump", "dump database", + "f", 0, 0, first, "first", "move cursor to first record", + "g", 1, 1, get, "get key", "locate key", + "h", 0, 0, help, "help", "print command summary", + "ia", 2, 1, iafter, "iafter key data", "insert data after key", + "ib", 2, 1, ibefore, "ibefore key data", "insert data before key", + "ic", 2, 1, icursor, "icursor key data", "replace cursor", + "in", 2, 1, insert, "insert key def", "insert key with data def", + "la", 0, 0, last, "last", "move cursor to last record", + "li", 1, 1, list, "list file", "list to a file", + "loa", 1, 0, load, "load file", NULL, + "loc", 1, 1, get, "get key", NULL, + "m", 0, 0, mstat, "mstat", "stat memory pool", + "n", 0, 0, next, "next", "move cursor forward one record", + "p", 0, 0, previous, "previous", "move cursor back one record", + "q", 0, 0, NULL, "quit", "quit", + "sh", 1, 0, show, "show page", "dump a page", + { NULL }, +}; + +int recno; /* use record numbers */ +char *dict = "words"; /* default dictionary */ +char *progname; + +int +main(argc, argv) + int argc; + char **argv; +{ + int c; + DB *db; + BTREEINFO b; + + progname = *argv; + + b.flags = 0; + b.cachesize = 0; + b.maxkeypage = 0; + b.minkeypage = 0; + b.psize = 0; + b.compare = NULL; + b.prefix = NULL; + b.lorder = 0; + + while ((c = getopt(argc, argv, "bc:di:lp:ru")) != EOF) { + switch (c) { + case 'b': + b.lorder = BIG_ENDIAN; + break; + case 'c': + b.cachesize = atoi(optarg); + break; + case 'd': + b.flags |= R_DUP; + break; + case 'i': + dict = optarg; + break; + case 'l': + b.lorder = LITTLE_ENDIAN; + break; + case 'p': + b.psize = atoi(optarg); + break; + case 'r': + recno = 1; + break; + case 'u': + b.flags = 0; + break; + default: + usage(); + } + } + argc -= optind; + argv += optind; + + if (recno) + db = dbopen(*argv == NULL ? NULL : *argv, O_RDWR, + 0, DB_RECNO, NULL); + else + db = dbopen(*argv == NULL ? NULL : *argv, O_CREAT|O_RDWR, + 0600, DB_BTREE, &b); + + if (db == NULL) { + (void)fprintf(stderr, "dbopen: %s\n", strerror(errno)); + exit(1); + } + globaldb = db; + user(db); + exit(0); + /* NOTREACHED */ +} + +void +user(db) + DB *db; +{ + FILE *ifp; + int argc, i, last; + char *lbuf, *argv[4], buf[512]; + + if ((ifp = fopen("/dev/tty", "r")) == NULL) { + (void)fprintf(stderr, + "/dev/tty: %s\n", strerror(errno)); + exit(1); + } + for (last = 0;;) { + (void)printf("> "); + (void)fflush(stdout); + if ((lbuf = fgets(&buf[0], 512, ifp)) == NULL) + break; + if (lbuf[0] == '\n') { + i = last; + goto uselast; + } + lbuf[strlen(lbuf) - 1] = '\0'; + + if (lbuf[0] == 'q') + break; + + argc = parse(lbuf, &argv[0], 3); + if (argc == 0) + continue; + + for (i = 0; commands[i].cmd != NULL; i++) + if (strncmp(commands[i].cmd, argv[0], + strlen(commands[i].cmd)) == 0) + break; + + if (commands[i].cmd == NULL) { + (void)fprintf(stderr, + "%s: command unknown ('help' for help)\n", lbuf); + continue; + } + + if (commands[i].nargs != argc - 1) { + (void)fprintf(stderr, "usage: %s\n", commands[i].usage); + continue; + } + + if (recno && commands[i].rconv) { + static recno_t nlong; + nlong = atoi(argv[1]); + argv[1] = (char *)&nlong; + } +uselast: last = i; + (*commands[i].func)(db, argv); + } + if ((db->sync)(db) == RET_ERROR) + perror("dbsync"); + else if ((db->close)(db) == RET_ERROR) + perror("dbclose"); +} + +int +parse(lbuf, argv, maxargc) + char *lbuf, **argv; + int maxargc; +{ + int argc = 0; + char *c; + + c = lbuf; + while (isspace(*c)) + c++; + while (*c != '\0' && argc < maxargc) { + *argv++ = c; + argc++; + while (!isspace(*c) && *c != '\0') { + c++; + } + while (isspace(*c)) + *c++ = '\0'; + } + return (argc); +} + +void +append(db, argv) + DB *db; + char **argv; +{ + DBT key, data; + int status; + + if (!recno) { + (void)fprintf(stderr, + "append only available for recno db's.\n"); + return; + } + key.data = argv[1]; + key.size = sizeof(recno_t); + data.data = argv[2]; + data.size = strlen(data.data); + status = (db->put)(db, &key, &data, R_APPEND); + switch (status) { + case RET_ERROR: + perror("append/put"); + break; + case RET_SPECIAL: + (void)printf("%s (duplicate key)\n", argv[1]); + break; + case RET_SUCCESS: + break; + } +} + +void +cursor(db, argv) + DB *db; + char **argv; +{ + DBT data, key; + int status; + + key.data = argv[1]; + if (recno) + key.size = sizeof(recno_t); + else + key.size = strlen(argv[1]) + 1; + status = (*db->seq)(db, &key, &data, R_CURSOR); + switch (status) { + case RET_ERROR: + perror("cursor/seq"); + break; + case RET_SPECIAL: + (void)printf("key not found\n"); + break; + case RET_SUCCESS: + keydata(&key, &data); + break; + } +} + +void +delcur(db, argv) + DB *db; + char **argv; +{ + int status; + + status = (*db->del)(db, NULL, R_CURSOR); + + if (status == RET_ERROR) + perror("delcur/del"); +} + +void +delete(db, argv) + DB *db; + char **argv; +{ + DBT key; + int status; + + key.data = argv[1]; + if (recno) + key.size = sizeof(recno_t); + else + key.size = strlen(argv[1]) + 1; + + status = (*db->del)(db, &key, 0); + switch (status) { + case RET_ERROR: + perror("delete/del"); + break; + case RET_SPECIAL: + (void)printf("key not found\n"); + break; + case RET_SUCCESS: + break; + } +} + +void +dump(db, argv) + DB *db; + char **argv; +{ + __bt_dump(db); +} + +void +first(db, argv) + DB *db; + char **argv; +{ + DBT data, key; + int status; + + status = (*db->seq)(db, &key, &data, R_FIRST); + + switch (status) { + case RET_ERROR: + perror("first/seq"); + break; + case RET_SPECIAL: + (void)printf("no more keys\n"); + break; + case RET_SUCCESS: + keydata(&key, &data); + break; + } +} + +void +get(db, argv) + DB *db; + char **argv; +{ + DBT data, key; + int status; + + key.data = argv[1]; + if (recno) + key.size = sizeof(recno_t); + else + key.size = strlen(argv[1]) + 1; + + status = (*db->get)(db, &key, &data, 0); + + switch (status) { + case RET_ERROR: + perror("get/get"); + break; + case RET_SPECIAL: + (void)printf("key not found\n"); + break; + case RET_SUCCESS: + keydata(&key, &data); + break; + } +} + +void +help(db, argv) + DB *db; + char **argv; +{ + int i; + + for (i = 0; commands[i].cmd; i++) + if (commands[i].descrip) + (void)printf("%s: %s\n", + commands[i].usage, commands[i].descrip); +} + +void +iafter(db, argv) + DB *db; + char **argv; +{ + DBT key, data; + int status; + + if (!recno) { + (void)fprintf(stderr, + "iafter only available for recno db's.\n"); + return; + } + key.data = argv[1]; + key.size = sizeof(recno_t); + data.data = argv[2]; + data.size = strlen(data.data); + status = (db->put)(db, &key, &data, R_IAFTER); + switch (status) { + case RET_ERROR: + perror("iafter/put"); + break; + case RET_SPECIAL: + (void)printf("%s (duplicate key)\n", argv[1]); + break; + case RET_SUCCESS: + break; + } +} + +void +ibefore(db, argv) + DB *db; + char **argv; +{ + DBT key, data; + int status; + + if (!recno) { + (void)fprintf(stderr, + "ibefore only available for recno db's.\n"); + return; + } + key.data = argv[1]; + key.size = sizeof(recno_t); + data.data = argv[2]; + data.size = strlen(data.data); + status = (db->put)(db, &key, &data, R_IBEFORE); + switch (status) { + case RET_ERROR: + perror("ibefore/put"); + break; + case RET_SPECIAL: + (void)printf("%s (duplicate key)\n", argv[1]); + break; + case RET_SUCCESS: + break; + } +} + +void +icursor(db, argv) + DB *db; + char **argv; +{ + int status; + DBT data, key; + + key.data = argv[1]; + if (recno) + key.size = sizeof(recno_t); + else + key.size = strlen(argv[1]) + 1; + data.data = argv[2]; + data.size = strlen(argv[2]) + 1; + + status = (*db->put)(db, &key, &data, R_CURSOR); + switch (status) { + case RET_ERROR: + perror("icursor/put"); + break; + case RET_SPECIAL: + (void)printf("%s (duplicate key)\n", argv[1]); + break; + case RET_SUCCESS: + break; + } +} + +void +insert(db, argv) + DB *db; + char **argv; +{ + int status; + DBT data, key; + + key.data = argv[1]; + if (recno) + key.size = sizeof(recno_t); + else + key.size = strlen(argv[1]) + 1; + data.data = argv[2]; + data.size = strlen(argv[2]) + 1; + + status = (*db->put)(db, &key, &data, R_NOOVERWRITE); + switch (status) { + case RET_ERROR: + perror("insert/put"); + break; + case RET_SPECIAL: + (void)printf("%s (duplicate key)\n", argv[1]); + break; + case RET_SUCCESS: + break; + } +} + +void +last(db, argv) + DB *db; + char **argv; +{ + DBT data, key; + int status; + + status = (*db->seq)(db, &key, &data, R_LAST); + + switch (status) { + case RET_ERROR: + perror("last/seq"); + break; + case RET_SPECIAL: + (void)printf("no more keys\n"); + break; + case RET_SUCCESS: + keydata(&key, &data); + break; + } +} + +void +list(db, argv) + DB *db; + char **argv; +{ + DBT data, key; + FILE *fp; + int status; + + if ((fp = fopen(argv[1], "w")) == NULL) { + (void)fprintf(stderr, "%s: %s\n", argv[1], strerror(errno)); + return; + } + status = (*db->seq)(db, &key, &data, R_FIRST); + while (status == RET_SUCCESS) { + (void)fprintf(fp, "%s\n", key.data); + status = (*db->seq)(db, &key, &data, R_NEXT); + } + if (status == RET_ERROR) + perror("list/seq"); +} + +DB *BUGdb; +void +load(db, argv) + DB *db; + char **argv; +{ + register char *p, *t; + FILE *fp; + DBT data, key; + recno_t cnt; + size_t len; + int status; + char *lp, buf[16 * 1024]; + + BUGdb = db; + if ((fp = fopen(argv[1], "r")) == NULL) { + (void)fprintf(stderr, "%s: %s\n", argv[1], strerror(errno)); + return; + } + (void)printf("loading %s...\n", argv[1]); + + for (cnt = 1; (lp = fgetline(fp, &len)) != NULL; ++cnt) { + if (recno) { + key.data = &cnt; + key.size = sizeof(recno_t); + data.data = lp; + data.size = len + 1; + } else { + key.data = lp; + key.size = len + 1; + for (p = lp + len - 1, t = buf; p >= lp; *t++ = *p--); + *t = '\0'; + data.data = buf; + data.size = len + 1; + } + + status = (*db->put)(db, &key, &data, R_NOOVERWRITE); + switch (status) { + case RET_ERROR: + perror("load/put"); + exit(1); + case RET_SPECIAL: + if (recno) + (void)fprintf(stderr, + "duplicate: %ld {%s}\n", cnt, data.data); + else + (void)fprintf(stderr, + "duplicate: %ld {%s}\n", cnt, key.data); + exit(1); + case RET_SUCCESS: + break; + } + } + (void)fclose(fp); +} + +void +next(db, argv) + DB *db; + char **argv; +{ + DBT data, key; + int status; + + status = (*db->seq)(db, &key, &data, R_NEXT); + + switch (status) { + case RET_ERROR: + perror("next/seq"); + break; + case RET_SPECIAL: + (void)printf("no more keys\n"); + break; + case RET_SUCCESS: + keydata(&key, &data); + break; + } +} + +void +previous(db, argv) + DB *db; + char **argv; +{ + DBT data, key; + int status; + + status = (*db->seq)(db, &key, &data, R_PREV); + + switch (status) { + case RET_ERROR: + perror("previous/seq"); + break; + case RET_SPECIAL: + (void)printf("no more keys\n"); + break; + case RET_SUCCESS: + keydata(&key, &data); + break; + } +} + +void +show(db, argv) + DB *db; + char **argv; +{ + BTREE *t; + PAGE *h; + pgno_t pg; + + pg = atoi(argv[1]); + t = db->internal; + if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) { + (void)printf("getpage of %ld failed\n", pg); + return; + } + if (pg == 0) + __bt_dmpage(h); + else + __bt_dpage(h); + mpool_put(t->bt_mp, h, 0); +} + +void +bstat(db, argv) + DB *db; + char **argv; +{ + (void)printf("BTREE\n"); + __bt_stat(db); +} + +void +mstat(db, argv) + DB *db; + char **argv; +{ + (void)printf("MPOOL\n"); + mpool_stat(((BTREE *)db->internal)->bt_mp); +} + +void +keydata(key, data) + DBT *key, *data; +{ + if (!recno && key->size > 0) + (void)printf("%s/", key->data); + if (data->size > 0) + (void)printf("%s", data->data); + (void)printf("\n"); +} + +void +usage() +{ + (void)fprintf(stderr, + "usage: %s [-bdlu] [-c cache] [-i file] [-p page] [file]\n", + progname); + exit (1); +} diff --git a/lib/libc/db/test/dbtest.c b/lib/libc/db/test/dbtest.c new file mode 100644 index 0000000..b8f97dc --- /dev/null +++ b/lib/libc/db/test/dbtest.c @@ -0,0 +1,666 @@ +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1992, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)dbtest.c 8.8 (Berkeley) 2/21/94"; +#endif /* not lint */ + +#include <sys/param.h> +#include <sys/stat.h> + +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <db.h> + +enum S { COMMAND, COMPARE, GET, PUT, REMOVE, SEQ, SEQFLAG, KEY, DATA }; + +void compare __P((DBT *, DBT *)); +DBTYPE dbtype __P((char *)); +void dump __P((DB *, int)); +void err __P((const char *, ...)); +void get __P((DB *, DBT *)); +void getdata __P((DB *, DBT *, DBT *)); +void put __P((DB *, DBT *, DBT *)); +void rem __P((DB *, DBT *)); +void *rfile __P((char *, size_t *)); +void seq __P((DB *, DBT *)); +u_int setflags __P((char *)); +void *setinfo __P((DBTYPE, char *)); +void usage __P((void)); +void *xmalloc __P((char *, size_t)); + +DBTYPE type; +void *infop; +u_long lineno; +u_int flags; +int ofd = STDOUT_FILENO; + +DB *XXdbp; /* Global for gdb. */ + +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern int optind; + extern char *optarg; + enum S command, state; + DB *dbp; + DBT data, key, keydata; + size_t len; + int ch, oflags; + char *fname, *infoarg, *p, buf[8 * 1024]; + + infoarg = NULL; + fname = NULL; + oflags = O_CREAT | O_RDWR; + while ((ch = getopt(argc, argv, "f:i:lo:")) != EOF) + switch(ch) { + case 'f': + fname = optarg; + break; + case 'i': + infoarg = optarg; + break; + case 'l': + oflags |= DB_LOCK; + break; + case 'o': + if ((ofd = open(optarg, + O_WRONLY|O_CREAT|O_TRUNC, 0666)) < 0) + err("%s: %s", optarg, strerror(errno)); + break; + case '?': + default: + usage(); + } + argc -= optind; + argv += optind; + + if (argc != 2) + usage(); + + /* Set the type. */ + type = dbtype(*argv++); + + /* Open the descriptor file. */ + if (freopen(*argv, "r", stdin) == NULL) + err("%s: %s", *argv, strerror(errno)); + + /* Set up the db structure as necessary. */ + if (infoarg == NULL) + infop = NULL; + else + for (p = strtok(infoarg, ",\t "); p != NULL; + p = strtok(0, ",\t ")) + if (*p != '\0') + infop = setinfo(type, p); + + /* Open the DB. */ + if (fname == NULL) { + p = getenv("TMPDIR"); + if (p == NULL) + p = "/var/tmp"; + (void)sprintf(buf, "%s/__dbtest", p); + fname = buf; + (void)unlink(buf); + } + if ((dbp = dbopen(fname, + oflags, S_IRUSR | S_IWUSR, type, infop)) == NULL) + err("dbopen: %s", strerror(errno)); + XXdbp = dbp; + + state = COMMAND; + for (lineno = 1; + (p = fgets(buf, sizeof(buf), stdin)) != NULL; ++lineno) { + len = strlen(buf); + switch(*p) { + case 'c': /* compare */ + if (state != COMMAND) + err("line %lu: not expecting command", lineno); + state = KEY; + command = COMPARE; + break; + case 'e': /* echo */ + if (state != COMMAND) + err("line %lu: not expecting command", lineno); + /* Don't display the newline, if CR at EOL. */ + if (p[len - 2] == '\r') + --len; + if (write(ofd, p + 1, len - 1) != len - 1) + err("write: %s", strerror(errno)); + break; + case 'g': /* get */ + if (state != COMMAND) + err("line %lu: not expecting command", lineno); + state = KEY; + command = GET; + break; + case 'p': /* put */ + if (state != COMMAND) + err("line %lu: not expecting command", lineno); + state = KEY; + command = PUT; + break; + case 'r': /* remove */ + if (state != COMMAND) + err("line %lu: not expecting command", lineno); + state = KEY; + command = REMOVE; + break; + case 's': /* seq */ + if (state != COMMAND) + err("line %lu: not expecting command", lineno); + if (flags == R_CURSOR) { + state = KEY; + command = SEQ; + } else + seq(dbp, &key); + break; + case 'f': + flags = setflags(p + 1); + break; + case 'D': /* data file */ + if (state != DATA) + err("line %lu: not expecting data", lineno); + data.data = rfile(p + 1, &data.size); + goto ldata; + case 'd': /* data */ + if (state != DATA) + err("line %lu: not expecting data", lineno); + data.data = xmalloc(p + 1, len - 1); + data.size = len - 1; +ldata: switch(command) { + case COMPARE: + compare(&keydata, &data); + break; + case PUT: + put(dbp, &key, &data); + break; + default: + err("line %lu: command doesn't take data", + lineno); + } + if (type != DB_RECNO) + free(key.data); + free(data.data); + state = COMMAND; + break; + case 'K': /* key file */ + if (state != KEY) + err("line %lu: not expecting a key", lineno); + if (type == DB_RECNO) + err("line %lu: 'K' not available for recno", + lineno); + key.data = rfile(p + 1, &key.size); + goto lkey; + case 'k': /* key */ + if (state != KEY) + err("line %lu: not expecting a key", lineno); + if (type == DB_RECNO) { + static recno_t recno; + recno = atoi(p + 1); + key.data = &recno; + key.size = sizeof(recno); + } else { + key.data = xmalloc(p + 1, len - 1); + key.size = len - 1; + } +lkey: switch(command) { + case COMPARE: + getdata(dbp, &key, &keydata); + state = DATA; + break; + case GET: + get(dbp, &key); + if (type != DB_RECNO) + free(key.data); + state = COMMAND; + break; + case PUT: + state = DATA; + break; + case REMOVE: + rem(dbp, &key); + if (type != DB_RECNO) + free(key.data); + state = COMMAND; + break; + case SEQ: + seq(dbp, &key); + if (type != DB_RECNO) + free(key.data); + state = COMMAND; + break; + default: + err("line %lu: command doesn't take a key", + lineno); + } + break; + case 'o': + dump(dbp, p[1] == 'r'); + break; + default: + err("line %lu: %s: unknown command character", + p, lineno); + } + } +#ifdef STATISTICS + if (type == DB_BTREE) + __bt_stat(dbp); +#endif + if (dbp->close(dbp)) + err("db->close: %s", strerror(errno)); + (void)close(ofd); + exit(0); +} + +#define NOOVERWRITE "put failed, would overwrite key\n" +#define NOSUCHKEY "get failed, no such key\n" + +void +compare(db1, db2) + DBT *db1, *db2; +{ + register size_t len; + register u_char *p1, *p2; + + if (db1->size != db2->size) + printf("compare failed: key->data len %lu != data len %lu\n", + db1->size, db2->size); + + len = MIN(db1->size, db2->size); + for (p1 = db1->data, p2 = db2->data; len--;) + if (*p1++ != *p2++) { + printf("compare failed at offset %d\n", + p1 - (u_char *)db1->data); + break; + } +} + +void +get(dbp, kp) + DB *dbp; + DBT *kp; +{ + DBT data; + + switch(dbp->get(dbp, kp, &data, flags)) { + case 0: + (void)write(ofd, data.data, data.size); + break; + case -1: + err("line %lu: get: %s", lineno, strerror(errno)); + /* NOTREACHED */ + case 1: + (void)write(ofd, NOSUCHKEY, sizeof(NOSUCHKEY) - 1); + (void)fprintf(stderr, "%d: %.*s: %s\n", + lineno, kp->size, kp->data, NOSUCHKEY); + break; + } +} + +void +getdata(dbp, kp, dp) + DB *dbp; + DBT *kp, *dp; +{ + switch(dbp->get(dbp, kp, dp, flags)) { + case 0: + return; + case -1: + err("line %lu: getdata: %s", lineno, strerror(errno)); + /* NOTREACHED */ + case 1: + err("line %lu: get failed, no such key", lineno); + /* NOTREACHED */ + } +} + +void +put(dbp, kp, dp) + DB *dbp; + DBT *kp, *dp; +{ + switch(dbp->put(dbp, kp, dp, flags)) { + case 0: + break; + case -1: + err("line %lu: put: %s", lineno, strerror(errno)); + /* NOTREACHED */ + case 1: + (void)write(ofd, NOOVERWRITE, sizeof(NOOVERWRITE) - 1); + break; + } +} + +void +rem(dbp, kp) + DB *dbp; + DBT *kp; +{ + switch(dbp->del(dbp, kp, flags)) { + case 0: + break; + case -1: + err("line %lu: get: %s", lineno, strerror(errno)); + /* NOTREACHED */ + case 1: + (void)write(ofd, NOSUCHKEY, sizeof(NOSUCHKEY) - 1); + break; + } +} + +void +seq(dbp, kp) + DB *dbp; + DBT *kp; +{ + DBT data; + + switch(dbp->seq(dbp, kp, &data, flags)) { + case 0: + (void)write(ofd, data.data, data.size); + break; + case -1: + err("line %lu: seq: %s", lineno, strerror(errno)); + /* NOTREACHED */ + case 1: + (void)write(ofd, NOSUCHKEY, sizeof(NOSUCHKEY) - 1); + break; + } +} + +void +dump(dbp, rev) + DB *dbp; + int rev; +{ + DBT key, data; + int flags, nflags; + + if (rev) { + flags = R_LAST; + nflags = R_PREV; + } else { + flags = R_FIRST; + nflags = R_NEXT; + } + for (;; flags = nflags) + switch(dbp->seq(dbp, &key, &data, flags)) { + case 0: + (void)write(ofd, data.data, data.size); + break; + case 1: + goto done; + case -1: + err("line %lu: (dump) seq: %s", + lineno, strerror(errno)); + /* NOTREACHED */ + } +done: return; +} + +u_int +setflags(s) + char *s; +{ + char *p, *index(); + + for (; isspace(*s); ++s); + if (*s == '\n') + return (0); + if ((p = index(s, '\n')) != NULL) + *p = '\0'; + if (!strcmp(s, "R_CURSOR")) + return (R_CURSOR); + if (!strcmp(s, "R_FIRST")) + return (R_FIRST); + if (!strcmp(s, "R_IAFTER")) + return (R_IAFTER); + if (!strcmp(s, "R_IBEFORE")) + return (R_IBEFORE); + if (!strcmp(s, "R_LAST")) + return (R_LAST); + if (!strcmp(s, "R_NEXT")) + return (R_NEXT); + if (!strcmp(s, "R_NOOVERWRITE")) + return (R_NOOVERWRITE); + if (!strcmp(s, "R_PREV")) + return (R_PREV); + if (!strcmp(s, "R_SETCURSOR")) + return (R_SETCURSOR); + err("line %lu: %s: unknown flag", lineno, s); + /* NOTREACHED */ +} + +DBTYPE +dbtype(s) + char *s; +{ + if (!strcmp(s, "btree")) + return (DB_BTREE); + if (!strcmp(s, "hash")) + return (DB_HASH); + if (!strcmp(s, "recno")) + return (DB_RECNO); + err("%s: unknown type (use btree, hash or recno)", s); + /* NOTREACHED */ +} + +void * +setinfo(type, s) + DBTYPE type; + char *s; +{ + static BTREEINFO ib; + static HASHINFO ih; + static RECNOINFO rh; + char *eq, *index(); + + if ((eq = index(s, '=')) == NULL) + err("%s: illegal structure set statement", s); + *eq++ = '\0'; + if (!isdigit(*eq)) + err("%s: structure set statement must be a number", s); + + switch(type) { + case DB_BTREE: + if (!strcmp("flags", s)) { + ib.flags = atoi(eq); + return (&ib); + } + if (!strcmp("cachesize", s)) { + ib.cachesize = atoi(eq); + return (&ib); + } + if (!strcmp("maxkeypage", s)) { + ib.maxkeypage = atoi(eq); + return (&ib); + } + if (!strcmp("minkeypage", s)) { + ib.minkeypage = atoi(eq); + return (&ib); + } + if (!strcmp("lorder", s)) { + ib.lorder = atoi(eq); + return (&ib); + } + if (!strcmp("psize", s)) { + ib.psize = atoi(eq); + return (&ib); + } + break; + case DB_HASH: + if (!strcmp("bsize", s)) { + ih.bsize = atoi(eq); + return (&ih); + } + if (!strcmp("ffactor", s)) { + ih.ffactor = atoi(eq); + return (&ih); + } + if (!strcmp("nelem", s)) { + ih.nelem = atoi(eq); + return (&ih); + } + if (!strcmp("cachesize", s)) { + ih.cachesize = atoi(eq); + return (&ih); + } + if (!strcmp("lorder", s)) { + ih.lorder = atoi(eq); + return (&ih); + } + break; + case DB_RECNO: + if (!strcmp("flags", s)) { + rh.flags = atoi(eq); + return (&rh); + } + if (!strcmp("cachesize", s)) { + rh.cachesize = atoi(eq); + return (&rh); + } + if (!strcmp("lorder", s)) { + rh.lorder = atoi(eq); + return (&rh); + } + if (!strcmp("reclen", s)) { + rh.reclen = atoi(eq); + return (&rh); + } + if (!strcmp("bval", s)) { + rh.bval = atoi(eq); + return (&rh); + } + if (!strcmp("psize", s)) { + rh.psize = atoi(eq); + return (&rh); + } + break; + } + err("%s: unknown structure value", s); + /* NOTREACHED */ +} + +void * +rfile(name, lenp) + char *name; + size_t *lenp; +{ + struct stat sb; + void *p; + int fd; + char *np, *index(); + + for (; isspace(*name); ++name); + if ((np = index(name, '\n')) != NULL) + *np = '\0'; + if ((fd = open(name, O_RDONLY, 0)) < 0 || + fstat(fd, &sb)) + err("%s: %s\n", name, strerror(errno)); +#ifdef NOT_PORTABLE + if (sb.st_size > (off_t)SIZE_T_MAX) + err("%s: %s\n", name, strerror(E2BIG)); +#endif + if ((p = (void *)malloc((u_int)sb.st_size)) == NULL) + err("%s", strerror(errno)); + (void)read(fd, p, (int)sb.st_size); + *lenp = sb.st_size; + (void)close(fd); + return (p); +} + +void * +xmalloc(text, len) + char *text; + size_t len; +{ + void *p; + + if ((p = (void *)malloc(len)) == NULL) + err("%s", strerror(errno)); + memmove(p, text, len); + return (p); +} + +void +usage() +{ + (void)fprintf(stderr, + "usage: dbtest [-l] [-f file] [-i info] [-o file] type script\n"); + exit(1); +} + +#if __STDC__ +#include <stdarg.h> +#else +#include <varargs.h> +#endif + +void +#if __STDC__ +err(const char *fmt, ...) +#else +err(fmt, va_alist) + char *fmt; + va_dcl +#endif +{ + va_list ap; +#if __STDC__ + va_start(ap, fmt); +#else + va_start(ap); +#endif + (void)fprintf(stderr, "dbtest: "); + (void)vfprintf(stderr, fmt, ap); + va_end(ap); + (void)fprintf(stderr, "\n"); + exit(1); + /* NOTREACHED */ +} diff --git a/lib/libc/db/test/hash.tests/driver2.c b/lib/libc/db/test/hash.tests/driver2.c new file mode 100644 index 0000000..2008a28 --- /dev/null +++ b/lib/libc/db/test/hash.tests/driver2.c @@ -0,0 +1,114 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1991, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)driver2.c 8.1 (Berkeley) 6/4/93"; +#endif /* not lint */ + +/* + * Test driver, to try to tackle the large ugly-split problem. + */ + +#include <sys/file.h> +#include <stdio.h> +#include "ndbm.h" + +int my_hash(key, len) + char *key; + int len; +{ + return(17); /* So I'm cruel... */ +} + +main(argc, argv) + int argc; +{ + DB *db; + DBT key, content; + char keybuf[2049]; + char contentbuf[2049]; + char buf[256]; + int i; + HASHINFO info; + + info.bsize = 1024; + info.ffactor = 5; + info.nelem = 1; + info.cachesize = NULL; +#ifdef HASH_ID_PROGRAM_SPECIFIED + info.hash_id = HASH_ID_PROGRAM_SPECIFIED; + info.hash_func = my_hash; +#else + info.hash = my_hash; +#endif + info.lorder = 0; + if (!(db = dbopen("bigtest", O_RDWR | O_CREAT, 0644, DB_HASH, &info))) { + sprintf(buf, "dbopen: failed on file bigtest"); + perror(buf); + exit(1); + } + srandom(17); + key.data = keybuf; + content.data = contentbuf; + bzero(keybuf, sizeof(keybuf)); + bzero(contentbuf, sizeof(contentbuf)); + for (i=1; i <= 500; i++) { + key.size = 128 + (random()&1023); + content.size = 128 + (random()&1023); +/* printf("%d: Key size %d, data size %d\n", i, key.size, + content.size); */ + sprintf(keybuf, "Key #%d", i); + sprintf(contentbuf, "Contents #%d", i); + if ((db->put)(db, &key, &content, R_NOOVERWRITE)) { + sprintf(buf, "dbm_store #%d", i); + perror(buf); + } + } + if ((db->close)(db)) { + perror("closing hash file"); + exit(1); + } + exit(0); +} + + + diff --git a/lib/libc/db/test/hash.tests/makedb.sh b/lib/libc/db/test/hash.tests/makedb.sh new file mode 100644 index 0000000..f28e281 --- /dev/null +++ b/lib/libc/db/test/hash.tests/makedb.sh @@ -0,0 +1,13 @@ +#!/bin/sh +# +# @(#)makedb.sh 8.1 (Berkeley) 6/4/93 + +awk '{i++; print $0; print i;}' /usr/share/dict/words > WORDS +ls /bin /usr/bin /usr/ucb /etc | egrep '^(...|....|.....|......)$' | \ +sort | uniq | \ +awk '{ + printf "%s\n", $0 + for (i = 0; i < 1000; i++) + printf "%s+", $0 + printf "\n" +}' > LONG.DATA diff --git a/lib/libc/db/test/hash.tests/tcreat3.c b/lib/libc/db/test/hash.tests/tcreat3.c new file mode 100644 index 0000000..bd125ac --- /dev/null +++ b/lib/libc/db/test/hash.tests/tcreat3.c @@ -0,0 +1,105 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1991, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)tcreat3.c 8.1 (Berkeley) 6/4/93"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/file.h> +#include <stdio.h> +#include <db.h> + +#define INITIAL 25000 +#define MAXWORDS 25000 /* # of elements in search table */ + +char wp1[8192]; +char wp2[8192]; +main(argc, argv) +char **argv; +{ + DBT item, key; + DB *dbp; + HASHINFO ctl; + FILE *fp; + int trash; + + int i = 0; + + argv++; + ctl.hash = NULL; + ctl.bsize = atoi(*argv++); + ctl.ffactor = atoi(*argv++); + ctl.nelem = atoi(*argv++); + ctl.lorder = 0; + if (!(dbp = dbopen( "hashtest", + O_CREAT|O_TRUNC|O_RDWR, 0600, DB_HASH, &ctl))){ + /* create table */ + fprintf(stderr, "cannot create: hash table (size %d)\n", + INITIAL); + exit(1); + } + + key.data = wp1; + item.data = wp2; + while ( fgets(wp1, 8192, stdin) && + fgets(wp2, 8192, stdin) && + i++ < MAXWORDS) { +/* +* put info in structure, and structure in the item +*/ + key.size = strlen(wp1); + item.size = strlen(wp2); + +/* + * enter key/data pair into the table + */ + if ((dbp->put)(dbp, &key, &item, R_NOOVERWRITE) != NULL) { + fprintf(stderr, "cannot enter: key %s\n", + item.data); + exit(1); + } + } + + (dbp->close)(dbp); + exit(0); +} diff --git a/lib/libc/db/test/hash.tests/tdel.c b/lib/libc/db/test/hash.tests/tdel.c new file mode 100644 index 0000000..ed3f90a --- /dev/null +++ b/lib/libc/db/test/hash.tests/tdel.c @@ -0,0 +1,122 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1991, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)tdel.c 8.1 (Berkeley) 6/4/93"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/file.h> +#include <db.h> +#include <stdio.h> + +#define INITIAL 25000 +#define MAXWORDS 25000 /* # of elements in search table */ + +/* Usage: thash pagesize fillfactor file */ +char wp1[8192]; +char wp2[8192]; +main(argc, argv) +char **argv; +{ + DBT item, key; + DB *dbp; + HASHINFO ctl; + FILE *fp; + int stat; + + int i = 0; + + argv++; + ctl.nelem = INITIAL; + ctl.hash = NULL; + ctl.bsize = atoi(*argv++); + ctl.ffactor = atoi(*argv++); + ctl.cachesize = 1024 * 1024; /* 1 MEG */ + ctl.lorder = 0; + argc -= 2; + if (!(dbp = dbopen( NULL, O_CREAT|O_RDWR, 0400, DB_HASH, &ctl))) { + /* create table */ + fprintf(stderr, "cannot create: hash table size %d)\n", + INITIAL); + exit(1); + } + + key.data = wp1; + item.data = wp2; + while ( fgets(wp1, 8192, stdin) && + fgets(wp2, 8192, stdin) && + i++ < MAXWORDS) { +/* +* put info in structure, and structure in the item +*/ + key.size = strlen(wp1); + item.size = strlen(wp2); + +/* + * enter key/data pair into the table + */ + if ((dbp->put)(dbp, &key, &item, R_NOOVERWRITE) != NULL) { + fprintf(stderr, "cannot enter: key %s\n", + item.data); + exit(1); + } + } + + if ( --argc ) { + fp = fopen ( argv[0], "r"); + i = 0; + while ( fgets(wp1, 8192, fp) && + fgets(wp2, 8192, fp) && + i++ < MAXWORDS) { + key.size = strlen(wp1); + stat = (dbp->del)(dbp, &key, 0); + if (stat) { + fprintf ( stderr, "Error retrieving %s\n", key.data ); + exit(1); + } + } + fclose(fp); + } + (dbp->close)(dbp); + exit(0); +} diff --git a/lib/libc/db/test/hash.tests/testit b/lib/libc/db/test/hash.tests/testit new file mode 100644 index 0000000..039457a --- /dev/null +++ b/lib/libc/db/test/hash.tests/testit @@ -0,0 +1,147 @@ +#!/bin/csh -f +# +# @(#)testit 8.1 (Berkeley) 6/4/93 +# + +echo "" +echo "PAGE FILL " +set name=WORDS + set i = 256 + foreach j ( 11 14 21 ) + thash4 $i $j 25000 65536 $name < $name + end + set i = 512 + foreach j ( 21 28 43 ) + thash4 $i $j 25000 65536 $name < $name + end + set i = 1024 + foreach j ( 43 57 85 ) + thash4 $i $j 25000 65536 $name < $name + end + set i = 2048 + foreach j ( 85 114 171 ) + thash4 $i $j 25000 65536 $name < $name + end + set i = 4096 + foreach j ( 171 228 341 ) + thash4 $i $j 25000 65536 $name < $name + end + set i = 8192 + foreach j ( 341 455 683 ) + thash4 $i $j 25000 65536 $name < $name + end + echo "PAGE FILL " + set i = 256 + foreach j ( 11 14 21 ) + echo "$i"_"$j" + tcreat3 $i $j 25000 $name < $name + tread2 65536 < $name + tverify $name < $name + tseq > /dev/null + tdel $i $j $name < $name + end + set i = 512 + foreach j ( 21 28 43 ) + echo "$i"_"$j" + tcreat3 $i $j 25000 $name < $name + tread2 65536 < $name + tverify $name < $name + tseq > /dev/null + tdel $i $j $name < $name + end + set i = 1024 + foreach j ( 43 57 85 ) + echo "$i"_"$j" + tcreat3 $i $j 25000 $name < $name + tread2 65536 < $name + tverify $name < $name + tseq > /dev/null + tdel $i $j $name < $name + end + set i = 2048 + foreach j ( 85 114 171 ) + echo "$i"_"$j" + tcreat3 $i $j 25000 $name < $name + tread2 65536 < $name + tverify $name < $name + tseq > /dev/null + tdel $i $j $name < $name + end + set i = 4096 + foreach j ( 171 228 341 ) + echo "$i"_"$j" + tcreat3 $i $j 25000 $name < $name + tread2 65536 < $name + tverify $name < $name + tseq > /dev/null + tdel $i $j $name < $name + end + set i = 8192 + foreach j ( 341 455 683 ) + echo "$i"_"$j" + tcreat3 $i $j 25000 $name < $name + tread2 65536 < $name + tverify $name < $name + tseq > /dev/null + tdel $i $j $name < $name + end +set name=LONG.DATA + set i = 1024 + foreach j ( 1 2 4 ) + echo thash4 $i $j 600 65536 $name + thash4 $i $j 600 65536 $name < $name + end + + set i = 2048 + foreach j ( 1 2 4 ) + echo thash4 $i $j 600 65536 $name + thash4 $i $j 600 65536 $name < $name + end + set i = 4096 + foreach j ( 1 2 4 ) + echo thash4 $i $j 600 65536 $name + thash4 $i $j 600 65536 $name < $name + end + set i = 8192 + foreach j ( 2 4 8 ) + echo thash4 $i $j 600 65536 $name + thash4 $i $j 600 65536 $name < $name + end + echo "PAGE FILL " + set i = 1024 + foreach j ( 1 2 4 ) + echo "$i"_"$j" + tcreat3 $i $j 600 $name < $name + tread2 65536 < $name + tverify $name < $name + tseq > /dev/null + tdel $i $j $name < $name + end + set i = 2048 + foreach j ( 1 2 4 ) + echo "$i"_"$j" + tcreat3 $i $j 600 $name < $name + tread2 65536 < $name + tverify $name < $name + tseq > /dev/null + tdel $i $j $name < $name + end + set i = 4096 + foreach j ( 1 2 4 ) + echo "$i"_"$j" + tcreat3 $i $j 600 $name < $name + tread2 65536 < $name + tverify $name < $name + tseq > /dev/null + tdel $i $j $name < $name + end + set i = 8192 + foreach j ( 2 4 8 ) + echo "$i"_"$j" + tcreat3 $i $j 600 $name < $name + tread2 65536 < $name + tverify $name < $name + tseq > /dev/null + tdel $i $j $name < $name + end +driver2 diff --git a/lib/libc/db/test/hash.tests/thash4.c b/lib/libc/db/test/hash.tests/thash4.c new file mode 100644 index 0000000..9e344cb --- /dev/null +++ b/lib/libc/db/test/hash.tests/thash4.c @@ -0,0 +1,132 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1991, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)thash4.c 8.1 (Berkeley) 6/4/93"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/file.h> +#include <sys/timeb.h> +#include <stdio.h> +#include <errno.h> +#include <db.h> + +#define INITIAL 25000 +#define MAXWORDS 25000 /* # of elements in search table */ + +/* Usage: thash pagesize fillfactor file */ +char wp1[8192]; +char wp2[8192]; +main(argc, argv) +char **argv; +{ + DBT item, key, res; + DB *dbp; + HASHINFO ctl; + FILE *fp; + int stat; + time_t t; + + int i = 0; + + argv++; + ctl.hash = NULL; + ctl.bsize = atoi(*argv++); + ctl.ffactor = atoi(*argv++); + ctl.nelem = atoi(*argv++); + ctl.cachesize = atoi(*argv++); + ctl.lorder = 0; + if (!(dbp = dbopen( NULL, O_CREAT|O_RDWR, 0400, DB_HASH, &ctl))) { + /* create table */ + fprintf(stderr, "cannot create: hash table size %d)\n", + INITIAL); + fprintf(stderr, "\terrno: %d\n", errno); + exit(1); + } + + key.data = wp1; + item.data = wp2; + while ( fgets(wp1, 8192, stdin) && + fgets(wp2, 8192, stdin) && + i++ < MAXWORDS) { +/* +* put info in structure, and structure in the item +*/ + key.size = strlen(wp1); + item.size = strlen(wp2); + +/* + * enter key/data pair into the table + */ + if ((dbp->put)(dbp, &key, &item, R_NOOVERWRITE) != NULL) { + fprintf(stderr, "cannot enter: key %s\n", + item.data); + fprintf(stderr, "\terrno: %d\n", errno); + exit(1); + } + } + + if ( --argc ) { + fp = fopen ( argv[0], "r"); + i = 0; + while ( fgets(wp1, 256, fp) && + fgets(wp2, 8192, fp) && + i++ < MAXWORDS) { + + key.size = strlen(wp1); + stat = (dbp->get)(dbp, &key, &res, 0); + if (stat < 0 ) { + fprintf ( stderr, "Error retrieving %s\n", key.data ); + fprintf(stderr, "\terrno: %d\n", errno); + exit(1); + } else if ( stat > 0 ) { + fprintf ( stderr, "%s not found\n", key.data ); + fprintf(stderr, "\terrno: %d\n", errno); + exit(1); + } + } + fclose(fp); + } + dbp->close(dbp); + exit(0); +} diff --git a/lib/libc/db/test/hash.tests/tread2.c b/lib/libc/db/test/hash.tests/tread2.c new file mode 100644 index 0000000..8f01556 --- /dev/null +++ b/lib/libc/db/test/hash.tests/tread2.c @@ -0,0 +1,105 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1991, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)tread2.c 8.1 (Berkeley) 6/4/93"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/file.h> +#include <stdio.h> +#include <db.h> + +#define INITIAL 25000 +#define MAXWORDS 25000 /* # of elements in search table */ + +typedef struct { /* info to be stored */ + int num, siz; +} info; + +char wp1[8192]; +char wp2[8192]; +main(argc, argv) +char **argv; +{ + DBT item, key, res; + DB *dbp; + HASHINFO ctl; + int stat; + + int i = 0; + + ctl.nelem = INITIAL; + ctl.hash = NULL; + ctl.bsize = 64; + ctl.ffactor = 1; + ctl.cachesize = atoi(*argv++); + ctl.lorder = 0; + if (!(dbp = dbopen( "hashtest", O_RDONLY, 0400, DB_HASH, &ctl))) { + /* create table */ + fprintf(stderr, "cannot open: hash table\n" ); + exit(1); + } + + key.data = wp1; + item.data = wp2; + while ( fgets(wp1, 8192, stdin) && + fgets(wp2, 8192, stdin) && + i++ < MAXWORDS) { +/* +* put info in structure, and structure in the item +*/ + key.size = strlen(wp1); + item.size = strlen(wp2); + + stat = (dbp->get)(dbp, &key, &res,0); + if (stat < 0) { + fprintf ( stderr, "Error retrieving %s\n", key.data ); + exit(1); + } else if ( stat > 0 ) { + fprintf ( stderr, "%s not found\n", key.data ); + exit(1); + } + } + (dbp->close)(dbp); + exit(0); +} diff --git a/lib/libc/db/test/hash.tests/tseq.c b/lib/libc/db/test/hash.tests/tseq.c new file mode 100644 index 0000000..f45700e --- /dev/null +++ b/lib/libc/db/test/hash.tests/tseq.c @@ -0,0 +1,88 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1991, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)tseq.c 8.1 (Berkeley) 6/4/93"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/file.h> +#include <stdio.h> +#include <db.h> + +#define INITIAL 25000 +#define MAXWORDS 25000 /* # of elements in search table */ + + +char wp[8192]; +char cp[8192]; +main(argc, argv) +char **argv; +{ + DBT item, key, res; + DB *dbp; + FILE *fp; + int stat; + + if (!(dbp = dbopen( "hashtest", O_RDONLY, 0400, DB_HASH, NULL))) { + /* create table */ + fprintf(stderr, "cannot open: hash table\n" ); + exit(1); + } + +/* +* put info in structure, and structure in the item +*/ + for ( stat = (dbp->seq) (dbp, &res, &item, 1 ); + stat == 0; + stat = (dbp->seq) (dbp, &res, &item, 0 ) ) { + + bcopy ( res.data, wp, res.size ); + wp[res.size] = 0; + bcopy ( item.data, cp, item.size ); + cp[item.size] = 0; + + printf ( "%s %s\n", wp, cp ); + } + (dbp->close)(dbp); + exit(0); +} diff --git a/lib/libc/db/test/hash.tests/tverify.c b/lib/libc/db/test/hash.tests/tverify.c new file mode 100644 index 0000000..ac5d2f9 --- /dev/null +++ b/lib/libc/db/test/hash.tests/tverify.c @@ -0,0 +1,107 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1991, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)tverify.c 8.1 (Berkeley) 6/4/93"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/file.h> +#include <stdio.h> +#include <db.h> + +#define INITIAL 25000 +#define MAXWORDS 25000 /* # of elements in search table */ + +typedef struct { /* info to be stored */ + int num, siz; +} info; + +char wp1[8192]; +char wp2[8192]; +main(argc, argv) +char **argv; +{ + DBT key, res; + DB *dbp; + HASHINFO ctl; + int trash; + int stat; + + int i = 0; + + ctl.nelem = INITIAL; + ctl.hash = NULL; + ctl.bsize = 64; + ctl.ffactor = 1; + ctl.cachesize = 1024 * 1024; /* 1 MEG */ + ctl.lorder = 0; + if (!(dbp = dbopen( "hashtest", O_RDONLY, 0400, DB_HASH, &ctl))) { + /* create table */ + fprintf(stderr, "cannot open: hash table\n" ); + exit(1); + } + + key.data = wp1; + while ( fgets(wp1, 8192, stdin) && + fgets(wp2, 8192, stdin) && + i++ < MAXWORDS) { +/* +* put info in structure, and structure in the item +*/ + key.size = strlen(wp1); + + stat = (dbp->get)(dbp, &key, &res,0); + if (stat < 0) { + fprintf ( stderr, "Error retrieving %s\n", key.data ); + exit(1); + } else if ( stat > 0 ) { + fprintf ( stderr, "%s not found\n", key.data ); + exit(1); + } + if ( memcmp ( res.data, wp2, res.size ) ) { + fprintf ( stderr, "data for %s is incorrect. Data was %s. Should have been %s\n", key.data, res.data, wp2 ); + } + } + (dbp->close)(dbp); + exit(0); +} diff --git a/lib/libc/db/test/run.test b/lib/libc/db/test/run.test new file mode 100644 index 0000000..5eeaf74 --- /dev/null +++ b/lib/libc/db/test/run.test @@ -0,0 +1,699 @@ +#!/bin/sh - +# +# @(#)run.test 8.7 (Berkeley) 9/16/93 +# + +# db regression tests +main() +{ + +DICT=/usr/share/dict/words +#DICT=/usr/dict/words +PROG=./dbtest +TMP1=t1 +TMP2=t2 +TMP3=t3 + + if [ $# -eq 0 ]; then + for t in 1 2 3 4 5 6 7 8 9 10 11 12 13 20; do + test$t + done + else + while [ $# -gt 0 ] + do case "$1" in + test*) + $1;; + [0-9]*) + test$1;; + btree) + for t in 1 2 3 7 8 9 10 12 13; do + test$t + done;; + hash) + for t in 1 2 3 8 13 20; do + test$t + done;; + recno) + for t in 1 2 3 4 5 6 7 10 11; do + test$t + done;; + *) + echo "run.test: unknown test $1" + echo "usage: run.test test# | type" + exit 1 + esac + shift + done + fi + rm -f $TMP1 $TMP2 $TMP3 + exit 0 +} + +# Take the first hundred entries in the dictionary, and make them +# be key/data pairs. +test1() +{ + echo "Test 1: btree, hash: small key, small data pairs" + sed 200q $DICT > $TMP1 + for type in btree hash; do + rm -f $TMP2 $TMP3 + for i in `sed 200q $DICT`; do + echo p + echo k$i + echo d$i + echo g + echo k$i + done > $TMP2 + $PROG -o $TMP3 $type $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test1: type $type: failed" + exit 1 + fi + done + echo "Test 1: recno: small key, small data pairs" + rm -f $TMP2 $TMP3 + sed 200q $DICT | + awk '{ + ++i; + printf("p\nk%d\nd%s\ng\nk%d\n", i, $0, i); + }' > $TMP2 + $PROG -o $TMP3 recno $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test1: type recno: failed" + exit 1 + fi +} + +# Take the first 200 entries in the dictionary, and give them +# each a medium size data entry. +test2() +{ + echo "Test 2: btree, hash: small key, medium data pairs" + mdata=abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz + echo $mdata | + awk '{ for (i = 1; i < 201; ++i) print $0 }' > $TMP1 + for type in hash btree; do + rm -f $TMP2 $TMP3 + for i in `sed 200q $DICT`; do + echo p + echo k$i + echo d$mdata + echo g + echo k$i + done > $TMP2 + $PROG -o $TMP3 $type $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test2: type $type: failed" + exit 1 + fi + done + echo "Test 2: recno: small key, medium data pairs" + rm -f $TMP2 $TMP3 + echo $mdata | + awk '{ for (i = 1; i < 201; ++i) + printf("p\nk%d\nd%s\ng\nk%d\n", i, $0, i); + }' > $TMP2 + $PROG -o $TMP3 recno $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test2: type recno: failed" + exit 1 + fi +} + +# Insert the programs in /bin with their paths as their keys. +test3() +{ + echo "Test 3: hash: small key, big data pairs" + rm -f $TMP1 + (find /bin -type f -print | xargs cat) > $TMP1 + for type in hash; do + rm -f $TMP2 $TMP3 + for i in `find /bin -type f -print`; do + echo p + echo k$i + echo D$i + echo g + echo k$i + done > $TMP2 + $PROG -o $TMP3 $type $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test3: $type: failed" + exit 1 + fi + done + echo "Test 3: btree: small key, big data pairs" + for psize in 512 16384 65536; do + echo " page size $psize" + for type in btree; do + rm -f $TMP2 $TMP3 + for i in `find /bin -type f -print`; do + echo p + echo k$i + echo D$i + echo g + echo k$i + done > $TMP2 + $PROG -i psize=$psize -o $TMP3 $type $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test3: $type: page size $psize: failed" + exit 1 + fi + done + done + echo "Test 3: recno: big data pairs" + rm -f $TMP2 $TMP3 + find /bin -type f -print | + awk '{ + ++i; + printf("p\nk%d\nD%s\ng\nk%d\n", i, $0, i); + }' > $TMP2 + for psize in 512 16384 65536; do + echo " page size $psize" + $PROG -i psize=$psize -o $TMP3 recno $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test3: recno: page size $psize: failed" + exit 1 + fi + done +} + +# Do random recno entries. +test4() +{ + echo "Test 4: recno: random entries" + echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" | + awk '{ + for (i = 37; i <= 37 + 88 * 17; i += 17) { + if (i % 41) + s = substr($0, 1, i % 41); + else + s = substr($0, 1); + printf("input key %d: %s\n", i, s); + } + for (i = 1; i <= 15; ++i) { + if (i % 41) + s = substr($0, 1, i % 41); + else + s = substr($0, 1); + printf("input key %d: %s\n", i, s); + } + for (i = 19234; i <= 19234 + 61 * 27; i += 27) { + if (i % 41) + s = substr($0, 1, i % 41); + else + s = substr($0, 1); + printf("input key %d: %s\n", i, s); + } + exit + }' > $TMP1 + rm -f $TMP2 $TMP3 + cat $TMP1 | + awk 'BEGIN { + i = 37; + incr = 17; + } + { + printf("p\nk%d\nd%s\n", i, $0); + if (i == 19234 + 61 * 27) + exit; + if (i == 37 + 88 * 17) { + i = 1; + incr = 1; + } else if (i == 15) { + i = 19234; + incr = 27; + } else + i += incr; + } + END { + for (i = 37; i <= 37 + 88 * 17; i += 17) + printf("g\nk%d\n", i); + for (i = 1; i <= 15; ++i) + printf("g\nk%d\n", i); + for (i = 19234; i <= 19234 + 61 * 27; i += 27) + printf("g\nk%d\n", i); + }' > $TMP2 + $PROG -o $TMP3 recno $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test4: type recno: failed" + exit 1 + fi +} + +# Do reverse order recno entries. +test5() +{ + echo "Test 5: recno: reverse order entries" + echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" | + awk ' { + for (i = 1500; i; --i) { + if (i % 34) + s = substr($0, 1, i % 34); + else + s = substr($0, 1); + printf("input key %d: %s\n", i, s); + } + exit; + }' > $TMP1 + rm -f $TMP2 $TMP3 + cat $TMP1 | + awk 'BEGIN { + i = 1500; + } + { + printf("p\nk%d\nd%s\n", i, $0); + --i; + } + END { + for (i = 1500; i; --i) + printf("g\nk%d\n", i); + }' > $TMP2 + $PROG -o $TMP3 recno $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test5: type recno: failed" + exit 1 + fi +} + +# Do alternating order recno entries. +test6() +{ + echo "Test 6: recno: alternating order entries" + echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" | + awk ' { + for (i = 1; i < 1200; i += 2) { + if (i % 34) + s = substr($0, 1, i % 34); + else + s = substr($0, 1); + printf("input key %d: %s\n", i, s); + } + for (i = 2; i < 1200; i += 2) { + if (i % 34) + s = substr($0, 1, i % 34); + else + s = substr($0, 1); + printf("input key %d: %s\n", i, s); + } + exit; + }' > $TMP1 + rm -f $TMP2 $TMP3 + cat $TMP1 | + awk 'BEGIN { + i = 1; + even = 0; + } + { + printf("p\nk%d\nd%s\n", i, $0); + i += 2; + if (i >= 1200) { + if (even == 1) + exit; + even = 1; + i = 2; + } + } + END { + for (i = 1; i < 1200; ++i) + printf("g\nk%d\n", i); + }' > $TMP2 + $PROG -o $TMP3 recno $TMP2 + sort -o $TMP1 $TMP1 + sort -o $TMP3 $TMP3 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test6: type recno: failed" + exit 1 + fi +} + +# Delete cursor record +test7() +{ + echo "Test 7: btree, recno: delete cursor record" + echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" | + awk '{ + for (i = 1; i <= 120; ++i) + printf("%05d: input key %d: %s\n", i, i, $0); + printf("%05d: input key %d: %s\n", 120, 120, $0); + printf("get failed, no such key\n"); + printf("%05d: input key %d: %s\n", 1, 1, $0); + printf("%05d: input key %d: %s\n", 2, 2, $0); + exit; + }' > $TMP1 + rm -f $TMP2 $TMP3 + + for type in btree recno; do + cat $TMP1 | + awk '{ + if (i == 120) + exit; + printf("p\nk%d\nd%s\n", ++i, $0); + } + END { + printf("fR_NEXT\n"); + for (i = 1; i <= 120; ++i) + printf("s\n"); + printf("fR_CURSOR\ns\nk120\n"); + printf("r\nk120\n"); + printf("fR_NEXT\ns\n"); + printf("fR_CURSOR\ns\nk1\n"); + printf("r\nk1\n"); + printf("fR_FIRST\ns\n"); + }' > $TMP2 + $PROG -o $TMP3 recno $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test7: type $type: failed" + exit 1 + fi + done +} + +# Make sure that overflow pages are reused. +test8() +{ + echo "Test 8: btree, hash: repeated small key, big data pairs" + rm -f $TMP1 + echo "" | + awk 'BEGIN { + for (i = 1; i <= 10; ++i) { + printf("p\nkkey1\nD/bin/sh\n"); + printf("p\nkkey2\nD/bin/csh\n"); + if (i % 8 == 0) { + printf("c\nkkey2\nD/bin/csh\n"); + printf("c\nkkey1\nD/bin/sh\n"); + printf("e\t%d of 10 (comparison)\r\n", i); + } else + printf("e\t%d of 10 \r\n", i); + printf("r\nkkey1\nr\nkkey2\n"); + } + printf("e\n"); + printf("eend of test8 run\n"); + }' > $TMP1 + $PROG btree $TMP1 +# $PROG hash $TMP1 + # No explicit test for success. +} + +# Test btree duplicate keys +test9() +{ + echo "Test 9: btree: duplicate keys" + echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" | + awk '{ + for (i = 1; i <= 543; ++i) + printf("%05d: input key %d: %s\n", i, i, $0); + exit; + }' > $TMP1 + rm -f $TMP2 $TMP3 + + for type in btree; do + cat $TMP1 | + awk '{ + if (i++ % 2) + printf("p\nkduplicatekey\nd%s\n", $0); + else + printf("p\nkunique%dkey\nd%s\n", i, $0); + } + END { + printf("o\n"); + }' > $TMP2 + $PROG -iflags=1 -o $TMP3 $type $TMP2 + sort -o $TMP3 $TMP3 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test9: type $type: failed" + exit 1 + fi + done +} + +# Test use of cursor flags without initialization +test10() +{ + echo "Test 10: btree, recno: test cursor flag use" + echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" | + awk '{ + for (i = 1; i <= 20; ++i) + printf("%05d: input key %d: %s\n", i, i, $0); + exit; + }' > $TMP1 + rm -f $TMP2 $TMP3 + + # Test that R_CURSOR doesn't succeed before cursor initialized + for type in btree recno; do + cat $TMP1 | + awk '{ + if (i == 10) + exit; + printf("p\nk%d\nd%s\n", ++i, $0); + } + END { + printf("fR_CURSOR\nr\nk1\n"); + printf("eR_CURSOR SHOULD HAVE FAILED\n"); + }' > $TMP2 + $PROG -o $TMP3 $type $TMP2 > /dev/null 2>&1 + if [ -s $TMP3 ] ; then + echo "Test 10: delete: R_CURSOR SHOULD HAVE FAILED" + exit 1 + fi + done + for type in btree recno; do + cat $TMP1 | + awk '{ + if (i == 10) + exit; + printf("p\nk%d\nd%s\n", ++i, $0); + } + END { + printf("fR_CURSOR\np\nk1\ndsome data\n"); + printf("eR_CURSOR SHOULD HAVE FAILED\n"); + }' > $TMP2 + $PROG -o $TMP3 $type $TMP2 > /dev/null 2>&1 + if [ -s $TMP3 ] ; then + echo "Test 10: put: R_CURSOR SHOULD HAVE FAILED" + exit 1 + fi + done +} + +# Test insert in reverse order. +test11() +{ + echo "Test 11: recno: reverse order insert" + echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" | + awk '{ + for (i = 1; i <= 779; ++i) + printf("%05d: input key %d: %s\n", i, i, $0); + exit; + }' > $TMP1 + rm -f $TMP2 $TMP3 + + for type in recno; do + cat $TMP1 | + awk '{ + if (i == 0) { + i = 1; + printf("p\nk1\nd%s\n", $0); + printf("%s\n", "fR_IBEFORE"); + } else + printf("p\nk1\nd%s\n", $0); + } + END { + printf("or\n"); + }' > $TMP2 + $PROG -o $TMP3 $type $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test11: type $type: failed" + exit 1 + fi + done +} + +# Take the first 20000 entries in the dictionary, reverse them, and give +# them each a small size data entry. Use a small page size to make sure +# the btree split code gets hammered. +test12() +{ + echo "Test 12: btree: lots of keys, small page size" + mdata=abcdefghijklmnopqrstuvwxy + echo $mdata | + awk '{ for (i = 1; i < 20001; ++i) print $0 }' > $TMP1 + for type in btree; do + rm -f $TMP2 $TMP3 + for i in `sed 20000q $DICT | rev`; do + echo p + echo k$i + echo d$mdata + echo g + echo k$i + done > $TMP2 + $PROG -i psize=512 -o $TMP3 $type $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test12: type $type: failed" + exit 1 + fi + done +} + +# Test different byte orders. +test13() +{ + echo "Test 13: btree, hash: differing byte orders" + sed 50q $DICT > $TMP1 + for order in 1234 4321; do + for type in btree hash; do + rm -f byte.file $TMP2 $TMP3 + for i in `sed 50q $DICT`; do + echo p + echo k$i + echo d$i + echo g + echo k$i + done > $TMP2 + $PROG -ilorder=$order -f byte.file -o $TMP3 $type $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test13: $type/$order put failed" + exit 1 + fi + for i in `sed 50q $DICT`; do + echo g + echo k$i + done > $TMP2 + $PROG -ilorder=$order -f byte.file -o $TMP3 $type $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test13: $type/$order get failed" + exit 1 + fi + done + done + rm -f byte.file +} + +# Try a variety of bucketsizes and fill factors for hashing +test20() +{ + echo\ + "Test 20: hash: bucketsize, fill factor; nelem 25000 cachesize 65536" + echo "abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" | + awk '{ + for (i = 1; i <= 10000; ++i) { + if (i % 34) + s = substr($0, 1, i % 34); + else + s = substr($0, 1); + printf("%s\n", s); + } + exit; + }' > $TMP1 + sed 10000q $DICT | + awk 'BEGIN { + ds="abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg abcdefg" + } + { + if (++i % 34) + s = substr(ds, 1, i % 34); + else + s = substr(ds, 1); + printf("p\nk%s\nd%s\n", $0, s); + }' > $TMP2 + sed 10000q $DICT | + awk '{ + ++i; + printf("g\nk%s\n", $0); + }' >> $TMP2 + bsize=256 + for ffactor in 11 14 21; do + echo " bucketsize $bsize, fill factor $ffactor" + $PROG -o$TMP3 \ + -ibsize=$bsize,ffactor=$ffactor,nelem=25000,cachesize=65536\ + hash $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test20: type hash:\ +bsize=$bsize ffactor=$ffactor nelem=25000 cachesize=65536 failed" + exit 1 + fi + done + bsize=512 + for ffactor in 21 28 43; do + echo " bucketsize $bsize, fill factor $ffactor" + $PROG -o$TMP3 \ + -ibsize=$bsize,ffactor=$ffactor,nelem=25000,cachesize=65536\ + hash $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test20: type hash:\ +bsize=$bsize ffactor=$ffactor nelem=25000 cachesize=65536 failed" + exit 1 + fi + done + bsize=1024 + for ffactor in 43 57 85; do + echo " bucketsize $bsize, fill factor $ffactor" + $PROG -o$TMP3 \ + -ibsize=$bsize,ffactor=$ffactor,nelem=25000,cachesize=65536\ + hash $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test20: type hash:\ +bsize=$bsize ffactor=$ffactor nelem=25000 cachesize=65536 failed" + exit 1 + fi + done + bsize=2048 + for ffactor in 85 114 171; do + echo " bucketsize $bsize, fill factor $ffactor" + $PROG -o$TMP3 \ + -ibsize=$bsize,ffactor=$ffactor,nelem=25000,cachesize=65536\ + hash $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test20: type hash:\ +bsize=$bsize ffactor=$ffactor nelem=25000 cachesize=65536 failed" + exit 1 + fi + done + bsize=4096 + for ffactor in 171 228 341; do + echo " bucketsize $bsize, fill factor $ffactor" + $PROG -o$TMP3 \ + -ibsize=$bsize,ffactor=$ffactor,nelem=25000,cachesize=65536\ + hash $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test20: type hash:\ +bsize=$bsize ffactor=$ffactor nelem=25000 cachesize=65536 failed" + exit 1 + fi + done + bsize=8192 + for ffactor in 341 455 683; do + echo " bucketsize $bsize, fill factor $ffactor" + $PROG -o$TMP3 \ + -ibsize=$bsize,ffactor=$ffactor,nelem=25000,cachesize=65536\ + hash $TMP2 + if (cmp -s $TMP1 $TMP3) ; then : + else + echo "test20: type hash:\ +bsize=$bsize ffactor=$ffactor nelem=25000 cachesize=65536 failed" + exit 1 + fi + done +} + +main $* |